Avoid (*it++) pattern in utf8.hh

This commit is contained in:
Maxime Coste 2015-09-25 13:19:21 +01:00
parent 48a7448b11
commit 4ea89def3b

View File

@ -13,11 +13,15 @@ namespace Kakoune
namespace utf8 namespace utf8
{ {
// Fetches the char currently designated by `it`, then steps `it` forward by
// one position. The dereference and the increment are written as two separate
// statements (rather than `*it++`) so only pre-increment is used on the
// iterator. The caller must ensure `it` is dereferenceable: no end check is
// performed here.
template<typename Iterator>
[[gnu::always_inline]]
inline char read(Iterator& it)
{
    const char current = *it;
    ++it;
    return current;
}
// returns an iterator to next character first byte // returns an iterator to next character first byte
template<typename Iterator> template<typename Iterator>
Iterator next(Iterator it, const Iterator& end) Iterator next(Iterator it, const Iterator& end)
{ {
if (it != end and *it++ & 0x80) if (it != end and read(it) & 0x80)
while (it != end and (*(it) & 0xC0) == 0x80) while (it != end and (*(it) & 0xC0) == 0x80)
++it; ++it;
return it; return it;
@ -116,7 +120,7 @@ Codepoint read_codepoint(Iterator& it, const Iterator& end)
return InvalidPolicy{}(-1); return InvalidPolicy{}(-1);
// According to rfc3629, UTF-8 allows only up to 4 bytes. // According to rfc3629, UTF-8 allows only up to 4 bytes.
// (21 bits codepoint) // (21 bits codepoint)
unsigned char byte = *it++; unsigned char byte = read(it);
if (not (byte & 0x80)) // 0xxxxxxx if (not (byte & 0x80)) // 0xxxxxxx
return byte; return byte;
@ -124,25 +128,25 @@ Codepoint read_codepoint(Iterator& it, const Iterator& end)
return InvalidPolicy{}(byte); return InvalidPolicy{}(byte);
if ((byte & 0xE0) == 0xC0) // 110xxxxx if ((byte & 0xE0) == 0xC0) // 110xxxxx
return ((byte & 0x1F) << 6) | (*it++ & 0x3F); return ((byte & 0x1F) << 6) | (read(it) & 0x3F);
if ((byte & 0xF0) == 0xE0) // 1110xxxx if ((byte & 0xF0) == 0xE0) // 1110xxxx
{ {
Codepoint cp = ((byte & 0x0F) << 12) | ((*it++ & 0x3F) << 6); Codepoint cp = ((byte & 0x0F) << 12) | ((read(it) & 0x3F) << 6);
if (it == end) if (it == end)
return InvalidPolicy{}(cp); return InvalidPolicy{}(cp);
return cp | (*it++ & 0x3F); return cp | (read(it) & 0x3F);
} }
if ((byte & 0xF8) == 0xF0) // 11110xxx if ((byte & 0xF8) == 0xF0) // 11110xxx
{ {
Codepoint cp = ((byte & 0x0F) << 18) | ((*it++ & 0x3F) << 12); Codepoint cp = ((byte & 0x0F) << 18) | ((read(it) & 0x3F) << 12);
if (it == end) if (it == end)
return InvalidPolicy{}(cp); return InvalidPolicy{}(cp);
cp |= (*it++ & 0x3F) << 6; cp |= (read(it) & 0x3F) << 6;
if (it == end) if (it == end)
return InvalidPolicy{}(cp); return InvalidPolicy{}(cp);
return cp | (*it++ & 0x3F); return cp | (read(it) & 0x3F);
} }
return InvalidPolicy{}(byte); return InvalidPolicy{}(byte);
} }