From 4ea89def3b8c41d7b5f20a963a71f336e5fcee0e Mon Sep 17 00:00:00 2001
From: Maxime Coste
Date: Fri, 25 Sep 2015 13:19:21 +0100
Subject: [PATCH] Avoid (*it++) pattern in utf8.hh

---
 src/utf8.hh | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/utf8.hh b/src/utf8.hh
index 09d66b99..f38327eb 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -13,11 +13,15 @@ namespace Kakoune
 namespace utf8
 {
 
+template<typename Iterator>
+[[gnu::always_inline]]
+inline char read(Iterator& it) { char c = *it; ++it; return c; }
+
 // returns an iterator to next character first byte
 template<typename Iterator>
 Iterator next(Iterator it, const Iterator& end)
 {
-    if (it != end and *it++ & 0x80)
+    if (it != end and read(it) & 0x80)
         while (it != end and (*(it) & 0xC0) == 0x80)
             ++it;
     return it;
@@ -116,7 +120,7 @@ Codepoint read_codepoint(Iterator& it, const Iterator& end)
         return InvalidPolicy{}(-1);
     // According to rfc3629, UTF-8 allows only up to 4 bytes.
     // (21 bits codepoint)
-    unsigned char byte = *it++;
+    unsigned char byte = read(it);
     if (not (byte & 0x80)) // 0xxxxxxx
         return byte;
 
@@ -124,25 +128,25 @@ Codepoint read_codepoint(Iterator& it, const Iterator& end)
         return InvalidPolicy{}(byte);
 
     if ((byte & 0xE0) == 0xC0) // 110xxxxx
-        return ((byte & 0x1F) << 6) | (*it++ & 0x3F);
+        return ((byte & 0x1F) << 6) | (read(it) & 0x3F);
 
     if ((byte & 0xF0) == 0xE0) // 1110xxxx
     {
-        Codepoint cp = ((byte & 0x0F) << 12) | ((*it++ & 0x3F) << 6);
+        Codepoint cp = ((byte & 0x0F) << 12) | ((read(it) & 0x3F) << 6);
         if (it == end)
             return InvalidPolicy{}(cp);
-        return cp | (*it++ & 0x3F);
+        return cp | (read(it) & 0x3F);
     }
 
     if ((byte & 0xF8) == 0xF0) // 11110xxx
     {
-        Codepoint cp = ((byte & 0x0F) << 18) | ((*it++ & 0x3F) << 12);
+        Codepoint cp = ((byte & 0x0F) << 18) | ((read(it) & 0x3F) << 12);
         if (it == end)
             return InvalidPolicy{}(cp);
-        cp |= (*it++ & 0x3F) << 6;
+        cp |= (read(it) & 0x3F) << 6;
         if (it == end)
             return InvalidPolicy{}(cp);
-        return cp | (*it++ & 0x3F);
+        return cp | (read(it) & 0x3F);
     }
     return InvalidPolicy{}(byte);
 }