From 3f70d91f8c716ef2dbc76abb9c878f86ecb946f7 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Tue, 1 Jul 2014 23:47:09 +0100 Subject: [PATCH] Use unsigned char rather than char in utf8 decoding to avoid sign extension --- src/utf8.hh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/utf8.hh b/src/utf8.hh index 5aac8c2a..60460a7a 100644 --- a/src/utf8.hh +++ b/src/utf8.hh @@ -95,12 +95,12 @@ namespace InvalidBytePolicy struct Assert { - Codepoint operator()(char byte) const { kak_assert(false); return byte; } + Codepoint operator()(unsigned char byte) const { kak_assert(false); return byte; } }; struct Pass { - Codepoint operator()(char byte) const { return byte; } + Codepoint operator()(unsigned char byte) const { return byte; } }; } @@ -114,7 +114,7 @@ Codepoint codepoint(Iterator it) // According to rfc3629, UTF-8 allows only up to 4 bytes. // (21 bits codepoint) Codepoint cp; - char byte = *it++; + unsigned char byte = *it++; if (not (byte & 0x80)) // 0xxxxxxx cp = byte; else if ((byte & 0xE0) == 0xC0) // 110xxxxx @@ -141,7 +141,7 @@ template ByteCount codepoint_size(Iterator it) { - char byte = *it; + unsigned char byte = *it; if (not (byte & 0x80)) // 0xxxxxxx return 1; else if ((byte & 0xE0) == 0xC0) // 110xxxxx