Use unsigned char rather than char in UTF-8 decoding to avoid sign extension

This commit is contained in:
Maxime Coste 2014-07-01 23:47:09 +01:00
parent 70db72b175
commit 3f70d91f8c

View File

@ -95,12 +95,12 @@ namespace InvalidBytePolicy
// Invalid-byte policy functor that treats being called as a failure: it
// evaluates kak_assert(false) and then returns the byte converted to Codepoint.
// NOTE(review): the `char` and `unsigned char` signatures below look like the
// removed/added lines of a diff hunk whose +/- markers were lost, rather than
// two overloads meant to coexist -- confirm against the real file.
struct Assert
{
// Presumably the pre-change signature: per the commit title, taking a plain
// char risks sign extension when the byte is widened to the return type.
Codepoint operator()(char byte) const { kak_assert(false); return byte; }
// Presumably the post-change signature: the byte is taken as unsigned char.
Codepoint operator()(unsigned char byte) const { kak_assert(false); return byte; }
};
// Invalid-byte policy functor that lets the byte through: it returns the byte
// it was given, converted to Codepoint, without any check.
// NOTE(review): the `char` and `unsigned char` signatures below look like the
// removed/added lines of a diff hunk whose +/- markers were lost, rather than
// two overloads meant to coexist -- confirm against the real file.
struct Pass
{
// Presumably the pre-change signature: per the commit title, taking a plain
// char risks sign extension when the byte is widened to the return type.
Codepoint operator()(char byte) const { return byte; }
// Presumably the post-change signature: the byte is taken as unsigned char.
Codepoint operator()(unsigned char byte) const { return byte; }
};
}
@ -114,7 +114,7 @@ Codepoint codepoint(Iterator it)
// According to rfc3629, UTF-8 allows only up to 4 bytes.
// (21 bits codepoint)
Codepoint cp;
char byte = *it++;
unsigned char byte = *it++;
if (not (byte & 0x80)) // 0xxxxxxx
cp = byte;
else if ((byte & 0xE0) == 0xC0) // 110xxxxx
@ -141,7 +141,7 @@ template<typename InvalidPolicy = InvalidBytePolicy::Assert,
typename Iterator>
ByteCount codepoint_size(Iterator it)
{
char byte = *it;
unsigned char byte = *it;
if (not (byte & 0x80)) // 0xxxxxxx
return 1;
else if ((byte & 0xE0) == 0xC0) // 110xxxxx