Add utf8::read_codepoint that both gets the codepoint and advances the iterator

This commit is contained in:
Maxime Coste 2015-09-24 23:00:47 +01:00
parent 12ef466f3a
commit aa4b98af7c
2 changed files with 17 additions and 11 deletions

View File

@ -861,20 +861,19 @@ void expand_unprintable(const Context& context, HighlightFlags flags, DisplayBuf
for (auto it = buffer.iterator_at(atom_it->begin()), for (auto it = buffer.iterator_at(atom_it->begin()),
end = buffer.iterator_at(atom_it->end()); it < end;) end = buffer.iterator_at(atom_it->end()); it < end;)
{ {
Codepoint cp = utf8::codepoint<utf8::InvalidPolicy::Pass>(it, end); auto coord = it.coord();
auto next = utf8::next(it, end); Codepoint cp = utf8::read_codepoint<utf8::InvalidPolicy::Pass>(it, end);
if (cp != '\n' and not iswprint(cp)) if (cp != '\n' and not iswprint(cp))
{ {
if (it.coord() != atom_it->begin()) if (coord != atom_it->begin())
atom_it = ++line.split(atom_it, it.coord()); atom_it = ++line.split(atom_it, coord);
if (next.coord() < atom_it->end()) if (it.coord() < atom_it->end())
atom_it = line.split(atom_it, next.coord()); atom_it = line.split(atom_it, it.coord());
atom_it->replace(format("U+{}", hex(cp))); atom_it->replace(format("U+{}", hex(cp)));
atom_it->face = { Color::Red, Color::Black }; atom_it->face = { Color::Red, Color::Black };
break; break;
} }
it = next;
} }
} }
} }

View File

@ -110,7 +110,7 @@ struct Pass
// is pointed by it // is pointed by it
template<typename InvalidPolicy = utf8::InvalidPolicy::Pass, template<typename InvalidPolicy = utf8::InvalidPolicy::Pass,
typename Iterator> typename Iterator>
Codepoint codepoint(Iterator it, const Iterator& end) Codepoint read_codepoint(Iterator& it, const Iterator& end)
{ {
if (it == end) if (it == end)
return InvalidPolicy{}(-1); return InvalidPolicy{}(-1);
@ -124,14 +124,14 @@ Codepoint codepoint(Iterator it, const Iterator& end)
return InvalidPolicy{}(byte); return InvalidPolicy{}(byte);
if ((byte & 0xE0) == 0xC0) // 110xxxxx if ((byte & 0xE0) == 0xC0) // 110xxxxx
return ((byte & 0x1F) << 6) | (*it & 0x3F); return ((byte & 0x1F) << 6) | (*it++ & 0x3F);
if ((byte & 0xF0) == 0xE0) // 1110xxxx if ((byte & 0xF0) == 0xE0) // 1110xxxx
{ {
Codepoint cp = ((byte & 0x0F) << 12) | ((*it++ & 0x3F) << 6); Codepoint cp = ((byte & 0x0F) << 12) | ((*it++ & 0x3F) << 6);
if (it == end) if (it == end)
return InvalidPolicy{}(cp); return InvalidPolicy{}(cp);
return cp | (*it & 0x3F); return cp | (*it++ & 0x3F);
} }
if ((byte & 0xF8) == 0xF0) // 11110xxx if ((byte & 0xF8) == 0xF0) // 11110xxx
@ -142,11 +142,18 @@ Codepoint codepoint(Iterator it, const Iterator& end)
cp |= (*it++ & 0x3F) << 6; cp |= (*it++ & 0x3F) << 6;
if (it == end) if (it == end)
return InvalidPolicy{}(cp); return InvalidPolicy{}(cp);
return cp | (*it & 0x3F); return cp | (*it++ & 0x3F);
} }
return InvalidPolicy{}(byte); return InvalidPolicy{}(byte);
} }
// Returns the codepoint starting at it without modifying the caller's
// iterator: it is taken by value here, so read_codepoint only advances
// this local copy.
// InvalidPolicy is forwarded explicitly; it cannot be deduced from the
// function arguments, so omitting it would make read_codepoint fall back
// to its own default policy and ignore the one requested by the caller.
template<typename InvalidPolicy = utf8::InvalidPolicy::Pass,
         typename Iterator>
Codepoint codepoint(Iterator it, const Iterator& end)
{
    return read_codepoint<InvalidPolicy>(it, end);
}
template<typename InvalidPolicy = utf8::InvalidPolicy::Pass> template<typename InvalidPolicy = utf8::InvalidPolicy::Pass>
ByteCount codepoint_size(char byte) ByteCount codepoint_size(char byte)
{ {