Faster implementation of utf8::advance not copying iterators at each step

This commit is contained in:
Maxime Coste 2016-07-15 20:19:44 +01:00
parent 73fdc726fb
commit 1401c55531

View File

@ -17,6 +17,14 @@ template<typename Iterator>
// Returns the char that it currently refers to, then moves it one step forward.
[[gnu::always_inline]] [[gnu::always_inline]]
inline char read(Iterator& it) { char c = *it; ++it; return c; } inline char read(Iterator& it) { char c = *it; ++it; return c; }
// Tells whether byte c begins a character (single byte or multibyte).
// Every byte qualifies except those shaped 10xxxxxx, i.e. whose two
// highest bits are exactly binary 10.
[[gnu::always_inline]]
inline bool is_character_start(char c)
{
    const int top_two_bits = c & 0xC0;
    return not (top_two_bits == 0x80);
}
// returns an iterator to next character first byte // returns an iterator to next character first byte
template<typename Iterator> template<typename Iterator>
Iterator next(Iterator it, const Iterator& end) Iterator next(Iterator it, const Iterator& end)
@ -52,27 +60,28 @@ Iterator previous(Iterator it, const Iterator& begin)
// Moves it by d characters and returns the resulting iterator: forward
// when d > 0, backward when d < 0, unchanged when d == 0. The walk stops
// early as soon as it becomes equal to end.
//
// it:  starting position
// end: boundary that it is compared against in both directions (for
//      d < 0 it therefore plays the role of the lower bound)
// d:   number of character starts to step over
template<typename Iterator>
Iterator advance(Iterator it, const Iterator& end, CharCount d)
{
    if (it == end)
        return it;

    if (d < 0)
    {
        // it != end at the top of every iteration, so stepping back
        // before reading stays on the near side of the boundary.
        while (it != end and d != 0)
        {
            if (is_character_start(*--it))
                ++d;
        }
    }
    else if (d > 0)
    {
        // Step first, then check the boundary *before* reading: end is
        // not dereferenceable in general, so *++it must not be evaluated
        // once the increment has reached it.
        while (d != 0 and ++it != end)
        {
            if (is_character_start(*it))
                --d;
        }
    }
    return it;
}
// Reports whether c is the first byte of a character, be it a single
// byte one or the lead byte of a multibyte sequence. Only bytes whose
// top two bits read binary 10 are rejected.
[[gnu::always_inline]]
inline bool is_character_start(char c)
{
    constexpr int top_bits_mask = 0xC0;
    constexpr int non_start_tag = 0x80;
    return (c & top_bits_mask) != non_start_tag;
}
// returns the character count between begin and end // returns the character count between begin and end
template<typename Iterator> template<typename Iterator>
CharCount distance(Iterator begin, const Iterator& end) CharCount distance(Iterator begin, const Iterator& end)