Speed up WordSplitter
Decode each UTF-8 codepoint only once instead of twice, and limit words by their byte length rather than their codepoint count.
This commit is contained in:
parent
b33b673f10
commit
be33dee211
|
@ -34,30 +34,21 @@ struct WordSplitter
|
|||
const auto* end = m_splitter->m_content.end();
|
||||
auto extra_chars = m_splitter->m_extra_word_chars;
|
||||
|
||||
while (true)
|
||||
do
|
||||
{
|
||||
m_word_begin = m_word_end;
|
||||
while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars))
|
||||
utf8::to_next(m_word_begin, end);
|
||||
m_word_end = m_word_begin;
|
||||
CharCount word_len = 0;
|
||||
while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars))
|
||||
{
|
||||
utf8::to_next(m_word_end, end);
|
||||
++word_len;
|
||||
}
|
||||
if (m_word_begin == end or word_len < WordDB::max_word_len)
|
||||
break;
|
||||
}
|
||||
auto it = m_word_begin = m_word_end;
|
||||
while (it != end and not is_word(utf8::read_codepoint(it, end), extra_chars))
|
||||
m_word_begin = it;
|
||||
|
||||
m_word_end = it;
|
||||
while (it != end and is_word(utf8::read_codepoint(it, end), extra_chars))
|
||||
m_word_end = it;
|
||||
} while (m_word_begin != end and (m_word_end - m_word_begin) > WordDB::max_word_len);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
friend bool operator==(const Iterator& lhs, const Iterator& rhs)
|
||||
{ return lhs.m_word_begin == rhs.m_word_begin and lhs.m_word_end == rhs.m_word_end; }
|
||||
|
||||
friend bool operator!=(const Iterator& lhs, const Iterator& rhs)
|
||||
{ return not (lhs == rhs); }
|
||||
friend bool operator==(const Iterator& lhs, const Iterator& rhs) = default;
|
||||
|
||||
const char* m_word_begin;
|
||||
const char* m_word_end;
|
||||
|
|
|
@ -18,7 +18,7 @@ class Buffer;
|
|||
class WordDB : public OptionManagerWatcher
|
||||
{
|
||||
public:
|
||||
static constexpr CharCount max_word_len = 50;
|
||||
static constexpr ByteCount max_word_len = 50;
|
||||
|
||||
WordDB(const Buffer& buffer);
|
||||
~WordDB();
|
||||
|
|
Loading…
Reference in New Issue
Block a user