diff --git a/src/word_db.cc b/src/word_db.cc index 56b7fedf..2ea35513 100644 --- a/src/word_db.cc +++ b/src/word_db.cc @@ -34,30 +34,21 @@ struct WordSplitter const auto* end = m_splitter->m_content.end(); auto extra_chars = m_splitter->m_extra_word_chars; - while (true) + do { - m_word_begin = m_word_end; - while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars)) - utf8::to_next(m_word_begin, end); - m_word_end = m_word_begin; - CharCount word_len = 0; - while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars)) - { - utf8::to_next(m_word_end, end); - ++word_len; - } - if (m_word_begin == end or word_len < WordDB::max_word_len) - break; - } + auto it = m_word_begin = m_word_end; + while (it != end and not is_word(utf8::read_codepoint(it, end), extra_chars)) + m_word_begin = it; + + m_word_end = it; + while (it != end and is_word(utf8::read_codepoint(it, end), extra_chars)) + m_word_end = it; + } while (m_word_begin != end and (m_word_end - m_word_begin) > WordDB::max_word_len); return *this; } - friend bool operator==(const Iterator& lhs, const Iterator& rhs) - { return lhs.m_word_begin == rhs.m_word_begin and lhs.m_word_end == rhs.m_word_end; } - - friend bool operator!=(const Iterator& lhs, const Iterator& rhs) - { return not (lhs == rhs); } + friend bool operator==(const Iterator& lhs, const Iterator& rhs) = default; const char* m_word_begin; const char* m_word_end; diff --git a/src/word_db.hh b/src/word_db.hh index ac7b8c01..dcb46653 100644 --- a/src/word_db.hh +++ b/src/word_db.hh @@ -18,7 +18,7 @@ class Buffer; class WordDB : public OptionManagerWatcher { public: - static constexpr CharCount max_word_len = 50; + static constexpr ByteCount max_word_len = 50; WordDB(const Buffer& buffer); ~WordDB();