Limit WordDB word length to fewer than 50 characters

Skipping overly long words should improve both the performance and the relevance of word completions.
2019-03-22 18:03:49 +11:00 · 2019-03-22 18:03:49 +11:00 · ad882c3370
commit ad882c3370
parent b9c1fa61a0
1 changed files with 18 additions and 6 deletions
--- a/src/word_db.cc
+++ b/src/word_db.cc
@ -21,6 +21,8 @@ WordDB& get_word_db(const Buffer& buffer)

 struct WordSplitter
 {
+    static constexpr CharCount max_word_len = 50;
+
    struct Iterator
    {
        Iterator(const char* begin, const WordSplitter& splitter)
@ -34,12 +36,22 @@ struct WordSplitter
            const auto* end = m_splitter->m_content.end();
            auto extra_chars = m_splitter->m_extra_word_chars;

-            m_word_begin = m_word_end;
-            while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars))
-                utf8::to_next(m_word_begin, end);
-            m_word_end = m_word_begin;
-            while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars))
-                utf8::to_next(m_word_end, end);
+            while (true)
+            {
+                m_word_begin = m_word_end;
+                while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars))
+                    utf8::to_next(m_word_begin, end);
+                m_word_end = m_word_begin;
+                CharCount word_len = 0;
+                while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars))
+                {
+                    utf8::to_next(m_word_end, end);
+                    ++word_len;
+                }
+                if (m_word_begin == end or word_len < max_word_len)
+                    break;
+            }
+
            return *this;
        }