#include "word_db.hh"

#include "buffer.hh"
#include "line_modification.hh"
#include "option_types.hh"
#include "unit_tests.hh"
#include "utils.hh"
#include "value.hh"

namespace Kakoune
{

// Return the WordDB attached to the given buffer, creating it on first use.
// The database is stored in the buffer's value map under an id that is
// allocated once, the first time this function runs.
WordDB& get_word_db(const Buffer& buffer)
{
    static const ValueId word_db_id = get_free_value_id();
    Value& cache_val = buffer.values()[word_db_id];
    if (not cache_val)
        cache_val = Value(WordDB{buffer});
    return cache_val.as<WordDB>();
}

// Range over the words contained in m_content, usable in a range-based for.
// A character belongs to a word when is_word() accepts it, given the extra
// word characters. Words whose byte length exceeds WordDB::max_word_len are
// skipped.
struct WordSplitter
{
    struct Iterator
    {
        // Starts at `begin` and immediately advances to the first word (or
        // to the end of the content when there is none).
        Iterator(const char* begin, const WordSplitter& splitter)
            : m_word_begin{begin}, m_word_end{begin}, m_splitter{&splitter}
        { operator++(); }

        StringView operator*() const { return {m_word_begin, m_word_end}; }

        Iterator& operator++()
        {
            const auto* end = m_splitter->m_content.end();
            auto extra_chars = m_splitter->m_extra_word_chars;

            do
            {
                // Resume right after the previous word. read_codepoint is
                // expected to advance `it`, so m_word_begin / m_word_end are
                // only updated while the codepoint just read still belongs to
                // the run being scanned; they end up on the run boundary.
                auto it = m_word_begin = m_word_end;
                while (it != end and not is_word(utf8::read_codepoint(it, end), extra_chars))
                    m_word_begin = it;

                m_word_end = it;
                while (it != end and is_word(utf8::read_codepoint(it, end), extra_chars))
                    m_word_end = it;
            // Keep going while a word was found but it is too long.
            } while (m_word_begin != end and (m_word_end - m_word_begin) > WordDB::max_word_len);

            return *this;
        }

        friend bool operator==(const Iterator& lhs, const Iterator& rhs) = default;

        const char* m_word_begin;
        const char* m_word_end;
        const WordSplitter* m_splitter;
    };

    StringView m_content;
    ConstArrayView<Codepoint> m_extra_word_chars;

    Iterator begin() const { return {m_content.begin(), *this}; }
    Iterator end()   const { return {m_content.end(), *this}; }
};

// Read the buffer's current "extra_word_chars" option.
// NOTE(review): the option's value type is assumed to be
// Vector<Codepoint, MemoryDomain::Options> -- confirm against its declaration.
static ConstArrayView<Codepoint> get_extra_word_chars(const Buffer& buffer)
{
    return buffer.options()["extra_word_chars"].get<Vector<Codepoint, MemoryDomain::Options>>();
}

// Account for every word of `line`: bump the reference count of words that
// are already known, intern and insert the others with a count of 1.
void WordDB::add_words(StringView line, ConstArrayView<Codepoint> extra_word_chars)
{
    for (auto&& w : WordSplitter{line, extra_word_chars})
    {
        // The hash is computed once and shared by lookup, interning and insertion.
        auto hash = hash_value(w);
        auto index = m_words.find_index(w, hash);
        if (index >= 0)
            ++m_words.item(index).value.refcount;
        else
        {
            auto word = intern(w, hash);
            auto view = word->strview();
            m_words.insert({view, {std::move(word), used_letters(view), 1}}, hash);
        }
    }
}

// Undo add_words() for `line`: decrement the reference count of each of its
// words and drop the entries whose count reaches zero. Every word is asserted
// to be present with a positive count.
void WordDB::remove_words(StringView line, ConstArrayView<Codepoint> extra_word_chars)
{
    for (auto&& w : WordSplitter{line, extra_word_chars})
    {
        auto it = m_words.find(w);
        kak_assert(it != m_words.end() and it->value.refcount > 0);
        if (--it->value.refcount == 0)
            m_words.unordered_remove(it->key);
    }
}

// Build the database for `buffer` and register as a watcher of its options.
WordDB::WordDB(const Buffer& buffer)
    : m_buffer{&buffer}
{
    buffer.options().register_watcher(*this);
    rebuild_db();
}

// Take over the state of `other`, and replace its option watcher
// registration with one for this object.
WordDB::WordDB(WordDB&& other) noexcept
    : m_buffer{std::move(other.m_buffer)},
      m_timestamp{other.m_timestamp},
      m_words{std::move(other.m_words)},
      m_lines{std::move(other.m_lines)}
{
    kak_assert(m_buffer);
    m_buffer->options().unregister_watcher(other);
    // With a null buffer, the destructor of `other` will not unregister again.
    other.m_buffer = nullptr;

    m_buffer->options().register_watcher(*this);
}

// Unregister from the buffer's options, unless this object was moved from.
WordDB::~WordDB()
{
    if (m_buffer)
        m_buffer->options().unregister_watcher(*this);
}

// Discard all known words and lines, then re-read every line of the buffer
// and record the buffer timestamp the database now corresponds to.
void WordDB::rebuild_db()
{
    auto& buffer = *m_buffer;

    m_words.clear();
    m_lines.clear();
    m_lines.reserve((int)buffer.line_count());
    auto extra_word_chars = get_extra_word_chars(buffer);
    for (auto line = 0_line, end = buffer.line_count(); line < end; ++line)
    {
        m_lines.push_back(buffer.line_storage(line));
        add_words(m_lines.back()->strview(), extra_word_chars);
    }
    m_timestamp = buffer.timestamp();
}

// Bring the database up to date with the buffer, processing only the line
// modifications computed since m_timestamp.
void WordDB::update_db()
{
    auto& buffer = *m_buffer;

    auto modifs = compute_line_modifications(buffer, m_timestamp);
    m_timestamp = buffer.timestamp();

    if (modifs.empty())
        return;

    Lines new_lines;
    new_lines.reserve((int)buffer.line_count());

    auto extra_word_chars = get_extra_word_chars(buffer);
    auto old_line = 0_line;
    for (auto& modif : modifs)
    {
        kak_assert(0_line <= modif.new_line and modif.new_line <= buffer.line_count());
        kak_assert(modif.new_line < buffer.line_count() or modif.num_added == 0);
        kak_assert(old_line <= modif.old_line);
        // Lines located before this modification are carried over as is.
        while (old_line < modif.old_line)
            new_lines.push_back(std::move(m_lines[(int)old_line++]));

        kak_assert((int)new_lines.size() == (int)modif.new_line);

        // Forget the words of the removed lines...
        while (old_line < modif.old_line + modif.num_removed)
        {
            kak_assert(old_line < m_lines.size());
            remove_words(m_lines[(int)old_line++]->strview(), extra_word_chars);
        }

        // ... and register the words of the added ones.
        for (auto l = 0_line; l < modif.num_added; ++l)
        {
            new_lines.push_back(buffer.line_storage(modif.new_line + l));
            add_words(new_lines.back()->strview(), extra_word_chars);
        }
    }
    // Lines located after the last modification are carried over as well.
    while (old_line != (int)m_lines.size())
        new_lines.push_back(std::move(m_lines[(int)old_line++]));

    m_lines = std::move(new_lines);
}

// Changing extra_word_chars changes how lines are split into words, so the
// whole database is rebuilt.
void WordDB::on_option_changed(const Option& option)
{
    if (option.name() == "extra_word_chars")
        rebuild_db();
}

// Return the reference count recorded for `word`, or 0 when it is unknown.
// Does not call update_db().
int WordDB::get_word_occurences(StringView word) const
{
    auto it = m_words.find(word);
    if (it != m_words.end())
        return it->value.refcount;
    return 0;
}

// Update the database, then return a RankedMatch for each known word whose
// match against `query` converts to true.
RankedMatchList WordDB::find_matching(StringView query)
{
    update_db();
    const UsedLetters letters = used_letters(query);
    RankedMatchList res;
    for (auto&& word : m_words)
    {
        if (RankedMatch match{word.key, word.value.letters, query, letters})
            res.push_back(match);
    }

    return res;
}

UnitTest test_word_db{[]()
{
    auto cmp_words = [](const RankedMatch& lhs, const RankedMatch& rhs) {
        return lhs.candidate() < rhs.candidate();
    };

    using WordList = Vector<StringView>;
    auto eq = [](ArrayView<const RankedMatch> lhs, const WordList& rhs) {
        return lhs.size() == rhs.size() and
            std::equal(lhs.begin(), lhs.end(), rhs.begin(),
                       [](const RankedMatch& lhs, const StringView& rhs) {
                           return lhs.candidate() == rhs;
                       });
    };

    auto make_lines = [](auto&&... lines) { return BufferLines{StringData::create(lines)...}; };

    Buffer buffer("test", Buffer::Flags::None,
                  make_lines("tchou mutch\n", "tchou kanaky tchou\n", "\n", "tchaa tchaa\n", "allo\n"));
    WordDB word_db(buffer);
    // Matches are sorted by candidate before each comparison with the
    // alphabetically ordered expected list.
    auto res = word_db.find_matching("");
    std::sort(res.begin(), res.end(), cmp_words);
    kak_assert(eq(res, WordList{ "allo", "kanaky", "mutch", "tchaa", "tchou" }));
    kak_assert(word_db.get_word_occurences("tchou") == 3);
    kak_assert(word_db.get_word_occurences("allo") == 1);
    buffer.erase({1, 6}, {4, 0});
    res = word_db.find_matching("");
    std::sort(res.begin(), res.end(), cmp_words);
    kak_assert(eq(res, WordList{ "allo", "mutch", "tchou" }));
    buffer.insert({1, 0}, "re");
    res = word_db.find_matching("");
    std::sort(res.begin(), res.end(), cmp_words);
    kak_assert(eq(res, WordList{ "allo", "mutch", "retchou", "tchou" }));
}};

}