From 997aadd33fc0efada5011e95f99df87b8b6b88b0 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Thu, 16 Jan 2014 22:07:42 +0000 Subject: [PATCH] Word completion use a WordDB stored as a buffer value. --- src/input_handler.cc | 33 +++++++------ src/unit_tests.cc | 24 +++++++++ src/word_db.cc | 114 +++++++++++++++++++++++++++++++++++++++++++ src/word_db.hh | 40 +++++++++++++++ 4 files changed, 196 insertions(+), 15 deletions(-) create mode 100644 src/word_db.cc create mode 100644 src/word_db.hh diff --git a/src/input_handler.cc b/src/input_handler.cc index 30c31413..99121ba9 100644 --- a/src/input_handler.cc +++ b/src/input_handler.cc @@ -10,6 +10,7 @@ #include "client.hh" #include "color_registry.hh" #include "file.hh" +#include "word_db.hh" #include @@ -707,6 +708,15 @@ public: } using StringList = std::vector; + static WordDB& get_word_db(const Buffer& buffer) + { + static const ValueId word_db_id = ValueId::get_free_id(); + Value& cache_val = buffer.values()[word_db_id]; + if (not cache_val) + cache_val = Value(WordDB{buffer}); + return cache_val.as(); + } + template BufferCompletion complete_word(const Buffer& buffer, BufferCoord cursor_pos) { @@ -721,31 +731,24 @@ public: if (not is_word(*begin)) ++begin; - String ex = R"(\<\Q)" + String{begin, end} + R"(\E\w+\>)"; - Regex re(ex.begin(), ex.end()); - using RegexIt = boost::regex_iterator; + String prefix{begin, end}; + std::unordered_set matches; - for (RegexIt it(buffer.begin(), buffer.end(), re), re_end; it != re_end; ++it) - { - auto& match = (*it)[0]; - if (match.first <= pos and pos < match.second) - continue; - matches.insert(String{match.first, match.second}); - } + auto bufmatches = get_word_db(buffer).find_prefix(prefix); + matches.insert(bufmatches.begin(), bufmatches.end()); + if (other_buffers) { for (const auto& buf : BufferManager::instance()) { if (buf.get() == &buffer) continue; - for (RegexIt it(buf->begin(), buf->end(), re), re_end; it != re_end; ++it) - { - auto& match = (*it)[0]; - matches.insert(String{match.first, match.second}); - } + bufmatches = get_word_db(*buf).find_prefix(prefix); + matches.insert(bufmatches.begin(), bufmatches.end()); } } + matches.erase(prefix); CandidateList result; std::copy(make_move_iterator(matches.begin()), make_move_iterator(matches.end()), diff --git a/src/unit_tests.cc b/src/unit_tests.cc index 3b2b2960..66fe53f2 100644 --- a/src/unit_tests.cc +++ b/src/unit_tests.cc @@ -2,6 +2,7 @@ #include "buffer.hh" #include "keys.hh" #include "selectors.hh" +#include "word_db.hh" using namespace Kakoune; @@ -67,6 +68,28 @@ void test_undo_group_optimizer() kak_assert(lines[i] == buffer[LineCount((int)i)]); } +void test_word_db() +{ + Buffer buffer("test", Buffer::Flags::None, + { "tchou mutch\n", + "tchou kanaky tchou\n", + "\n", + "tchaa tchaa\n", + "allo\n"}); + WordDB word_db(buffer); + auto res = word_db.find_prefix(""); + std::sort(res.begin(), res.end()); + kak_assert(res == std::vector{ "allo" COMMA "kanaky" COMMA "mutch" COMMA "tchaa" COMMA "tchou" }); + buffer.erase(buffer.iterator_at({1, 6}), buffer.iterator_at({4, 0})); + res = word_db.find_prefix(""); + std::sort(res.begin(), res.end()); + kak_assert(res == std::vector{ "allo" COMMA "mutch" COMMA "tchou" }); + buffer.insert(buffer.iterator_at({1, 0}), "re"); + res = word_db.find_prefix(""); + std::sort(res.begin(), res.end()); + kak_assert(res == std::vector{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" }); +} + void test_utf8() { String str = "maïs mélange bientôt"; @@ -121,4 +144,5 @@ void run_unit_tests() test_keys(); test_buffer(); test_undo_group_optimizer(); + test_word_db(); } diff --git a/src/word_db.cc b/src/word_db.cc new file mode 100644 index 00000000..c64ae01a --- /dev/null +++ b/src/word_db.cc @@ -0,0 +1,114 @@ +#include "word_db.hh" + +#include "utils.hh" +#include "utf8_iterator.hh" + +namespace Kakoune +{ + +WordDB::WordDB(const Buffer& buffer) + : BufferChangeListener_AutoRegister{const_cast(buffer)} +{ + for (auto line = 0_line, end = buffer.line_count(); line < end; ++line) + add_words(line, buffer[line]); +} + +void WordDB::add_words(LineCount line, const String& content) +{ + using Iterator = utf8::utf8_iterator; + auto word_start = content.begin(); + bool in_word = false; + for (Iterator it{word_start}, end{content.end()}; it != end; ++it) + { + Codepoint c = *it; + const bool word = is_word(c); + if (not in_word and word) + { + word_start = it.base(); + in_word = true; + } + else if (in_word and not word) + { + String w{word_start, it.base()}; + m_word_to_lines[w].push_back(line); + m_line_to_words[line].push_back(w); + in_word = false; + } + } +} + +WordDB::LineToWords::iterator WordDB::remove_line(LineToWords::iterator it) +{ + if (it == m_line_to_words.end()) + return it; + + for (auto& word : it->second) + { + auto wtl_it = m_word_to_lines.find(word); + auto& lines = wtl_it->second; + lines.erase(find(lines, it->first)); + if (lines.empty()) + m_word_to_lines.erase(wtl_it); + } + return m_line_to_words.erase(it); +} + +void WordDB::update_lines(LineToWords::iterator begin, LineToWords::iterator end, + LineCount num) +{ + std::vector>> + to_update{std::make_move_iterator(begin), std::make_move_iterator(end)}; + m_line_to_words.erase(begin, end); + + for (auto& elem : to_update) + { + for (auto& word : elem.second) + { + auto& lines = m_word_to_lines[word]; + *find(lines, elem.first) += num; + } + elem.first += num; + } + m_line_to_words.insert(std::make_move_iterator(to_update.begin()), + std::make_move_iterator(to_update.end())); +} + +void WordDB::on_insert(const Buffer& buffer, BufferCoord begin, BufferCoord end) +{ + auto num = end.line - begin.line; + if (num > 0) + update_lines(m_line_to_words.upper_bound(begin.line), + m_line_to_words.end(), num); + + remove_line(m_line_to_words.find(begin.line)); + for (auto line = begin.line; line <= end.line; ++line) + add_words(line, buffer[line]); +} + +void WordDB::on_erase(const Buffer& buffer, BufferCoord begin, BufferCoord end) +{ + auto first = m_line_to_words.lower_bound(begin.line); + auto last = m_line_to_words.upper_bound(end.line); + while (first != last) + first = remove_line(first); + + auto num = end.line - begin.line; + if (num > 0) + update_lines(last, m_line_to_words.end(), -num); + + add_words(begin.line, buffer[begin.line]); +} + +std::vector WordDB::find_prefix(const String& prefix) const +{ + std::vector res; + for (auto& word : m_word_to_lines) + { + if (prefix_match(word.first, prefix)) + res.push_back(word.first); + } + return res; +} + +} diff --git a/src/word_db.hh b/src/word_db.hh new file mode 100644 index 00000000..c7f8a83d --- /dev/null +++ b/src/word_db.hh @@ -0,0 +1,40 @@ +#ifndef word_db_hh_INCLUDED +#define word_db_hh_INCLUDED + +#include "buffer.hh" + +#include + +namespace Kakoune +{ + +class String; + +// maintain a database of words available in a buffer +class WordDB : public BufferChangeListener_AutoRegister +{ +public: + WordDB(const Buffer& buffer); + + void on_insert(const Buffer& buffer, BufferCoord begin, BufferCoord end) override; + void on_erase(const Buffer& buffer, BufferCoord begin, BufferCoord end) override; + + std::vector find_prefix(const String& prefix) const; + +private: + using WordToLines = std::unordered_map>; + using LineToWords = std::map>; + + void add_words(LineCount line, const String& content); + LineToWords::iterator remove_line(LineToWords::iterator it); + void update_lines(LineToWords::iterator begin, LineToWords::iterator end, + LineCount num); + + WordToLines m_word_to_lines; + LineToWords m_line_to_words; +}; + +} + +#endif // word_db_hh_INCLUDED +