Word completion uses a WordDB stored as a buffer value.

2014-01-16 22:07:42 +00:00 · 2014-01-16 22:07:42 +00:00 · 997aadd33f
commit 997aadd33f
parent a6b386e9b8
4 changed files with 196 additions and 15 deletions
--- a/src/input_handler.cc
+++ b/src/input_handler.cc
@ -10,6 +10,7 @@
 #include "client.hh"
 #include "color_registry.hh"
 #include "file.hh"
 #include "word_db.hh"
 #include <unordered_map>
@ -707,6 +708,15 @@ public:
    }
    using StringList = std::vector<String>;
    // Return the WordDB kept in the value map of buffer, creating it from
    // the buffer the first time it is requested.
    static WordDB& get_word_db(const Buffer& buffer)
    {
        // id of the slot holding the WordDB, obtained once on first call
        static const ValueId db_slot_id = ValueId::get_free_id();
        Value& slot = buffer.values()[db_slot_id];
        if (not slot)
        {
            // nothing stored yet for this buffer: build the database now
            slot = Value(WordDB{buffer});
        }
        return slot.as<WordDB>();
    }
    template<bool other_buffers>
    BufferCompletion complete_word(const Buffer& buffer, BufferCoord cursor_pos)
    {
@ -721,31 +731,24 @@ public:
        if (not is_word(*begin))
            ++begin;
-        String ex = R"(\<\Q)" + String{begin, end} + R"(\E\w+\>)";
+        String prefix{begin, end};
-        Regex re(ex.begin(), ex.end());
+
        using RegexIt = boost::regex_iterator<BufferIterator>;
        std::unordered_set<String> matches;
-        for (RegexIt it(buffer.begin(), buffer.end(), re), re_end; it != re_end; ++it)
+        auto bufmatches = get_word_db(buffer).find_prefix(prefix);
-        {
+        matches.insert(bufmatches.begin(), bufmatches.end());
-            auto& match = (*it)[0];
+
            if (match.first <= pos and pos < match.second)
                continue;
            matches.insert(String{match.first, match.second});
        }
        if (other_buffers)
        {
            for (const auto& buf : BufferManager::instance())
            {
                if (buf.get() == &buffer)
                    continue;
-                for (RegexIt it(buf->begin(), buf->end(), re), re_end; it != re_end; ++it)
+                bufmatches = get_word_db(*buf).find_prefix(prefix);
-                {
+                matches.insert(bufmatches.begin(), bufmatches.end());
                    auto& match = (*it)[0];
                    matches.insert(String{match.first, match.second});
                }
            }
        }
        matches.erase(prefix);
        CandidateList result;
        std::copy(make_move_iterator(matches.begin()),
                  make_move_iterator(matches.end()),
--- a/src/unit_tests.cc
+++ b/src/unit_tests.cc
@ -2,6 +2,7 @@
 #include "buffer.hh"
 #include "keys.hh"
 #include "selectors.hh"
 #include "word_db.hh"
 using namespace Kakoune;
@ -67,6 +68,28 @@ void test_undo_group_optimizer()
        kak_assert(lines[i] == buffer[LineCount((int)i)]);
 }
 // Check that a WordDB reflects the words of its buffer, both right after
 // construction and after the buffer has been erased from / inserted into.
 void test_word_db()
 {
    Buffer buffer("test", Buffer::Flags::None,
                  { "tchou mutch\n",
                    "tchou kanaky tchou\n",
                    "\n",
                    "tchaa tchaa\n",
                    "allo\n"});
    WordDB word_db(buffer);
    // all words currently known (empty prefix), sorted so that the
    // comparisons below do not depend on the order find_prefix returns
    auto sorted_words = [&word_db]() -> std::vector<String>
    {
        auto words = word_db.find_prefix("");
        std::sort(words.begin(), words.end());
        return words;
    };
    // freshly built database
    auto res = sorted_words();
    kak_assert(res == std::vector<String>{ "allo" COMMA "kanaky" COMMA "mutch" COMMA "tchaa" COMMA "tchou" });
    // erase from line 1, column 6 up to the start of line 4
    buffer.erase(buffer.iterator_at({1, 6}), buffer.iterator_at({4, 0}));
    res = sorted_words();
    kak_assert(res == std::vector<String>{ "allo" COMMA "mutch" COMMA "tchou" });
    // insert "re" at the start of line 1
    buffer.insert(buffer.iterator_at({1, 0}), "re");
    res = sorted_words();
    kak_assert(res == std::vector<String>{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" });
 }
 void test_utf8()
 {
    String str = "maïs mélange bientôt";
@ -121,4 +144,5 @@ void run_unit_tests()
    test_keys();
    test_buffer();
    test_undo_group_optimizer();
    test_word_db();
 }
--- a/src/word_db.cc
+++ b/src/word_db.cc
@ -0,0 +1,114 @@
 #include "word_db.hh"
 #include "utils.hh"
 #include "utf8_iterator.hh"
 namespace Kakoune
 {
 // Build the database by indexing the words of every line of buffer.
 WordDB::WordDB(const Buffer& buffer)
    : BufferChangeListener_AutoRegister{const_cast<Buffer&>(buffer)}
 {
    const auto line_total = buffer.line_count();
    auto current = 0_line;
    while (current < line_total)
    {
        add_words(current, buffer[current]);
        ++current;
    }
 }
 // Index every word of content under the given line.
 // A word is a maximal run of codepoints for which is_word() holds. Each
 // occurrence is recorded in both maps, so a word appearing several times
 // in content is stored once per occurrence.
 void WordDB::add_words(LineCount line, const String& content)
 {
    using Iterator = utf8::utf8_iterator<String::const_iterator,
                                         utf8::InvalidBytePolicy::Pass>;
    // record the word spanning [first, last) in both indexes
    auto record_word = [&](String::const_iterator first, String::const_iterator last)
    {
        String w{first, last};
        m_word_to_lines[w].push_back(line);
        m_line_to_words[line].push_back(w);
    };
    auto word_start = content.begin();
    bool in_word = false;
    for (Iterator it{word_start}, end{content.end()}; it != end; ++it)
    {
        Codepoint c = *it;
        const bool word = is_word(c);
        if (not in_word and word)
        {
            // entering a word: remember the byte position where it starts
            word_start = it.base();
            in_word = true;
        }
        else if (in_word and not word)
        {
            // leaving a word: it spans [word_start, it.base())
            record_word(word_start, it.base());
            in_word = false;
        }
    }
    // a word running up to the very end of content is not followed by any
    // non-word codepoint, so the loop never closes it; record it here
    // instead of silently dropping it
    if (in_word)
        record_word(word_start, content.end());
 }
 // Remove the line entry designated by it, along with the matching line
 // occurrences stored in the word index. Words left without any line are
 // removed from the word index as well.
 // Returns the iterator following the erased entry, or it unchanged when
 // it is the end iterator.
 WordDB::LineToWords::iterator WordDB::remove_line(LineToWords::iterator it)
 {
    if (it == m_line_to_words.end())
        return it;
    const LineCount line = it->first;
    for (const String& word : it->second)
    {
        auto word_entry = m_word_to_lines.find(word);
        std::vector<LineCount>& occurrences = word_entry->second;
        // drop a single occurrence of this line for the word
        occurrences.erase(find(occurrences, line));
        if (occurrences.empty())
            m_word_to_lines.erase(word_entry);
    }
    return m_line_to_words.erase(it);
 }
 // Shift the line number of every entry in [begin, end) by num, in both
 // the line index and the word index.
 void WordDB::update_lines(LineToWords::iterator begin, LineToWords::iterator end,
                           LineCount num)
 {
    using Entry = std::pair<LineCount, std::vector<String>>;
    // map keys cannot be modified in place: move the entries out, erase
    // them, renumber them, then insert them back
    std::vector<Entry> shifted{std::make_move_iterator(begin),
                               std::make_move_iterator(end)};
    m_line_to_words.erase(begin, end);
    for (Entry& entry : shifted)
    {
        const LineCount old_line = entry.first;
        // one occurrence of old_line is renumbered per word occurrence
        for (const String& word : entry.second)
            *find(m_word_to_lines[word], old_line) += num;
        entry.first += num;
    }
    m_line_to_words.insert(std::make_move_iterator(shifted.begin()),
                           std::make_move_iterator(shifted.end()));
 }
 // Buffer insertion notification for the range begin .. end.
 // Entries of the lines after begin.line are shifted down by the number of
 // lines the insertion spans, then lines begin.line through end.line are
 // re-indexed from the buffer content.
 void WordDB::on_insert(const Buffer& buffer, BufferCoord begin, BufferCoord end)
 {
    auto added_lines = end.line - begin.line;
    if (added_lines > 0)
    {
        auto first_following = m_line_to_words.upper_bound(begin.line);
        update_lines(first_following, m_line_to_words.end(), added_lines);
    }
    // the words previously recorded for begin.line are stale
    remove_line(m_line_to_words.find(begin.line));
    auto line = begin.line;
    while (line <= end.line)
    {
        add_words(line, buffer[line]);
        ++line;
    }
 }
 // Buffer erase notification for the range begin .. end.
 // Entries of lines begin.line through end.line are removed, entries of
 // the lines after end.line are shifted up by the number of lines the
 // erased range spans, then begin.line is re-indexed from the buffer.
 void WordDB::on_erase(const Buffer& buffer, BufferCoord begin, BufferCoord end)
 {
    auto stale_end = m_line_to_words.upper_bound(end.line);
    for (auto stale = m_line_to_words.lower_bound(begin.line); stale != stale_end; )
        stale = remove_line(stale);
    auto removed_lines = end.line - begin.line;
    if (removed_lines > 0)
        update_lines(stale_end, m_line_to_words.end(), -removed_lines);
    add_words(begin.line, buffer[begin.line]);
 }
 std::vector<String> WordDB::find_prefix(const String& prefix) const
 {
    std::vector<String> res;
    for (auto& word : m_word_to_lines)
    {
        if (prefix_match(word.first, prefix))
            res.push_back(word.first);
    }
    return res;
 }
 }
--- a/src/word_db.hh
+++ b/src/word_db.hh
@ -0,0 +1,40 @@
 #ifndef word_db_hh_INCLUDED
 #define word_db_hh_INCLUDED
 #include "buffer.hh"
 #include <set>
 namespace Kakoune
 {
 class String;
 // Maintains a database of the words available in a buffer.
 // The database is filled from the buffer lines at construction and is
 // refreshed through the on_insert / on_erase notifications.
 class WordDB : public BufferChangeListener_AutoRegister
 {
 public:
    // Index the words of every line of buffer.
    WordDB(const Buffer& buffer);
    // Called with the coordinates of an insertion: re-index the lines it
    // spans and renumber the entries of the lines following it.
    void on_insert(const Buffer& buffer, BufferCoord begin, BufferCoord end) override;
    // Called with the coordinates of an erased range: drop the entries of
    // the lines it spans, renumber the following ones and re-index the
    // line at begin.
    void on_erase(const Buffer& buffer, BufferCoord begin, BufferCoord end) override;
    // Return the stored words matching prefix, each word once, in the
    // (unordered) iteration order of the word index.
    std::vector<String> find_prefix(const String& prefix) const;
 private:
    // word -> lines containing it, with one element per occurrence
    using WordToLines = std::unordered_map<String, std::vector<LineCount>>;
    // line -> words found on that line, in order of appearance
    using LineToWords = std::map<LineCount, std::vector<String>>;
    // Record each word of content as occurring on the given line.
    void add_words(LineCount line, const String& content);
    // Erase a line entry and its occurrences in the word index; returns
    // the iterator following the erased entry.
    LineToWords::iterator remove_line(LineToWords::iterator it);
    // Shift the line numbers of the entries in [begin, end) by num.
    void update_lines(LineToWords::iterator begin, LineToWords::iterator end,
                      LineCount num);
    WordToLines m_word_to_lines;
    LineToWords m_line_to_words;
 };
 }
 #endif // word_db_hh_INCLUDED