Code refactoring in WordDB

This commit is contained in:
Maxime Coste 2014-12-23 19:32:42 +00:00
parent ed54e102ee
commit 79de7ee717
4 changed files with 44 additions and 52 deletions

View File

@ -93,8 +93,8 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos)
String current_word{begin, end}; String current_word{begin, end};
auto& word_db = get_word_db(buffer); auto& word_db = get_word_db(buffer);
auto matches = subseq ? word_db.find_subsequence(prefix) auto matches = word_db.find_matching(
: word_db.find_prefix(prefix); prefix, subseq ? subsequence_match : prefix_match);
if (word_db.get_word_occurences(current_word) <= 1) if (word_db.get_word_occurences(current_word) <= 1)
unordered_erase(matches, current_word); unordered_erase(matches, current_word);
@ -106,8 +106,8 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos)
if (buf.get() == &buffer) if (buf.get() == &buffer)
continue; continue;
auto& buf_word_db = get_word_db(*buf); auto& buf_word_db = get_word_db(*buf);
auto bufmatches = subseq ? buf_word_db.find_subsequence(prefix) auto bufmatches = buf_word_db.find_matching(
: buf_word_db.find_prefix(prefix); prefix, subseq ? subsequence_match : prefix_match);
std::move(bufmatches.begin(), bufmatches.end(), std::move(bufmatches.begin(), bufmatches.end(),
std::back_inserter(matches)); std::back_inserter(matches));
} }

View File

@ -80,17 +80,17 @@ void test_word_db()
"tchaa tchaa\n", "tchaa tchaa\n",
"allo\n"}); "allo\n"});
WordDB word_db(buffer); WordDB word_db(buffer);
auto res = word_db.find_prefix(""); auto res = word_db.find_matching("", prefix_match);
std::sort(res.begin(), res.end()); std::sort(res.begin(), res.end());
kak_assert(res == std::vector<InternedString>{ "allo" COMMA "kanaky" COMMA "mutch" COMMA "tchaa" COMMA "tchou" }); kak_assert(res == std::vector<InternedString>{ "allo" COMMA "kanaky" COMMA "mutch" COMMA "tchaa" COMMA "tchou" });
kak_assert(word_db.get_word_occurences("tchou") == 3); kak_assert(word_db.get_word_occurences("tchou") == 3);
kak_assert(word_db.get_word_occurences("allo") == 1); kak_assert(word_db.get_word_occurences("allo") == 1);
buffer.erase(buffer.iterator_at({1, 6}), buffer.iterator_at({4, 0})); buffer.erase(buffer.iterator_at({1, 6}), buffer.iterator_at({4, 0}));
res = word_db.find_prefix(""); res = word_db.find_matching("", prefix_match);
std::sort(res.begin(), res.end()); std::sort(res.begin(), res.end());
kak_assert(res == std::vector<InternedString>{ "allo" COMMA "mutch" COMMA "tchou" }); kak_assert(res == std::vector<InternedString>{ "allo" COMMA "mutch" COMMA "tchou" });
buffer.insert(buffer.iterator_at({1, 0}), "re"); buffer.insert(buffer.iterator_at({1, 0}), "re");
res = word_db.find_prefix(""); res = word_db.find_matching("", subsequence_match);
std::sort(res.begin(), res.end()); std::sort(res.begin(), res.end());
kak_assert(res == std::vector<InternedString>{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" }); kak_assert(res == std::vector<InternedString>{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" });
} }

View File

@ -7,9 +7,9 @@
namespace Kakoune namespace Kakoune
{ {
static WordDB::UsedChars used_letters(StringView str) UsedLetters used_letters(StringView str)
{ {
WordDB::UsedChars res; UsedLetters res;
for (auto c : str) for (auto c : str)
{ {
if (c >= 'a' and c <= 'z') if (c >= 'a' and c <= 'z')
@ -26,7 +26,7 @@ static WordDB::UsedChars used_letters(StringView str)
return res; return res;
} }
static std::vector<InternedString> get_words(const InternedString& content) static WordDB::WordList get_words(const InternedString& content)
{ {
std::vector<InternedString> res; std::vector<InternedString> res;
using Iterator = utf8::iterator<const char*, utf8::InvalidPolicy::Pass>; using Iterator = utf8::iterator<const char*, utf8::InvalidPolicy::Pass>;
@ -52,25 +52,25 @@ static std::vector<InternedString> get_words(const InternedString& content)
return res; return res;
} }
// Records one occurrence of every word in `words` in the word map
// (the `wl` argument in the free-function form, `m_words` in the member form).
static void add_words(WordDB::WordList& wl, const std::vector<InternedString>& words) void WordDB::add_words(const WordList& words)
{ {
for (auto& w : words) for (auto& w : words)
{ {
// Looked up with operator[]; presumably this creates the entry for a word
// that is not in the map yet -- TODO confirm against UnorderedMap.
WordDB::WordInfo& info = wl[w]; WordDB::WordInfo& info = m_words[w];
++info.refcount; ++info.refcount;
// The letter set is only computed while it is still empty.
if (info.letters.none()) if (info.letters.none())
info.letters = used_letters(w); info.letters = used_letters(w);
} }
} }
// Drops one occurrence of every word in `words` from the word map
// (the `wl` argument in the free-function form, `m_words` in the member form).
// A word whose refcount reaches zero is erased from the map.
static void remove_words(WordDB::WordList& wl, const std::vector<InternedString>& words) void WordDB::remove_words(const WordList& words)
{ {
for (auto& w : words) for (auto& w : words)
{ {
auto it = wl.find(w); auto it = m_words.find(w);
// Every word being removed must already be present with a positive refcount.
kak_assert(it != wl.end() and it->second.refcount > 0); kak_assert(it != m_words.end() and it->second.refcount > 0);
if (--it->second.refcount == 0) if (--it->second.refcount == 0)
wl.erase(it); m_words.erase(it);
} }
} }
@ -81,7 +81,7 @@ WordDB::WordDB(const Buffer& buffer)
for (auto line = 0_line, end = buffer.line_count(); line < end; ++line) for (auto line = 0_line, end = buffer.line_count(); line < end; ++line)
{ {
m_line_to_words.push_back(get_words(buffer[line])); m_line_to_words.push_back(get_words(buffer[line]));
add_words(m_words, m_line_to_words.back()); add_words(m_line_to_words.back());
} }
} }
@ -112,7 +112,7 @@ void WordDB::update_db()
while (old_line <= modif.old_line + modif.num_removed) while (old_line <= modif.old_line + modif.num_removed)
{ {
kak_assert(old_line < m_line_to_words.size()); kak_assert(old_line < m_line_to_words.size());
remove_words(m_words, m_line_to_words[(int)old_line++]); remove_words(m_line_to_words[(int)old_line++]);
} }
for (auto l = 0_line; l <= modif.num_added; ++l) for (auto l = 0_line; l <= modif.num_added; ++l)
@ -121,7 +121,7 @@ void WordDB::update_db()
break; break;
new_lines.push_back(get_words(buffer[modif.new_line + l])); new_lines.push_back(get_words(buffer[modif.new_line + l]));
add_words(m_words, new_lines.back()); add_words(new_lines.back());
} }
} }
while (old_line != (int)m_line_to_words.size()) while (old_line != (int)m_line_to_words.size())
@ -130,32 +130,6 @@ void WordDB::update_db()
m_line_to_words = std::move(new_lines); m_line_to_words = std::move(new_lines);
} }
// Returns the keys of `words` that match `str`.
// A word is kept when its stored letter set contains every letter reported by
// used_letters(str) and func(word, str) evaluates to true.
// `func` is invoked as func(word.first, str).
template<typename Func>
std::vector<InternedString> find_matching(const WordDB::WordList& words, StringView str, Func func)
{
// Letter set of the query, computed once before the loop.
const WordDB::UsedChars letters = used_letters(str);
std::vector<InternedString> res;
for (auto&& word : words)
{
// The bitset test comes first, so `func` is only called for words whose
// letter set covers the query's letters (`and` short-circuits).
if ((letters & word.second.letters) == letters and
func(word.first, str))
res.push_back(word.first);
}
return res;
}
// Calls update_db() and then returns the words of m_words selected by
// find_matching with prefix_match as the matching function.
std::vector<InternedString> WordDB::find_prefix(StringView prefix)
{
update_db();
return find_matching(m_words, prefix, prefix_match);
}
// Calls update_db() and then returns the words of m_words selected by
// find_matching with subsequence_match as the matching function.
std::vector<InternedString> WordDB::find_subsequence(StringView subseq)
{
update_db();
return find_matching(m_words, subseq, subsequence_match);
}
int WordDB::get_word_occurences(StringView word) const int WordDB::get_word_occurences(StringView word) const
{ {
auto it = m_words.find(word); auto it = m_words.find(word);

View File

@ -10,6 +10,9 @@
namespace Kakoune namespace Kakoune
{ {
using UsedLetters = std::bitset<64>;
UsedLetters used_letters(StringView str);
// maintain a database of words available in a buffer // maintain a database of words available in a buffer
class WordDB class WordDB
{ {
@ -18,25 +21,40 @@ public:
WordDB(const WordDB&) = delete; WordDB(const WordDB&) = delete;
WordDB(WordDB&&) = default; WordDB(WordDB&&) = default;
std::vector<InternedString> find_prefix(StringView prefix); using WordList = std::vector<InternedString>;
std::vector<InternedString> find_subsequence(StringView subsequence); template<typename MatchFunc>
// Returns the words of m_words that match `str`.
// update_db() is called first; a word is then kept when its stored letter set
// contains every letter reported by used_letters(str) and match(word, str)
// evaluates to true. `match` is invoked as match(word.first, str).
WordList find_matching(StringView str, MatchFunc match)
{
update_db();
// Letter set of the query, computed once before the loop.
const UsedLetters letters = used_letters(str);
std::vector<InternedString> res;
for (auto&& word : m_words)
{
// The bitset test comes first, so `match` is only called for words whose
// letter set covers the query's letters (`and` short-circuits).
if ((letters & word.second.letters) == letters and
match(word.first, str))
res.push_back(word.first);
}
return res;
}
int get_word_occurences(StringView word) const; int get_word_occurences(StringView word) const;
using UsedChars = std::bitset<64>;
struct WordInfo struct WordInfo
{ {
UsedChars letters; UsedLetters letters;
int refcount; int refcount;
}; };
using WordList = UnorderedMap<InternedString, WordInfo>; using WordToInfo = UnorderedMap<InternedString, WordInfo>;
private: private:
using LineToWords = std::vector<std::vector<InternedString>>; using LineToWords = std::vector<WordList>;
void update_db(); void update_db();
void add_words(const WordList& words);
void remove_words(const WordList& words);
safe_ptr<const Buffer> m_buffer; safe_ptr<const Buffer> m_buffer;
size_t m_timestamp; size_t m_timestamp;
WordList m_words; WordToInfo m_words;
LineToWords m_line_to_words; LineToWords m_line_to_words;
}; };