Code refactoring in WordDB
This commit is contained in:
parent
ed54e102ee
commit
79de7ee717
|
@ -93,8 +93,8 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos)
|
||||||
String current_word{begin, end};
|
String current_word{begin, end};
|
||||||
|
|
||||||
auto& word_db = get_word_db(buffer);
|
auto& word_db = get_word_db(buffer);
|
||||||
auto matches = subseq ? word_db.find_subsequence(prefix)
|
auto matches = word_db.find_matching(
|
||||||
: word_db.find_prefix(prefix);
|
prefix, subseq ? subsequence_match : prefix_match);
|
||||||
|
|
||||||
if (word_db.get_word_occurences(current_word) <= 1)
|
if (word_db.get_word_occurences(current_word) <= 1)
|
||||||
unordered_erase(matches, current_word);
|
unordered_erase(matches, current_word);
|
||||||
|
@ -106,8 +106,8 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos)
|
||||||
if (buf.get() == &buffer)
|
if (buf.get() == &buffer)
|
||||||
continue;
|
continue;
|
||||||
auto& buf_word_db = get_word_db(*buf);
|
auto& buf_word_db = get_word_db(*buf);
|
||||||
auto bufmatches = subseq ? buf_word_db.find_subsequence(prefix)
|
auto bufmatches = buf_word_db.find_matching(
|
||||||
: buf_word_db.find_prefix(prefix);
|
prefix, subseq ? subsequence_match : prefix_match);
|
||||||
std::move(bufmatches.begin(), bufmatches.end(),
|
std::move(bufmatches.begin(), bufmatches.end(),
|
||||||
std::back_inserter(matches));
|
std::back_inserter(matches));
|
||||||
}
|
}
|
||||||
|
|
|
@ -80,17 +80,17 @@ void test_word_db()
|
||||||
"tchaa tchaa\n",
|
"tchaa tchaa\n",
|
||||||
"allo\n"});
|
"allo\n"});
|
||||||
WordDB word_db(buffer);
|
WordDB word_db(buffer);
|
||||||
auto res = word_db.find_prefix("");
|
auto res = word_db.find_matching("", prefix_match);
|
||||||
std::sort(res.begin(), res.end());
|
std::sort(res.begin(), res.end());
|
||||||
kak_assert(res == std::vector<InternedString>{ "allo" COMMA "kanaky" COMMA "mutch" COMMA "tchaa" COMMA "tchou" });
|
kak_assert(res == std::vector<InternedString>{ "allo" COMMA "kanaky" COMMA "mutch" COMMA "tchaa" COMMA "tchou" });
|
||||||
kak_assert(word_db.get_word_occurences("tchou") == 3);
|
kak_assert(word_db.get_word_occurences("tchou") == 3);
|
||||||
kak_assert(word_db.get_word_occurences("allo") == 1);
|
kak_assert(word_db.get_word_occurences("allo") == 1);
|
||||||
buffer.erase(buffer.iterator_at({1, 6}), buffer.iterator_at({4, 0}));
|
buffer.erase(buffer.iterator_at({1, 6}), buffer.iterator_at({4, 0}));
|
||||||
res = word_db.find_prefix("");
|
res = word_db.find_matching("", prefix_match);
|
||||||
std::sort(res.begin(), res.end());
|
std::sort(res.begin(), res.end());
|
||||||
kak_assert(res == std::vector<InternedString>{ "allo" COMMA "mutch" COMMA "tchou" });
|
kak_assert(res == std::vector<InternedString>{ "allo" COMMA "mutch" COMMA "tchou" });
|
||||||
buffer.insert(buffer.iterator_at({1, 0}), "re");
|
buffer.insert(buffer.iterator_at({1, 0}), "re");
|
||||||
res = word_db.find_prefix("");
|
res = word_db.find_matching("", subsequence_match);
|
||||||
std::sort(res.begin(), res.end());
|
std::sort(res.begin(), res.end());
|
||||||
kak_assert(res == std::vector<InternedString>{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" });
|
kak_assert(res == std::vector<InternedString>{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" });
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,9 +7,9 @@
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
|
||||||
static WordDB::UsedChars used_letters(StringView str)
|
UsedLetters used_letters(StringView str)
|
||||||
{
|
{
|
||||||
WordDB::UsedChars res;
|
UsedLetters res;
|
||||||
for (auto c : str)
|
for (auto c : str)
|
||||||
{
|
{
|
||||||
if (c >= 'a' and c <= 'z')
|
if (c >= 'a' and c <= 'z')
|
||||||
|
@ -26,7 +26,7 @@ static WordDB::UsedChars used_letters(StringView str)
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::vector<InternedString> get_words(const InternedString& content)
|
static WordDB::WordList get_words(const InternedString& content)
|
||||||
{
|
{
|
||||||
std::vector<InternedString> res;
|
std::vector<InternedString> res;
|
||||||
using Iterator = utf8::iterator<const char*, utf8::InvalidPolicy::Pass>;
|
using Iterator = utf8::iterator<const char*, utf8::InvalidPolicy::Pass>;
|
||||||
|
@ -52,25 +52,25 @@ static std::vector<InternedString> get_words(const InternedString& content)
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void add_words(WordDB::WordList& wl, const std::vector<InternedString>& words)
|
void WordDB::add_words(const WordList& words)
|
||||||
{
|
{
|
||||||
for (auto& w : words)
|
for (auto& w : words)
|
||||||
{
|
{
|
||||||
WordDB::WordInfo& info = wl[w];
|
WordDB::WordInfo& info = m_words[w];
|
||||||
++info.refcount;
|
++info.refcount;
|
||||||
if (info.letters.none())
|
if (info.letters.none())
|
||||||
info.letters = used_letters(w);
|
info.letters = used_letters(w);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void remove_words(WordDB::WordList& wl, const std::vector<InternedString>& words)
|
void WordDB::remove_words(const WordList& words)
|
||||||
{
|
{
|
||||||
for (auto& w : words)
|
for (auto& w : words)
|
||||||
{
|
{
|
||||||
auto it = wl.find(w);
|
auto it = m_words.find(w);
|
||||||
kak_assert(it != wl.end() and it->second.refcount > 0);
|
kak_assert(it != m_words.end() and it->second.refcount > 0);
|
||||||
if (--it->second.refcount == 0)
|
if (--it->second.refcount == 0)
|
||||||
wl.erase(it);
|
m_words.erase(it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,7 +81,7 @@ WordDB::WordDB(const Buffer& buffer)
|
||||||
for (auto line = 0_line, end = buffer.line_count(); line < end; ++line)
|
for (auto line = 0_line, end = buffer.line_count(); line < end; ++line)
|
||||||
{
|
{
|
||||||
m_line_to_words.push_back(get_words(buffer[line]));
|
m_line_to_words.push_back(get_words(buffer[line]));
|
||||||
add_words(m_words, m_line_to_words.back());
|
add_words(m_line_to_words.back());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,7 +112,7 @@ void WordDB::update_db()
|
||||||
while (old_line <= modif.old_line + modif.num_removed)
|
while (old_line <= modif.old_line + modif.num_removed)
|
||||||
{
|
{
|
||||||
kak_assert(old_line < m_line_to_words.size());
|
kak_assert(old_line < m_line_to_words.size());
|
||||||
remove_words(m_words, m_line_to_words[(int)old_line++]);
|
remove_words(m_line_to_words[(int)old_line++]);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto l = 0_line; l <= modif.num_added; ++l)
|
for (auto l = 0_line; l <= modif.num_added; ++l)
|
||||||
|
@ -121,7 +121,7 @@ void WordDB::update_db()
|
||||||
break;
|
break;
|
||||||
|
|
||||||
new_lines.push_back(get_words(buffer[modif.new_line + l]));
|
new_lines.push_back(get_words(buffer[modif.new_line + l]));
|
||||||
add_words(m_words, new_lines.back());
|
add_words(new_lines.back());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while (old_line != (int)m_line_to_words.size())
|
while (old_line != (int)m_line_to_words.size())
|
||||||
|
@ -130,32 +130,6 @@ void WordDB::update_db()
|
||||||
m_line_to_words = std::move(new_lines);
|
m_line_to_words = std::move(new_lines);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Func>
|
|
||||||
std::vector<InternedString> find_matching(const WordDB::WordList& words, StringView str, Func func)
|
|
||||||
{
|
|
||||||
const WordDB::UsedChars letters = used_letters(str);
|
|
||||||
std::vector<InternedString> res;
|
|
||||||
for (auto&& word : words)
|
|
||||||
{
|
|
||||||
if ((letters & word.second.letters) == letters and
|
|
||||||
func(word.first, str))
|
|
||||||
res.push_back(word.first);
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<InternedString> WordDB::find_prefix(StringView prefix)
|
|
||||||
{
|
|
||||||
update_db();
|
|
||||||
return find_matching(m_words, prefix, prefix_match);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<InternedString> WordDB::find_subsequence(StringView subseq)
|
|
||||||
{
|
|
||||||
update_db();
|
|
||||||
return find_matching(m_words, subseq, subsequence_match);
|
|
||||||
}
|
|
||||||
|
|
||||||
int WordDB::get_word_occurences(StringView word) const
|
int WordDB::get_word_occurences(StringView word) const
|
||||||
{
|
{
|
||||||
auto it = m_words.find(word);
|
auto it = m_words.find(word);
|
||||||
|
|
|
@ -10,6 +10,9 @@
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
|
||||||
|
using UsedLetters = std::bitset<64>;
|
||||||
|
UsedLetters used_letters(StringView str);
|
||||||
|
|
||||||
// maintain a database of words available in a buffer
|
// maintain a database of words available in a buffer
|
||||||
class WordDB
|
class WordDB
|
||||||
{
|
{
|
||||||
|
@ -18,25 +21,40 @@ public:
|
||||||
WordDB(const WordDB&) = delete;
|
WordDB(const WordDB&) = delete;
|
||||||
WordDB(WordDB&&) = default;
|
WordDB(WordDB&&) = default;
|
||||||
|
|
||||||
std::vector<InternedString> find_prefix(StringView prefix);
|
using WordList = std::vector<InternedString>;
|
||||||
std::vector<InternedString> find_subsequence(StringView subsequence);
|
template<typename MatchFunc>
|
||||||
|
WordList find_matching(StringView str, MatchFunc match)
|
||||||
|
{
|
||||||
|
update_db();
|
||||||
|
const UsedLetters letters = used_letters(str);
|
||||||
|
std::vector<InternedString> res;
|
||||||
|
for (auto&& word : m_words)
|
||||||
|
{
|
||||||
|
if ((letters & word.second.letters) == letters and
|
||||||
|
match(word.first, str))
|
||||||
|
res.push_back(word.first);
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
int get_word_occurences(StringView word) const;
|
int get_word_occurences(StringView word) const;
|
||||||
|
|
||||||
using UsedChars = std::bitset<64>;
|
|
||||||
struct WordInfo
|
struct WordInfo
|
||||||
{
|
{
|
||||||
UsedChars letters;
|
UsedLetters letters;
|
||||||
int refcount;
|
int refcount;
|
||||||
};
|
};
|
||||||
using WordList = UnorderedMap<InternedString, WordInfo>;
|
using WordToInfo = UnorderedMap<InternedString, WordInfo>;
|
||||||
private:
|
private:
|
||||||
using LineToWords = std::vector<std::vector<InternedString>>;
|
using LineToWords = std::vector<WordList>;
|
||||||
|
|
||||||
void update_db();
|
void update_db();
|
||||||
|
void add_words(const WordList& words);
|
||||||
|
void remove_words(const WordList& words);
|
||||||
|
|
||||||
safe_ptr<const Buffer> m_buffer;
|
safe_ptr<const Buffer> m_buffer;
|
||||||
size_t m_timestamp;
|
size_t m_timestamp;
|
||||||
WordList m_words;
|
WordToInfo m_words;
|
||||||
LineToWords m_line_to_words;
|
LineToWords m_line_to_words;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user