Refactor word_db, use an unordered map
This commit is contained in:
parent
c2c980c484
commit
fa886ffaac
|
@ -7,6 +7,25 @@
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
|
||||||
|
// Computes which character classes occur in str, as a bitset:
//   bits 0..25  -> 'a'..'z'
//   bits 26..51 -> 'A'..'Z'
//   bit  53     -> '_'
//   bit  54     -> '-'
//   bit  63     -> any other character
static WordDB::UsedChars used_letters(StringView str)
{
    WordDB::UsedChars letters;
    for (auto ch : str)
    {
        int bit = 63; // shared bucket for everything not listed below
        if ('a' <= ch and ch <= 'z')
            bit = ch - 'a';
        else if ('A' <= ch and ch <= 'Z')
            bit = ch - 'A' + 26;
        else if (ch == '_')
            bit = 53;
        else if (ch == '-')
            bit = 54;
        letters.set(bit);
    }
    return letters;
}
|
||||||
|
|
||||||
static std::vector<InternedString> get_words(const InternedString& content)
|
static std::vector<InternedString> get_words(const InternedString& content)
|
||||||
{
|
{
|
||||||
std::vector<InternedString> res;
|
std::vector<InternedString> res;
|
||||||
|
@ -36,7 +55,12 @@ static std::vector<InternedString> get_words(const InternedString& content)
|
||||||
// Registers one occurrence of each word of `words` in wl.
// operator[] creates a value-initialized WordInfo for a word seen for the
// first time, so its refcount starts at zero and its letters bitset is empty.
static void add_words(WordDB::WordList& wl, const std::vector<InternedString>& words)
{
    for (auto& word : words)
    {
        WordDB::WordInfo& entry = wl[word];
        // Only compute the used-characters bitset while it is still empty
        if (entry.letters.none())
            entry.letters = used_letters(word);
        ++entry.refcount;
    }
}
|
||||||
|
|
||||||
static void remove_words(WordDB::WordList& wl, const std::vector<InternedString>& words)
|
static void remove_words(WordDB::WordList& wl, const std::vector<InternedString>& words)
|
||||||
|
@ -44,8 +68,8 @@ static void remove_words(WordDB::WordList& wl, const std::vector<InternedString>
|
||||||
for (auto& w : words)
|
for (auto& w : words)
|
||||||
{
|
{
|
||||||
auto it = wl.find(w);
|
auto it = wl.find(w);
|
||||||
kak_assert(it != wl.end() and it->second > 0);
|
kak_assert(it != wl.end() and it->second.refcount > 0);
|
||||||
if (--it->second == 0)
|
if (--it->second.refcount == 0)
|
||||||
wl.erase(it);
|
wl.erase(it);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -106,38 +130,39 @@ void WordDB::update_db()
|
||||||
m_line_to_words = std::move(new_lines);
|
m_line_to_words = std::move(new_lines);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<InternedString> WordDB::find_prefix(StringView prefix)
|
template<typename Func>
|
||||||
|
std::vector<InternedString> find_matching(const WordDB::WordList& words, StringView str, Func func)
|
||||||
{
|
{
|
||||||
update_db();
|
WordDB::UsedChars letters = used_letters(str);
|
||||||
|
|
||||||
std::vector<InternedString> res;
|
std::vector<InternedString> res;
|
||||||
for (auto it = m_words.lower_bound(prefix); it != m_words.end(); ++it)
|
for (auto&& word : words)
|
||||||
{
|
{
|
||||||
if (not prefix_match(it->first, prefix))
|
if ((letters & word.second.letters) != letters)
|
||||||
break;
|
continue;
|
||||||
res.push_back(it->first);
|
if (func(word.first, str))
|
||||||
|
res.push_back(word.first);
|
||||||
}
|
}
|
||||||
|
std::sort(res.begin(), res.end());
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the sorted keys of m_words accepted by prefix_match(key, prefix).
// update_db() runs first (presumably to refresh m_words; its full body is
// not visible here).
std::vector<InternedString> WordDB::find_prefix(StringView prefix)
{
    update_db();
    return find_matching(m_words, prefix, prefix_match);
}

// Returns the sorted keys of m_words accepted by
// subsequence_match(key, subsequence), after calling update_db().
std::vector<InternedString> WordDB::find_subsequence(StringView subsequence)
{
    update_db();
    return find_matching(m_words, subsequence, subsequence_match);
}
|
||||||
|
|
||||||
// Returns the refcount stored for `word` in m_words, or 0 when the word
// has no entry.
int WordDB::get_word_occurences(StringView word) const
{
    auto entry = m_words.find(word);
    return entry == m_words.end() ? 0 : entry->second.refcount;
}
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#include "interned_string.hh"
|
#include "interned_string.hh"
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <bitset>
|
||||||
|
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
@ -14,12 +15,20 @@ class WordDB
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
WordDB(const Buffer& buffer);
|
WordDB(const Buffer& buffer);
|
||||||
|
WordDB(const WordDB&) { kak_assert(false); }
|
||||||
|
WordDB(WordDB&&) = default;
|
||||||
|
|
||||||
std::vector<InternedString> find_prefix(StringView prefix);
|
std::vector<InternedString> find_prefix(StringView prefix);
|
||||||
std::vector<InternedString> find_subsequence(StringView subsequence);
|
std::vector<InternedString> find_subsequence(StringView subsequence);
|
||||||
int get_word_occurences(StringView word) const;
|
int get_word_occurences(StringView word) const;
|
||||||
|
|
||||||
using WordList = std::map<InternedString, int>;
|
using UsedChars = std::bitset<64>;
|
||||||
|
struct WordInfo
|
||||||
|
{
|
||||||
|
UsedChars letters;
|
||||||
|
int refcount;
|
||||||
|
};
|
||||||
|
using WordList = std::unordered_map<InternedString, WordInfo>;
|
||||||
private:
|
private:
|
||||||
using LineToWords = std::vector<std::vector<InternedString>>;
|
using LineToWords = std::vector<std::vector<InternedString>>;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user