2014-01-16 23:07:42 +01:00
|
|
|
#include "word_db.hh"
|
|
|
|
|
2019-01-24 11:02:07 +01:00
|
|
|
#include "buffer.hh"
|
2014-05-26 22:00:26 +02:00
|
|
|
#include "line_modification.hh"
|
2018-05-26 02:01:26 +02:00
|
|
|
#include "option_types.hh"
|
2015-05-22 14:58:56 +02:00
|
|
|
#include "unit_tests.hh"
|
2019-01-24 11:02:07 +01:00
|
|
|
#include "utils.hh"
|
|
|
|
#include "value.hh"
|
2014-01-16 23:07:42 +01:00
|
|
|
|
|
|
|
namespace Kakoune
|
|
|
|
{
|
|
|
|
|
2018-04-15 05:21:57 +02:00
|
|
|
// Returns the WordDB stored in the value map of `buffer`, creating and
// storing one first when the slot does not hold a value yet.
WordDB& get_word_db(const Buffer& buffer)
{
    // The id of the slot is requested once, on the first call, and reused
    // by every later call.
    static const ValueId slot_id = get_free_value_id();

    Value& slot = buffer.values()[slot_id];
    if (slot)
        return slot.as<WordDB>();

    slot = Value(WordDB{buffer});
    return slot.as<WordDB>();
}
|
|
|
|
|
2018-07-26 15:05:00 +02:00
|
|
|
struct WordSplitter
|
2014-01-16 23:07:42 +01:00
|
|
|
{
|
2019-01-24 11:02:07 +01:00
|
|
|
struct Iterator
|
2014-01-16 23:07:42 +01:00
|
|
|
{
|
2018-07-26 15:05:00 +02:00
|
|
|
Iterator(const char* begin, const WordSplitter& splitter)
|
|
|
|
: m_word_begin{begin}, m_word_end{begin}, m_splitter{&splitter}
|
|
|
|
{ operator++(); }
|
|
|
|
|
|
|
|
StringView operator*() const { return {m_word_begin, m_word_end}; }
|
|
|
|
|
|
|
|
Iterator& operator++()
|
2014-01-16 23:07:42 +01:00
|
|
|
{
|
2018-07-26 15:05:00 +02:00
|
|
|
const auto* end = m_splitter->m_content.end();
|
2018-11-27 08:13:29 +01:00
|
|
|
auto extra_chars = m_splitter->m_extra_word_chars;
|
2018-07-26 15:05:00 +02:00
|
|
|
|
2019-03-22 08:03:49 +01:00
|
|
|
while (true)
|
|
|
|
{
|
|
|
|
m_word_begin = m_word_end;
|
|
|
|
while (m_word_begin != end and not is_word(utf8::codepoint(m_word_begin, end), extra_chars))
|
|
|
|
utf8::to_next(m_word_begin, end);
|
|
|
|
m_word_end = m_word_begin;
|
|
|
|
CharCount word_len = 0;
|
|
|
|
while (m_word_end != end and is_word(utf8::codepoint(m_word_end, end), extra_chars))
|
|
|
|
{
|
|
|
|
utf8::to_next(m_word_end, end);
|
|
|
|
++word_len;
|
|
|
|
}
|
2020-11-07 00:19:11 +01:00
|
|
|
if (m_word_begin == end or word_len < WordDB::max_word_len)
|
2019-03-22 08:03:49 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2018-07-26 15:05:00 +02:00
|
|
|
return *this;
|
2014-01-16 23:07:42 +01:00
|
|
|
}
|
2018-07-26 15:05:00 +02:00
|
|
|
|
|
|
|
friend bool operator==(const Iterator& lhs, const Iterator& rhs)
|
|
|
|
{ return lhs.m_word_begin == rhs.m_word_begin and lhs.m_word_end == rhs.m_word_end; }
|
|
|
|
|
|
|
|
friend bool operator!=(const Iterator& lhs, const Iterator& rhs)
|
|
|
|
{ return not (lhs == rhs); }
|
|
|
|
|
|
|
|
const char* m_word_begin;
|
|
|
|
const char* m_word_end;
|
|
|
|
const WordSplitter* m_splitter;
|
|
|
|
};
|
|
|
|
|
|
|
|
StringView m_content;
|
|
|
|
ConstArrayView<Codepoint> m_extra_word_chars;
|
|
|
|
|
|
|
|
Iterator begin() const { return {m_content.begin(), *this}; }
|
|
|
|
Iterator end() const { return {m_content.end(), *this}; }
|
|
|
|
};
|
2014-01-16 23:07:42 +01:00
|
|
|
|
2017-06-26 15:39:17 +02:00
|
|
|
// Reads the "extra_word_chars" option of `buffer` and returns it as a view
// of codepoints.
static ConstArrayView<Codepoint> get_extra_word_chars(const Buffer& buffer)
{
    using CodepointList = Vector<Codepoint, MemoryDomain::Options>;

    auto&& option = buffer.options()["extra_word_chars"];
    return option.get<CodepointList>();
}
|
|
|
|
|
2019-08-31 05:14:46 +02:00
|
|
|
// Records every word of `line` in m_words: a word that is already known gets
// its refcount incremented, a new word is interned and inserted with a
// refcount of 1.
void WordDB::add_words(StringView line, ConstArrayView<Codepoint> extra_word_chars)
{
    const WordSplitter splitter{line, extra_word_chars};
    for (const StringView token : splitter)
    {
        auto known = m_words.find(token);
        if (known != m_words.end())
        {
            ++known->value.refcount;
            continue;
        }

        // First occurrence: the map key is a view onto the interned string,
        // which the stored entry keeps alive.
        auto interned = intern(token);
        auto key = interned->strview();
        auto letters = used_letters(key);
        m_words.insert({key, {std::move(interned), letters, 1}});
    }
}
|
2014-01-16 23:07:42 +01:00
|
|
|
|
2019-08-31 05:14:46 +02:00
|
|
|
// Drops one reference for every word of `line`; a word whose refcount reaches
// zero is removed from m_words. Every word is expected to be present already
// (checked with kak_assert).
void WordDB::remove_words(StringView line, ConstArrayView<Codepoint> extra_word_chars)
{
    const WordSplitter splitter{line, extra_word_chars};
    for (const StringView token : splitter)
    {
        auto it = m_words.find(token);
        kak_assert(it != m_words.end() and it->value.refcount > 0);

        auto& remaining = it->value.refcount;
        --remaining;
        if (remaining == 0)
            m_words.unordered_remove(it->key);
    }
}
|
|
|
|
|
2014-01-24 01:56:33 +01:00
|
|
|
WordDB::WordDB(const Buffer& buffer)
|
2016-08-21 21:25:11 +02:00
|
|
|
: m_buffer{&buffer}
|
|
|
|
{
|
|
|
|
buffer.options().register_watcher(*this);
|
|
|
|
rebuild_db();
|
|
|
|
}
|
|
|
|
|
2017-03-17 00:08:10 +01:00
|
|
|
WordDB::WordDB(WordDB&& other) noexcept
|
2016-08-21 21:25:11 +02:00
|
|
|
: m_buffer{std::move(other.m_buffer)},
|
2017-02-23 01:35:27 +01:00
|
|
|
m_timestamp{other.m_timestamp},
|
2016-08-21 21:25:11 +02:00
|
|
|
m_words{std::move(other.m_words)},
|
2017-02-23 01:35:27 +01:00
|
|
|
m_lines{std::move(other.m_lines)}
|
2016-08-21 21:25:11 +02:00
|
|
|
{
|
|
|
|
kak_assert(m_buffer);
|
|
|
|
m_buffer->options().unregister_watcher(other);
|
|
|
|
other.m_buffer = nullptr;
|
|
|
|
|
|
|
|
m_buffer->options().register_watcher(*this);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stops watching the buffer's options. Nothing is done when m_buffer is null,
// which is the state the move constructor leaves its source in.
WordDB::~WordDB()
{
    if (not m_buffer)
        return;

    m_buffer->options().unregister_watcher(*this);
}
|
|
|
|
|
|
|
|
void WordDB::rebuild_db()
|
|
|
|
{
|
|
|
|
auto& buffer = *m_buffer;
|
|
|
|
|
|
|
|
m_words.clear();
|
|
|
|
m_lines.clear();
|
2015-01-15 14:58:55 +01:00
|
|
|
m_lines.reserve((int)buffer.line_count());
|
2019-08-31 05:14:46 +02:00
|
|
|
auto extra_word_chars = get_extra_word_chars(buffer);
|
2014-01-24 01:56:33 +01:00
|
|
|
for (auto line = 0_line, end = buffer.line_count(); line < end; ++line)
|
2014-01-16 23:07:42 +01:00
|
|
|
{
|
2015-01-27 14:11:32 +01:00
|
|
|
m_lines.push_back(buffer.line_storage(line));
|
2019-08-31 05:14:46 +02:00
|
|
|
add_words(m_lines.back()->strview(), extra_word_chars);
|
2014-01-16 23:07:42 +01:00
|
|
|
}
|
2016-08-21 21:25:11 +02:00
|
|
|
m_timestamp = buffer.timestamp();
|
2014-01-16 23:07:42 +01:00
|
|
|
}
|
|
|
|
|
2014-01-24 01:56:33 +01:00
|
|
|
void WordDB::update_db()
|
2014-01-16 23:07:42 +01:00
|
|
|
{
|
2014-05-14 22:19:19 +02:00
|
|
|
auto& buffer = *m_buffer;
|
|
|
|
|
2014-05-26 22:00:26 +02:00
|
|
|
auto modifs = compute_line_modifications(buffer, m_timestamp);
|
2014-05-14 22:19:19 +02:00
|
|
|
m_timestamp = buffer.timestamp();
|
|
|
|
|
2014-01-24 01:56:33 +01:00
|
|
|
if (modifs.empty())
|
|
|
|
return;
|
2014-01-16 23:07:42 +01:00
|
|
|
|
2015-01-15 14:58:55 +01:00
|
|
|
Lines new_lines;
|
2014-01-24 01:56:33 +01:00
|
|
|
new_lines.reserve((int)buffer.line_count());
|
|
|
|
|
2019-08-31 05:14:46 +02:00
|
|
|
auto extra_word_chars = get_extra_word_chars(buffer);
|
2014-01-24 01:56:33 +01:00
|
|
|
auto old_line = 0_line;
|
|
|
|
for (auto& modif : modifs)
|
|
|
|
{
|
2015-02-04 00:39:04 +01:00
|
|
|
kak_assert(0_line <= modif.new_line and modif.new_line <= buffer.line_count());
|
|
|
|
kak_assert(modif.new_line < buffer.line_count() or modif.num_added == 0);
|
2014-05-26 22:00:26 +02:00
|
|
|
kak_assert(old_line <= modif.old_line);
|
|
|
|
while (old_line < modif.old_line)
|
2015-01-15 14:58:55 +01:00
|
|
|
new_lines.push_back(std::move(m_lines[(int)old_line++]));
|
2014-01-24 01:56:33 +01:00
|
|
|
|
2014-05-26 22:00:26 +02:00
|
|
|
kak_assert((int)new_lines.size() == (int)modif.new_line);
|
|
|
|
|
2015-02-01 00:50:24 +01:00
|
|
|
while (old_line < modif.old_line + modif.num_removed)
|
2014-05-26 22:00:26 +02:00
|
|
|
{
|
2015-01-15 14:58:55 +01:00
|
|
|
kak_assert(old_line < m_lines.size());
|
2019-08-31 05:14:46 +02:00
|
|
|
remove_words(m_lines[(int)old_line++]->strview(), extra_word_chars);
|
2014-05-26 22:00:26 +02:00
|
|
|
}
|
2014-01-24 01:56:33 +01:00
|
|
|
|
2015-02-01 00:50:24 +01:00
|
|
|
for (auto l = 0_line; l < modif.num_added; ++l)
|
2014-01-24 01:56:33 +01:00
|
|
|
{
|
2015-01-27 14:11:32 +01:00
|
|
|
new_lines.push_back(buffer.line_storage(modif.new_line + l));
|
2019-08-31 05:14:46 +02:00
|
|
|
add_words(new_lines.back()->strview(), extra_word_chars);
|
2014-01-24 01:56:33 +01:00
|
|
|
}
|
|
|
|
}
|
2015-01-15 14:58:55 +01:00
|
|
|
while (old_line != (int)m_lines.size())
|
|
|
|
new_lines.push_back(std::move(m_lines[(int)old_line++]));
|
2014-01-24 01:56:33 +01:00
|
|
|
|
2015-01-15 14:58:55 +01:00
|
|
|
m_lines = std::move(new_lines);
|
2014-01-16 23:07:42 +01:00
|
|
|
}
|
|
|
|
|
2016-08-21 21:25:11 +02:00
|
|
|
void WordDB::on_option_changed(const Option& option)
|
|
|
|
{
|
2017-06-26 16:28:41 +02:00
|
|
|
if (option.name() == "extra_word_chars")
|
2016-08-21 21:25:11 +02:00
|
|
|
rebuild_db();
|
|
|
|
}
|
|
|
|
|
2014-10-01 01:20:12 +02:00
|
|
|
// Returns the refcount recorded for `word`, or 0 when the word is not in the
// database. The database is queried as it currently stands: this function
// does not call update_db().
int WordDB::get_word_occurences(StringView word) const
{
    auto entry = m_words.find(word);
    return entry != m_words.end() ? entry->value.refcount : 0;
}
|
|
|
|
|
2015-10-22 20:49:08 +02:00
|
|
|
// Updates the database, then returns a RankedMatch for every known word for
// which the match built against `query` evaluates to true. The result is in
// the iteration order of m_words.
RankedMatchList WordDB::find_matching(StringView query)
{
    update_db();

    const UsedLetters query_letters = used_letters(query);

    RankedMatchList matches;
    for (const auto& entry : m_words)
    {
        RankedMatch candidate{entry.key, entry.value.letters, query, query_letters};
        if (not candidate)
            continue;
        matches.push_back(candidate);
    }
    return matches;
}
|
|
|
|
|
2015-05-22 14:58:56 +02:00
|
|
|
// Unit test: builds a small buffer, then checks the words reported by the
// database and their occurrence counts, initially and after an erase and an
// insert on the buffer.
UnitTest test_word_db{[]()
{
    // Orders matches by candidate so results can be compared to a fixed list.
    auto cmp_words = [](const RankedMatch& left, const RankedMatch& right) {
        return left.candidate() < right.candidate();
    };

    using WordList = Vector<StringView>;

    // True when the candidates of the matches equal the expected words, in
    // the same order.
    auto eq = [](ArrayView<const RankedMatch> matches, const WordList& expected) {
        if (matches.size() != expected.size())
            return false;
        return std::equal(matches.begin(), matches.end(), expected.begin(),
                          [](const RankedMatch& match, const StringView& word) {
                              return match.candidate() == word;
                          });
    };

    auto make_lines = [](auto&&... texts) {
        return BufferLines{StringData::create({texts})...};
    };

    Buffer buffer("test", Buffer::Flags::None,
                  make_lines("tchou mutch\n", "tchou kanaky tchou\n", "\n", "tchaa tchaa\n", "allo\n"));
    WordDB word_db(buffer);

    // Queries every word of the database, sorted by candidate.
    auto sorted_matches = [&cmp_words](WordDB& db) {
        auto found = db.find_matching("");
        std::sort(found.begin(), found.end(), cmp_words);
        return found;
    };

    auto res = sorted_matches(word_db);
    kak_assert(eq(res, WordList{ "allo", "kanaky", "mutch", "tchaa", "tchou" }));
    kak_assert(word_db.get_word_occurences("tchou") == 3);
    kak_assert(word_db.get_word_occurences("allo") == 1);

    buffer.erase({1, 6}, {4, 0});
    res = sorted_matches(word_db);
    kak_assert(eq(res, WordList{ "allo", "mutch", "tchou" }));

    buffer.insert({1, 0}, "re");
    res = sorted_matches(word_db);
    kak_assert(eq(res, WordList{ "allo", "mutch", "retchou", "tchou" }));
}};
|
|
|
|
|
2014-01-16 23:07:42 +01:00
|
|
|
}
|