Experiment with ranked word completion depending on word boundaries

This commit is contained in:
Maxime Coste 2015-10-18 16:55:21 +01:00
parent 944d8f53fb
commit 4f2584a091
3 changed files with 99 additions and 21 deletions

View File

@ -72,7 +72,7 @@ WordDB& get_word_db(const Buffer& buffer)
return cache_val.as<WordDB>();
}
template<bool other_buffers, bool subseq>
template<bool other_buffers>
InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos)
{
auto pos = buffer.iterator_at(cursor_pos);
@ -93,23 +93,23 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos)
String current_word{begin, end};
struct MatchAndBuffer {
MatchAndBuffer(StringView m, const Buffer* b = nullptr) : match(m), buffer(b) {}
struct RankedWordAndBuffer : WordDB::RankedWord
{
RankedWordAndBuffer(StringView w, int r = 0, const Buffer* b = nullptr)
: WordDB::RankedWord{w, r}, buffer{b} {}
bool operator==(const MatchAndBuffer& other) const { return match == other.match; }
bool operator<(const MatchAndBuffer& other) const { return match < other.match; }
bool operator==(const RankedWordAndBuffer& other) const { return word == other.word; }
bool operator<(const RankedWordAndBuffer& other) const { return rank > other.rank; }
StringView match;
const Buffer* buffer;
};
Vector<MatchAndBuffer> matches;
Vector<RankedWordAndBuffer> matches;
auto add_matches = [&](const Buffer& buf) {
auto& word_db = get_word_db(buf);
auto bufmatches = word_db.find_matching(
prefix, subseq ? subsequence_match : prefix_match);
auto bufmatches = word_db.find_matching(prefix);
for (auto& m : bufmatches)
matches.push_back({ m, &buf });
matches.push_back({ m.word, m.rank, &buf });
};
add_matches(buffer);
@ -131,8 +131,8 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos)
matches.erase(std::unique(matches.begin(), matches.end()), matches.end());
const auto longest = std::accumulate(matches.begin(), matches.end(), 0_char,
[](const CharCount& lhs, const MatchAndBuffer& rhs)
{ return std::max(lhs, rhs.match.char_length()); });
[](const CharCount& lhs, const RankedWordAndBuffer& rhs)
{ return std::max(lhs, rhs.word.char_length()); });
InsertCompletion::CandidateList candidates;
candidates.reserve(matches.size());
@ -141,15 +141,17 @@ InsertCompletion complete_word(const Buffer& buffer, ByteCoord cursor_pos)
DisplayLine menu_entry;
if (m.buffer)
{
const auto pad_len = longest + 1 - m.match.char_length();
menu_entry.push_back(m.match.str());
const auto pad_len = longest + 1 - m.word.char_length();
menu_entry.push_back(m.word.str());
menu_entry.push_back(String{' ', pad_len});
menu_entry.push_back({ m.buffer->display_name(), get_face("MenuInfo") });
}
else
menu_entry.push_back(m.match.str());
menu_entry.push_back(m.word.str());
candidates.push_back({m.match.str(), "", std::move(menu_entry)});
menu_entry.push_back({ " " + to_string(m.rank), get_face("cyan") });
candidates.push_back({m.word.str(), "", std::move(menu_entry)});
}
return { begin.coord(), cursor_pos, std::move(candidates), buffer.timestamp() };
@ -419,13 +421,11 @@ bool InsertCompleter::setup_ifn()
return true;
if (completer.mode == InsertCompleterDesc::Word and
*completer.param == "buffer" and
(try_complete(complete_word<false, false>) or
try_complete(complete_word<false, true>)))
try_complete(complete_word<false>))
return true;
if (completer.mode == InsertCompleterDesc::Word and
*completer.param == "all" and
(try_complete(complete_word<true, false>) or
try_complete(complete_word<true, true>)))
try_complete(complete_word<true>))
return true;
}
return false;
@ -504,7 +504,7 @@ void InsertCompleter::explicit_file_complete()
void InsertCompleter::explicit_word_complete()
{
try_complete(complete_word<true, true>);
try_complete(complete_word<true>);
}
void InsertCompleter::explicit_line_complete()

View File

@ -27,6 +27,13 @@ UsedLetters used_letters(StringView str)
return res;
}
// Bits 26 to 51 of a UsedLetters value; to_lower() treats them as the upper
// case counterparts of the 26 bits located just below them.
constexpr UsedLetters upper_mask = 0xFFFFFFC000000;

// Folds the upper case bits of `letters` onto the corresponding lower case
// bits, leaving every bit outside of upper_mask untouched.
UsedLetters to_lower(UsedLetters letters)
{
    const UsedLetters upper_bits = letters & upper_mask;
    const UsedLetters other_bits = letters & ~upper_mask;
    return (upper_bits >> 26) | other_bits;
}
static WordDB::WordList get_words(const SharedString& content)
{
WordDB::WordList res;
@ -136,6 +143,63 @@ int WordDB::get_word_occurences(StringView word) const
return 0;
}
// Returns the words of the database that contain `query` as an in-order
// subsequence, each paired with a strictly positive rank (a rank of 0 means
// "no match", so such words are never reported).
//
// Matching is "smart case": a lower case query character matches the
// candidate character in either case, any other query character has to
// match exactly.
//
// The rank grows by one for every matched character, by one more when that
// character is found right at the current position (contiguous match), and
// by five when it sits on a word boundary: start of the word, right after
// an underscore, or an upper case letter following a lower case one.
WordDB::RankedWordList WordDB::find_matching(StringView query)
{
    // Scores a single candidate against the query, 0 meaning no match.
    auto match_rank = [](StringView candidate, StringView query)
    {
        // <cctype> classification functions are undefined for negative
        // values, which plain char can hold (e.g. UTF-8 bytes).
        auto uchar = [](char ch) { return static_cast<unsigned char>(ch); };

        int rank = 0;
        auto it = candidate.begin();
        char prev = 0; // candidate character preceding *it, 0 at word start
        for (auto c : query)
        {
            if (it == candidate.end())
                return 0;

            const unsigned char uc = uchar(c);
            const bool islow = islower(uc);
            auto eq_c = [islow, c, uc, uchar](char ch) {
                return islow ? tolower(uchar(ch)) == uc : ch == c;
            };

            if (eq_c(*it)) // improve rank on contiguous
                ++rank;

            // Skip candidate characters until the query character is found.
            while (!eq_c(*it))
            {
                prev = *it;
                if (++it == candidate.end())
                    return 0;
            }

            // Improve rank on word boundaries
            if (prev == 0 or prev == '_' or
                (islower(uchar(prev)) and isupper(uchar(*it))))
                rank += 5;

            // Remember the candidate character that was actually matched,
            // not the query one: a lower case query character matching an
            // upper case candidate letter must not make the next upper case
            // letter look like a camelCase boundary.
            prev = *it;
            ++rank;
            ++it;
        }
        return rank;
    };

    // True when every bit set in `query` is also set in `letters`.
    auto matches = [](UsedLetters query, UsedLetters letters)
    {
        return (query & letters) == query;
    };

    update_db();
    const UsedLetters letters = used_letters(query);
    RankedWordList res;
    for (auto&& word : m_words)
    {
        // Cheap rejection on the letter sets before running the scorer:
        // the case folded query letters must all be present in the case
        // folded word letters, and the query bits inside upper_mask must be
        // present as such in the word.
        UsedLetters word_letters = word.second.letters;
        if (not matches(to_lower(letters), to_lower(word_letters)) or
            not matches(letters & upper_mask, word_letters & upper_mask))
            continue;

        if (int rank = match_rank(word.first, query))
            res.push_back({ word.first, rank });
    }

    return res;
}
UnitTest test_word_db{[]()
{
Buffer buffer("test", Buffer::Flags::None,
@ -160,4 +224,9 @@ UnitTest test_word_db{[]()
kak_assert(res == WordDB::WordList{ "allo" COMMA "mutch" COMMA "retchou" COMMA "tchou" });
}};
// Checks that to_lower() applied to the letter set of "abcdABCD" yields the
// same letter set as the one computed for "abcd" alone.
UnitTest test_used_letters{[]()
{
kak_assert(used_letters("abcd") == to_lower(used_letters("abcdABCD")));
}};
}

View File

@ -38,6 +38,15 @@ public:
return res;
}
// A word paired with the score find_matching() computed for it.
// Kept as a plain aggregate: it is brace-initialised from { word, rank }.
struct RankedWord
{
// The matching word.
// NOTE(review): presumably a view on storage owned by the word database -- confirm its lifetime.
StringView word;
// Match score; find_matching() only reports words whose rank is non-zero.
int rank;
};
// List of ranked words, as returned by find_matching().
using RankedWordList = Vector<RankedWord>;
// Returns the words of the database matching str, each with a non-zero rank.
// Not const: the definition calls update_db() before searching.
RankedWordList find_matching(StringView str);
int get_word_occurences(StringView word) const;
private:
void update_db();