Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does

This commit is contained in:
Maxime Coste 2015-10-29 13:36:30 +00:00
parent 89d22f3335
commit 24043bbffe
2 changed files with 89 additions and 38 deletions

View File

@ -1,65 +1,113 @@
#include "ranked_match.hh" #include "ranked_match.hh"
#include "unit_tests.hh"
namespace Kakoune namespace Kakoune
{ {
static bool match_rank(StringView candidate, StringView query) static int count_word_boundaries_match(StringView candidate, StringView query)
{ {
int rank = 0; int count = 0;
auto it = candidate.begin(); auto it = query.begin();
char prev = 0; char prev = 0;
for (auto c : query) for (auto c : candidate)
{ {
if (it == candidate.end()) const bool is_word_boundary = prev == 0 or
return 0; (ispunct(prev) and is_word(c)) or
(islower(prev) and isupper(c));
const bool islow = islower(c);
auto eq_c = [islow, c](char ch) { return islow ? tolower(ch) == c : ch == c; };
if (eq_c(*it)) // improve rank on contiguous
++rank;
while (!eq_c(*it))
{
prev = *it;
if (++it == candidate.end())
return 0;
}
// Improve rank on word boundaries
if (prev == 0 or prev == '_' or
(islower(prev) and isupper(*it)))
rank += 5;
prev = c; prev = c;
++rank;
if (not is_word_boundary)
continue;
const char lc = tolower(c);
for (; it != query.end(); ++it)
{
const char qc = *it;
if (qc == (islower(qc) ? lc : c))
{
++count;
++it;
break;
}
}
if (it == query.end())
break;
}
return count;
}
static bool smartcase_eq(char query, char candidate)
{
return query == (islower(query) ? tolower(candidate) : candidate);
}
static bool subsequence_match_smart_case(StringView str, StringView subseq)
{
auto it = str.begin();
for (auto& c : subseq)
{
if (it == str.end())
return false;
while (not smartcase_eq(c, *it))
{
if (++it == str.end())
return false;
}
++it; ++it;
} }
return rank; return true;
} }
RankedMatch::RankedMatch(StringView candidate, StringView query) RankedMatch::RankedMatch(StringView candidate, StringView query)
{ {
if (candidate.empty() or query.empty()) if (candidate.empty() or query.length() > candidate.length())
return;
if (query.empty())
{ {
m_candidate = candidate; m_candidate = candidate;
return; return;
} }
m_match_rank = match_rank(candidate, query); if (not subsequence_match_smart_case(candidate, query))
return;
m_candidate = candidate;
m_first_char_match = smartcase_eq(query[0], candidate[0]);
m_word_boundary_match_count = count_word_boundaries_match(candidate, query);
m_only_word_boundary = m_word_boundary_match_count == query.length();
m_prefix = std::equal(query.begin(), query.end(), candidate.begin(), smartcase_eq);
} }
bool RankedMatch::operator<(const RankedMatch& other) const bool RankedMatch::operator<(const RankedMatch& other) const
{ {
if (m_match_rank == other.m_match_rank) if (m_only_word_boundary or other.m_only_word_boundary)
return std::lexicographical_compare( return m_only_word_boundary and other.m_only_word_boundary ?
m_candidate.begin(), m_candidate.end(), m_word_boundary_match_count > other.m_word_boundary_match_count
other.m_candidate.begin(), other.m_candidate.end(), : m_only_word_boundary;
[](char a, char b) {
const bool low_a = islower(a), low_b = islower(b);
return low_a == low_b ? a < b : low_a;
});
return m_match_rank < other.m_match_rank; if (m_prefix != other.m_prefix)
return m_prefix;
if (m_word_boundary_match_count != other.m_word_boundary_match_count)
return m_word_boundary_match_count > other.m_word_boundary_match_count;
if (m_first_char_match != other.m_first_char_match)
return m_first_char_match;
return std::lexicographical_compare(
m_candidate.begin(), m_candidate.end(),
other.m_candidate.begin(), other.m_candidate.end(),
[](char a, char b) {
const bool low_a = islower(a), low_b = islower(b);
return low_a == low_b ? a < b : low_a;
});
} }
UnitTest test_ranked_match{[] {
kak_assert(count_word_boundaries_match("run_all_tests", "rat") == 3);
}};
} }

View File

@ -19,7 +19,10 @@ struct RankedMatch
private: private:
StringView m_candidate; StringView m_candidate;
int m_match_rank = 0; bool m_first_char_match = false;
bool m_prefix = false;
int m_word_boundary_match_count = 0;
bool m_only_word_boundary = false;
}; };
using RankedMatchList = Vector<RankedMatch>; using RankedMatchList = Vector<RankedMatch>;