Use a heuristic-based match ranking algorithm inspired by what YouCompleteMe does
This commit is contained in:
parent
89d22f3335
commit
24043bbffe
|
@ -1,65 +1,113 @@
|
||||||
#include "ranked_match.hh"
|
#include "ranked_match.hh"
|
||||||
|
|
||||||
|
#include "unit_tests.hh"
|
||||||
|
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
|
||||||
static bool match_rank(StringView candidate, StringView query)
|
static int count_word_boundaries_match(StringView candidate, StringView query)
|
||||||
{
|
{
|
||||||
int rank = 0;
|
int count = 0;
|
||||||
auto it = candidate.begin();
|
auto it = query.begin();
|
||||||
char prev = 0;
|
char prev = 0;
|
||||||
for (auto c : query)
|
for (auto c : candidate)
|
||||||
{
|
{
|
||||||
if (it == candidate.end())
|
const bool is_word_boundary = prev == 0 or
|
||||||
return 0;
|
(ispunct(prev) and is_word(c)) or
|
||||||
|
(islower(prev) and isupper(c));
|
||||||
const bool islow = islower(c);
|
|
||||||
auto eq_c = [islow, c](char ch) { return islow ? tolower(ch) == c : ch == c; };
|
|
||||||
|
|
||||||
if (eq_c(*it)) // improve rank on contiguous
|
|
||||||
++rank;
|
|
||||||
|
|
||||||
while (!eq_c(*it))
|
|
||||||
{
|
|
||||||
prev = *it;
|
|
||||||
if (++it == candidate.end())
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
// Improve rank on word boundaries
|
|
||||||
if (prev == 0 or prev == '_' or
|
|
||||||
(islower(prev) and isupper(*it)))
|
|
||||||
rank += 5;
|
|
||||||
|
|
||||||
prev = c;
|
prev = c;
|
||||||
++rank;
|
|
||||||
|
if (not is_word_boundary)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const char lc = tolower(c);
|
||||||
|
for (; it != query.end(); ++it)
|
||||||
|
{
|
||||||
|
const char qc = *it;
|
||||||
|
if (qc == (islower(qc) ? lc : c))
|
||||||
|
{
|
||||||
|
++count;
|
||||||
|
++it;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (it == query.end())
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Smart-case comparison of one query character against one candidate
// character: a lowercase query character matches the candidate regardless
// of the candidate's case, any other query character must match exactly.
//
// The <cctype> classification/conversion functions have undefined behaviour
// when passed a negative value other than EOF, which is what a byte >= 0x80
// (e.g. part of a UTF-8 sequence) becomes where plain char is signed. Both
// characters are therefore converted to unsigned char before being
// classified or case-folded, and compared in that same domain.
static bool smartcase_eq(char query, char candidate)
{
    const auto uquery = static_cast<unsigned char>(query);
    const auto ucandidate = static_cast<unsigned char>(candidate);

    if (islower(uquery))
        return uquery == static_cast<unsigned char>(tolower(ucandidate));
    return uquery == ucandidate;
}
|
||||||
|
|
||||||
|
static bool subsequence_match_smart_case(StringView str, StringView subseq)
|
||||||
|
{
|
||||||
|
auto it = str.begin();
|
||||||
|
for (auto& c : subseq)
|
||||||
|
{
|
||||||
|
if (it == str.end())
|
||||||
|
return false;
|
||||||
|
while (not smartcase_eq(c, *it))
|
||||||
|
{
|
||||||
|
if (++it == str.end())
|
||||||
|
return false;
|
||||||
|
}
|
||||||
++it;
|
++it;
|
||||||
}
|
}
|
||||||
return rank;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
RankedMatch::RankedMatch(StringView candidate, StringView query)
|
RankedMatch::RankedMatch(StringView candidate, StringView query)
|
||||||
{
|
{
|
||||||
if (candidate.empty() or query.empty())
|
if (candidate.empty() or query.length() > candidate.length())
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (query.empty())
|
||||||
{
|
{
|
||||||
m_candidate = candidate;
|
m_candidate = candidate;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_match_rank = match_rank(candidate, query);
|
if (not subsequence_match_smart_case(candidate, query))
|
||||||
|
return;
|
||||||
|
|
||||||
|
m_candidate = candidate;
|
||||||
|
|
||||||
|
m_first_char_match = smartcase_eq(query[0], candidate[0]);
|
||||||
|
m_word_boundary_match_count = count_word_boundaries_match(candidate, query);
|
||||||
|
m_only_word_boundary = m_word_boundary_match_count == query.length();
|
||||||
|
m_prefix = std::equal(query.begin(), query.end(), candidate.begin(), smartcase_eq);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RankedMatch::operator<(const RankedMatch& other) const
|
bool RankedMatch::operator<(const RankedMatch& other) const
|
||||||
{
|
{
|
||||||
if (m_match_rank == other.m_match_rank)
|
if (m_only_word_boundary or other.m_only_word_boundary)
|
||||||
return std::lexicographical_compare(
|
return m_only_word_boundary and other.m_only_word_boundary ?
|
||||||
m_candidate.begin(), m_candidate.end(),
|
m_word_boundary_match_count > other.m_word_boundary_match_count
|
||||||
other.m_candidate.begin(), other.m_candidate.end(),
|
: m_only_word_boundary;
|
||||||
[](char a, char b) {
|
|
||||||
const bool low_a = islower(a), low_b = islower(b);
|
|
||||||
return low_a == low_b ? a < b : low_a;
|
|
||||||
});
|
|
||||||
|
|
||||||
return m_match_rank < other.m_match_rank;
|
if (m_prefix != other.m_prefix)
|
||||||
|
return m_prefix;
|
||||||
|
|
||||||
|
if (m_word_boundary_match_count != other.m_word_boundary_match_count)
|
||||||
|
return m_word_boundary_match_count > other.m_word_boundary_match_count;
|
||||||
|
|
||||||
|
if (m_first_char_match != other.m_first_char_match)
|
||||||
|
return m_first_char_match;
|
||||||
|
|
||||||
|
return std::lexicographical_compare(
|
||||||
|
m_candidate.begin(), m_candidate.end(),
|
||||||
|
other.m_candidate.begin(), other.m_candidate.end(),
|
||||||
|
[](char a, char b) {
|
||||||
|
const bool low_a = islower(a), low_b = islower(b);
|
||||||
|
return low_a == low_b ? a < b : low_a;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Self-check for count_word_boundaries_match: asserts that candidate
// "run_all_tests" with query "rat" yields a count of 3.
// NOTE(review): presumably UnitTest objects are collected and run by the
// framework declared in unit_tests.hh — confirm there.
UnitTest test_ranked_match{[] {
|
||||||
|
kak_assert(count_word_boundaries_match("run_all_tests", "rat") == 3);
|
||||||
|
}};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,10 @@ struct RankedMatch
|
||||||
|
|
||||||
private:
|
private:
|
||||||
StringView m_candidate;
|
StringView m_candidate;
|
||||||
int m_match_rank = 0;
|
bool m_first_char_match = false;
|
||||||
|
bool m_prefix = false;
|
||||||
|
int m_word_boundary_match_count = 0;
|
||||||
|
bool m_only_word_boundary = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
using RankedMatchList = Vector<RankedMatch>;
|
using RankedMatchList = Vector<RankedMatch>;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user