From 24043bbffe6b277bd5f9b4a68df751c6d2090967 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Thu, 29 Oct 2015 13:36:30 +0000 Subject: [PATCH] Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does --- src/ranked_match.cc | 122 ++++++++++++++++++++++++++++++-------------- src/ranked_match.hh | 5 +- 2 files changed, 89 insertions(+), 38 deletions(-) diff --git a/src/ranked_match.cc b/src/ranked_match.cc index 03cb2fcd..9acaa8b4 100644 --- a/src/ranked_match.cc +++ b/src/ranked_match.cc @@ -1,65 +1,113 @@ #include "ranked_match.hh" +#include "unit_tests.hh" + namespace Kakoune { -static bool match_rank(StringView candidate, StringView query) +static int count_word_boundaries_match(StringView candidate, StringView query) { - int rank = 0; - auto it = candidate.begin(); + int count = 0; + auto it = query.begin(); char prev = 0; - for (auto c : query) + for (auto c : candidate) { - if (it == candidate.end()) - return 0; - - const bool islow = islower(c); - auto eq_c = [islow, c](char ch) { return islow ? tolower(ch) == c : ch == c; }; - - if (eq_c(*it)) // improve rank on contiguous - ++rank; - - while (!eq_c(*it)) - { - prev = *it; - if (++it == candidate.end()) - return 0; - } - // Improve rank on word boundaries - if (prev == 0 or prev == '_' or - (islower(prev) and isupper(*it))) - rank += 5; - + const bool is_word_boundary = prev == 0 or + (ispunct(prev) and is_word(c)) or + (islower(prev) and isupper(c)); prev = c; - ++rank; + + if (not is_word_boundary) + continue; + + const char lc = tolower(c); + for (; it != query.end(); ++it) + { + const char qc = *it; + if (qc == (islower(qc) ? lc : c)) + { + ++count; + ++it; + break; + } + } + if (it == query.end()) + break; + } + return count; +} + +static bool smartcase_eq(char query, char candidate) +{ + return query == (islower(query) ? tolower(candidate) : candidate); +} + +static bool subsequence_match_smart_case(StringView str, StringView subseq) +{ + auto it = str.begin(); + for (auto& c : subseq) + { + if (it == str.end()) + return false; + while (not smartcase_eq(c, *it)) + { + if (++it == str.end()) + return false; + } ++it; } - return rank; + return true; } RankedMatch::RankedMatch(StringView candidate, StringView query) { - if (candidate.empty() or query.empty()) + if (candidate.empty() or query.length() > candidate.length()) + return; + + if (query.empty()) { m_candidate = candidate; return; } - m_match_rank = match_rank(candidate, query); + if (not subsequence_match_smart_case(candidate, query)) + return; + + m_candidate = candidate; + + m_first_char_match = smartcase_eq(query[0], candidate[0]); + m_word_boundary_match_count = count_word_boundaries_match(candidate, query); + m_only_word_boundary = m_word_boundary_match_count == query.length(); + m_prefix = std::equal(query.begin(), query.end(), candidate.begin(), smartcase_eq); } bool RankedMatch::operator<(const RankedMatch& other) const { - if (m_match_rank == other.m_match_rank) - return std::lexicographical_compare( - m_candidate.begin(), m_candidate.end(), - other.m_candidate.begin(), other.m_candidate.end(), - [](char a, char b) { - const bool low_a = islower(a), low_b = islower(b); - return low_a == low_b ? a < b : low_a; - }); + if (m_only_word_boundary or other.m_only_word_boundary) + return m_only_word_boundary and other.m_only_word_boundary ? + m_word_boundary_match_count > other.m_word_boundary_match_count + : m_only_word_boundary; - return m_match_rank < other.m_match_rank; + if (m_prefix != other.m_prefix) + return m_prefix; + + if (m_word_boundary_match_count != other.m_word_boundary_match_count) + return m_word_boundary_match_count > other.m_word_boundary_match_count; + + if (m_first_char_match != other.m_first_char_match) + return m_first_char_match; + + return std::lexicographical_compare( + m_candidate.begin(), m_candidate.end(), + other.m_candidate.begin(), other.m_candidate.end(), + [](char a, char b) { + const bool low_a = islower(a), low_b = islower(b); + return low_a == low_b ? a < b : low_a; + }); } +UnitTest test_ranked_match{[] { + kak_assert(count_word_boundaries_match("run_all_tests", "rat") == 3); +}}; + } diff --git a/src/ranked_match.hh b/src/ranked_match.hh index d6367453..2a805002 100644 --- a/src/ranked_match.hh +++ b/src/ranked_match.hh @@ -19,7 +19,10 @@ struct RankedMatch private: StringView m_candidate; - int m_match_rank = 0; + bool m_first_char_match = false; + bool m_prefix = false; + int m_word_boundary_match_count = 0; + bool m_only_word_boundary = false; }; using RankedMatchList = Vector;