2015-10-22 20:49:08 +02:00
|
|
|
#include "ranked_match.hh"
|
|
|
|
|
2016-11-29 00:53:50 +01:00
|
|
|
#include "flags.hh"
|
2015-10-29 14:36:30 +01:00
|
|
|
#include "unit_tests.hh"
|
2016-11-29 00:53:50 +01:00
|
|
|
#include "utf8_iterator.hh"
|
2017-10-09 16:12:42 +02:00
|
|
|
#include "optional.hh"
|
2015-10-29 14:36:30 +01:00
|
|
|
|
2016-09-30 23:13:01 +02:00
|
|
|
#include <algorithm>
|
|
|
|
|
2015-10-22 20:49:08 +02:00
|
|
|
namespace Kakoune
|
|
|
|
{
|
|
|
|
|
2016-03-25 21:35:57 +01:00
|
|
|
// Build the bitmask of "letters" occurring in str:
//   bits  0..25 : 'a'..'z'
//   bits 26..51 : 'A'..'Z'
//   bit  53     : '_'
//   bit  54     : '-'
//   bit  63     : any other byte
UsedLetters used_letters(StringView str)
{
    UsedLetters mask = 0;
    for (auto ch : str)
    {
        // Bytes that are not letters, '_' or '-' all share the top bit.
        int bit = 63;
        if (ch >= 'a' and ch <= 'z')
            bit = ch - 'a';
        else if (ch >= 'A' and ch <= 'Z')
            bit = ch - 'A' + 26;
        else if (ch == '_')
            bit = 53;
        else if (ch == '-')
            bit = 54;

        mask |= 1uLL << bit;
    }
    return mask;
}
|
|
|
|
|
|
|
|
// True when every bit set in query is also set in letters, i.e. query
// has no bit that letters lacks.
bool matches(UsedLetters query, UsedLetters letters)
{
    return (query & ~letters) == 0;
}
|
|
|
|
|
2015-10-30 14:57:46 +01:00
|
|
|
// utf8::iterator over raw const char* storage; dereferencing it yields a
// Codepoint (see its use in count_word_boundaries_match below).
using Utf8It = utf8::iterator<const char*>;
|
|
|
|
|
2015-10-29 14:36:30 +01:00
|
|
|
// Count how many code points of candidate that start a word can be paired,
// in order, with code points of query.
//
// A candidate code point starts a word when it is the first one, when it is
// alphanumeric and follows a non alphanumeric one, or when it is uppercase
// and follows a lowercase one. Query code points that are lowercase are
// compared against the lowercased candidate code point, the others are
// compared as is. Query code points may be skipped, but once one has been
// paired, only the ones after it are considered for later word starts.
static int count_word_boundaries_match(StringView candidate, StringView query)
{
    int matched = 0;
    Utf8It next_query{query.begin(), query};
    Codepoint previous = 0;
    for (Utf8It cand_it{candidate.begin(), candidate}; cand_it != candidate.end(); ++cand_it)
    {
        const Codepoint current = *cand_it;
        const bool starts_word =
            previous == 0 or
            (!iswalnum((wchar_t)previous) and iswalnum((wchar_t)current)) or
            (iswlower((wchar_t)previous) and iswupper((wchar_t)current));
        previous = current;

        if (starts_word)
        {
            const Codepoint folded = to_lower(current);
            // Look for the first remaining query code point matching this word start.
            for (auto probe = next_query; probe != query.end(); ++probe)
            {
                const Codepoint wanted = *probe;
                const Codepoint seen = iswlower((wchar_t)wanted) ? folded : current;
                if (wanted != seen)
                    continue;

                ++matched;
                next_query = probe+1;
                break;
            }

            // Nothing left in the query to pair with further word starts.
            if (next_query == query.end())
                break;
        }
    }
    return matched;
}
|
2015-10-22 20:49:08 +02:00
|
|
|
|
2017-07-19 20:16:28 +02:00
|
|
|
// Smart case comparison: a lowercase query code point is compared against
// the lowercased candidate, any other query code point must be equal to
// the candidate as is.
static bool smartcase_eq(Codepoint candidate, Codepoint query)
{
    if (iswlower((wchar_t)query))
        return query == to_lower(candidate);
    return query == candidate;
}
|
|
|
|
|
2016-09-26 22:54:39 +02:00
|
|
|
// Result of a successful subsequence_match_smart_case call.
struct SubseqRes
{
    // Index, counted in code points, of the candidate code point that
    // matched the last query code point.
    int max_index;

    // False once a code point for which is_word() is false was skipped
    // after the first matched code point.
    bool single_word;
};
|
|
|
|
|
2017-01-30 12:22:14 +01:00
|
|
|
// Check whether subseq is a subsequence of str, using smartcase_eq to compare
// code points, always pairing each subseq code point with the first suitable
// one remaining in str.
//
// Returns an empty Optional when str is exhausted before every code point of
// subseq has been paired. Otherwise returns the code point index in str of
// the last paired code point, along with whether all the code points skipped
// after the first pairing satisfied is_word().
static Optional<SubseqRes> subsequence_match_smart_case(StringView str, StringView subseq)
{
    const auto str_end = str.end();
    const auto subseq_end = subseq.end();
    auto str_pos = str.begin();
    auto subseq_pos = subseq.begin();

    bool one_word = true;
    int last_match = -1; // -1 until a first code point has been paired
    int cp_index = 0;    // index of the code point of str being examined
    while (subseq_pos != subseq_end)
    {
        if (str_pos == str_end)
            return {};

        const Codepoint wanted = utf8::read_codepoint(subseq_pos, subseq_end);
        auto seen = utf8::read_codepoint(str_pos, str_end);
        while (not smartcase_eq(seen, wanted))
        {
            // Skipped code points only matter once matching has started.
            one_word = one_word and (last_match == -1 or is_word(seen));

            ++cp_index;
            if (str_pos == str_end)
                return {};
            seen = utf8::read_codepoint(str_pos, str_end);
        }
        last_match = cp_index;
        ++cp_index;
    }
    return SubseqRes{last_match, one_word};
}
|
|
|
|
|
2016-03-25 21:35:57 +01:00
|
|
|
template<typename TestFunc>
|
|
|
|
RankedMatch::RankedMatch(StringView candidate, StringView query, TestFunc func)
|
2015-10-27 22:25:18 +01:00
|
|
|
{
|
2015-10-29 14:36:30 +01:00
|
|
|
if (candidate.empty() or query.length() > candidate.length())
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (query.empty())
|
2015-10-27 22:25:18 +01:00
|
|
|
{
|
|
|
|
m_candidate = candidate;
|
2016-09-26 22:54:39 +02:00
|
|
|
return;
|
|
|
|
}
|
2016-03-25 21:35:57 +01:00
|
|
|
|
2016-09-26 22:54:39 +02:00
|
|
|
if (not func())
|
|
|
|
return;
|
|
|
|
|
|
|
|
auto res = subsequence_match_smart_case(candidate, query);
|
2017-01-30 12:22:14 +01:00
|
|
|
if (not res)
|
2016-09-26 22:54:39 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
m_candidate = candidate;
|
2017-01-30 12:22:14 +01:00
|
|
|
m_max_index = res->max_index;
|
2016-09-26 22:54:39 +02:00
|
|
|
|
2017-01-30 12:22:14 +01:00
|
|
|
if (res->single_word)
|
2016-09-26 22:54:39 +02:00
|
|
|
m_flags |= Flags::SingleWord;
|
2017-07-19 20:16:28 +02:00
|
|
|
if (smartcase_eq(candidate[0], query[0]))
|
2016-09-26 22:54:39 +02:00
|
|
|
m_flags |= Flags::FirstCharMatch;
|
2016-09-30 23:13:01 +02:00
|
|
|
|
|
|
|
auto it = std::search(candidate.begin(), candidate.end(),
|
2017-07-19 20:16:28 +02:00
|
|
|
query.begin(), query.end(), smartcase_eq);
|
2016-09-30 23:13:01 +02:00
|
|
|
if (it != candidate.end())
|
2016-09-26 22:54:39 +02:00
|
|
|
{
|
2016-09-30 23:13:01 +02:00
|
|
|
m_flags |= Flags::Contiguous;
|
|
|
|
if (it == candidate.begin())
|
|
|
|
{
|
|
|
|
m_flags |= Flags::Prefix;
|
|
|
|
if (query.length() == candidate.length())
|
2017-07-19 20:18:47 +02:00
|
|
|
{
|
|
|
|
m_flags |= Flags::SmartFullMatch;
|
|
|
|
if (candidate == query)
|
|
|
|
m_flags |= Flags::FullMatch;
|
|
|
|
}
|
2016-09-30 23:13:01 +02:00
|
|
|
}
|
2015-10-27 22:25:18 +01:00
|
|
|
}
|
2016-09-30 23:13:01 +02:00
|
|
|
|
2016-09-26 22:54:39 +02:00
|
|
|
m_word_boundary_match_count = count_word_boundaries_match(candidate, query);
|
|
|
|
if (m_word_boundary_match_count == query.length())
|
|
|
|
m_flags |= Flags::OnlyWordBoundary;
|
2016-03-25 21:35:57 +01:00
|
|
|
}
|
2015-10-27 22:25:18 +01:00
|
|
|
|
2016-03-25 21:35:57 +01:00
|
|
|
// Match candidate against query, using the precomputed letter sets to reject
// candidates before running the subsequence matching: the lowercased query
// letters must all appear in the lowercased candidate letters, and the
// query letters selected by upper_mask must all appear in the candidate.
RankedMatch::RankedMatch(StringView candidate, UsedLetters candidate_letters,
                         StringView query, UsedLetters query_letters)
    : RankedMatch{candidate, query, [&] {
        const bool lowered_ok = matches(to_lower(query_letters),
                                        to_lower(candidate_letters));
        return lowered_ok and
               matches(query_letters & upper_mask, candidate_letters & upper_mask);
    }} {}
|
2015-10-29 14:36:30 +01:00
|
|
|
|
|
|
|
|
2016-03-25 21:35:57 +01:00
|
|
|
RankedMatch::RankedMatch(StringView candidate, StringView query)
|
|
|
|
: RankedMatch{candidate, query, [] { return true; }}
|
|
|
|
{
|
2015-10-27 22:25:18 +01:00
|
|
|
}
|
|
|
|
|
2019-08-23 16:48:58 +02:00
|
|
|
// Tell whether there is a word boundary between two consecutive code points:
// either exactly one of them is alphanumeric, or exactly one of them is
// lowercase.
//
// The wide character classification functions are used for both code points
// (the narrow islower() is undefined for values that do not fit in an
// unsigned char), and their results are converted to bool before being
// compared, as they are only guaranteed to be non-zero, not equal, when the
// property holds.
static bool is_word_boundary(Codepoint prev, Codepoint c)
{
    const bool prev_alnum = iswalnum((wchar_t)prev) != 0;
    const bool cur_alnum  = iswalnum((wchar_t)c) != 0;
    const bool prev_lower = iswlower((wchar_t)prev) != 0;
    const bool cur_lower  = iswlower((wchar_t)c) != 0;

    return prev_alnum != cur_alnum or prev_lower != cur_lower;
}
|
|
|
|
|
2015-10-27 22:25:18 +01:00
|
|
|
bool RankedMatch::operator<(const RankedMatch& other) const
|
|
|
|
{
|
2016-03-24 23:04:56 +01:00
|
|
|
kak_assert((bool)*this and (bool)other);
|
|
|
|
|
2016-08-30 00:56:22 +02:00
|
|
|
const auto diff = m_flags ^ other.m_flags;
|
|
|
|
// flags are different, use their ordering to return the first match
|
|
|
|
if (diff != Flags::None)
|
|
|
|
return (int)(m_flags & diff) > (int)(other.m_flags & diff);
|
2015-10-29 14:36:30 +01:00
|
|
|
|
2018-03-15 13:20:03 +01:00
|
|
|
// If we are SingleWord, FirstCharMatch will do the job, and we dont want to take
|
|
|
|
// other words boundaries into account.
|
|
|
|
if (not (m_flags & (Flags::Prefix | Flags::SingleWord)) and
|
2017-01-31 23:30:23 +01:00
|
|
|
m_word_boundary_match_count != other.m_word_boundary_match_count)
|
2015-10-29 14:36:30 +01:00
|
|
|
return m_word_boundary_match_count > other.m_word_boundary_match_count;
|
|
|
|
|
2016-08-30 01:30:15 +02:00
|
|
|
if (m_max_index != other.m_max_index)
|
|
|
|
return m_max_index < other.m_max_index;
|
2015-10-29 14:36:30 +01:00
|
|
|
|
2017-06-04 09:27:53 +02:00
|
|
|
// Reorder codepoints to improve matching behaviour
|
|
|
|
auto order = [](Codepoint cp) { return cp == '/' ? 0 : cp; };
|
|
|
|
|
2016-08-30 01:30:52 +02:00
|
|
|
auto it1 = m_candidate.begin(), it2 = other.m_candidate.begin();
|
2019-08-23 16:48:58 +02:00
|
|
|
const auto begin1 = it1, begin2 = it2;
|
2016-08-30 01:30:52 +02:00
|
|
|
const auto end1 = m_candidate.end(), end2 = other.m_candidate.end();
|
2017-01-30 00:50:33 +01:00
|
|
|
auto last1 = it1, last2 = it2;
|
2016-08-30 01:30:52 +02:00
|
|
|
while (true)
|
2016-03-28 15:44:49 +02:00
|
|
|
{
|
2016-08-30 01:30:52 +02:00
|
|
|
// find next mismatch
|
|
|
|
while (it1 != end1 and it2 != end2 and *it1 == *it2)
|
|
|
|
++it1, ++it2;
|
|
|
|
|
|
|
|
if (it1 == end1 or it2 == end2)
|
|
|
|
return it1 == end1 and it2 != end2;
|
|
|
|
|
|
|
|
// compare codepoints
|
2017-01-30 00:50:33 +01:00
|
|
|
it1 = utf8::character_start(it1, last1);
|
|
|
|
it2 = utf8::character_start(it2, last2);
|
2019-08-23 16:48:58 +02:00
|
|
|
const auto itsave1 = it1, itsave2 = it2;
|
2016-08-30 01:30:52 +02:00
|
|
|
const auto cp1 = utf8::read_codepoint(it1, end1);
|
2017-01-30 00:37:10 +01:00
|
|
|
const auto cp2 = utf8::read_codepoint(it2, end2);
|
2016-03-28 15:44:49 +02:00
|
|
|
if (cp1 != cp2)
|
|
|
|
{
|
2019-08-23 16:48:58 +02:00
|
|
|
const auto cplast1 = utf8::prev_codepoint(itsave1, begin1).value_or(Codepoint{0});
|
|
|
|
const auto cplast2 = utf8::prev_codepoint(itsave2, begin2).value_or(Codepoint{0});
|
|
|
|
const bool is_wb1 = is_word_boundary(cplast1, cp1);
|
|
|
|
const bool is_wb2 = is_word_boundary(cplast2, cp2);
|
|
|
|
if (is_wb1 != is_wb2)
|
|
|
|
return is_wb1;
|
2017-12-06 06:58:34 +01:00
|
|
|
|
2017-01-30 00:37:10 +01:00
|
|
|
const bool low1 = iswlower((wchar_t)cp1);
|
|
|
|
const bool low2 = iswlower((wchar_t)cp2);
|
2017-12-06 06:58:34 +01:00
|
|
|
if (low1 != low2)
|
|
|
|
return low1;
|
|
|
|
|
|
|
|
return order(cp1) < order(cp2);
|
2016-03-28 15:44:49 +02:00
|
|
|
}
|
2017-01-30 00:50:33 +01:00
|
|
|
last1 = it1; last2 = it2;
|
2016-03-28 15:44:49 +02:00
|
|
|
}
|
2015-10-27 22:25:18 +01:00
|
|
|
}
|
|
|
|
|
2015-10-29 14:36:30 +01:00
|
|
|
UnitTest test_ranked_match{[] {
    // Word boundary counting
    kak_assert(count_word_boundaries_match("run_all_tests", "rat") == 3);
    kak_assert(count_word_boundaries_match("run_all_tests", "at") == 2);
    kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "wm") == 2);
    kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "cobm") == 3);
    kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "cWBM") == 4);

    // Same prefix, different candidate length, and irreflexivity
    kak_assert(RankedMatch{"source", "so"} < RankedMatch{"source_data", "so"});
    kak_assert(not (RankedMatch{"source_data", "so"} < RankedMatch{"source", "so"}));
    kak_assert(not (RankedMatch{"source", "so"} < RankedMatch{"source", "so"}));

    // Ordering between candidates matching the same query
    kak_assert(RankedMatch{"single/word", "wo"} < RankedMatch{"multiw/ord", "wo"});
    kak_assert(RankedMatch{"foo/bar/foobar", "foobar"} < RankedMatch{"foo/bar/baz", "foobar"});
    kak_assert(RankedMatch{"delete-buffer", "db"} < RankedMatch{"debug", "db"});
    kak_assert(RankedMatch{"create_task", "ct"} < RankedMatch{"constructor", "ct"});
    kak_assert(RankedMatch{"class", "cla"} < RankedMatch{"class::attr", "cla"});
    kak_assert(RankedMatch{"meta/", "meta"} < RankedMatch{"meta-a/", "meta"});
    kak_assert(RankedMatch{"find(1p)", "find"} < RankedMatch{"findfs(8)", "find"});
    kak_assert(RankedMatch{"find(1p)", "fin"} < RankedMatch{"findfs(8)", "fin"});
    kak_assert(RankedMatch{"sys_find(1p)", "sys_find"} < RankedMatch{"sys_findfs(8)", "sys_find"});
    kak_assert(RankedMatch{"init", ""} < RankedMatch{"__init__", ""});
    kak_assert(RankedMatch{"init", "ini"} < RankedMatch{"__init__", "ini"});
    kak_assert(RankedMatch{"a", ""} < RankedMatch{"b", ""});
    kak_assert(RankedMatch{"expresions", "expresins"} < RankedMatch{"expressionism's", "expresins"});
}};
|
|
|
|
|
2016-03-25 21:35:57 +01:00
|
|
|
UnitTest test_used_letters{[] {
    // Lowercasing the letter set of a mixed case string gives the letter set
    // of its lowercase letters.
    kak_assert(used_letters("abcd") == to_lower(used_letters("abcdABCD")));
}};
|
|
|
|
|
2015-10-22 20:49:08 +02:00
|
|
|
}
|