home/src/ranked_match.cc

#include "ranked_match.hh"

#include "flags.hh"
#include "unit_tests.hh"
#include "utf8_iterator.hh"

#include <algorithm>

namespace Kakoune
{

// Summarises the bytes of `str` as a bitmask, one bit per character class:
//   'a'..'z' -> bits 0..25
//   'A'..'Z' -> bits 26..51
//   '_'      -> bit 53
//   '-'      -> bit 54
//   any other byte -> bit 63
// The mask only records presence, not position or multiplicity.
UsedLetters used_letters(StringView str)
{
    UsedLetters mask = 0;
    for (auto ch : str)
    {
        // Start from the catch-all bit and refine it for the known classes.
        int shift = 63;
        if (ch >= 'a' and ch <= 'z')
            shift = ch - 'a';
        else if (ch >= 'A' and ch <= 'Z')
            shift = ch - 'A' + 26;
        else if (ch == '_')
            shift = 53;
        else if (ch == '-')
            shift = 54;

        // 1uLL keeps the shift in 64 bits.
        mask |= 1uLL << shift;
    }
    return mask;
}

// Returns true when every bit set in `query` is also set in `letters`,
// i.e. the query's letter mask is a subset of the candidate's.
bool matches(UsedLetters query, UsedLetters letters)
{
    // Bits requested by the query that the candidate does not provide.
    const UsedLetters missing = query & ~letters;
    return missing == 0;
}

// Iterator over raw `const char*` storage that is dereferenced to a Codepoint
// by the code below (see count_word_boundaries_match).
using Utf8It = utf8::iterator<const char*>;

// Counts how many codepoints of `query` can be matched, in order, against
// the codepoints of `candidate` that start a word.
//
// A candidate codepoint starts a word when it is the first one, when it is
// alphanumeric and follows a non-alphanumeric one, or when it is uppercase
// and follows a lowercase one (camelCase).
//
// Matching is smart-case: a lowercase query codepoint is compared against the
// lowercased candidate codepoint, any other query codepoint must match exactly.
// For each word start the remaining query is scanned forward; on a hit the
// query codepoints that were skipped are not considered again.
static int count_word_boundaries_match(StringView candidate, StringView query)
{
    const auto starts_word = [](Codepoint before, Codepoint current) {
        if (before == 0)
            return true;
        if (not iswalnum(static_cast<wchar_t>(before)) and
            iswalnum(static_cast<wchar_t>(current)))
            return true;
        return iswlower(static_cast<wchar_t>(before)) and
               iswupper(static_cast<wchar_t>(current));
    };

    int matched = 0;
    Utf8It next_query{query.begin(), query};
    Codepoint previous = 0; // 0 marks "no codepoint seen yet"
    for (Utf8It cand_it{candidate.begin(), candidate}; cand_it != candidate.end(); ++cand_it)
    {
        const Codepoint current = *cand_it;
        const bool boundary = starts_word(previous, current);
        previous = current;

        if (not boundary)
            continue;

        const Codepoint lowered = to_lower(current);
        auto probe = next_query;
        while (probe != query.end())
        {
            const Codepoint wanted = *probe;
            const Codepoint against = iswlower(static_cast<wchar_t>(wanted)) ? lowered : current;
            if (wanted == against)
            {
                ++matched;
                next_query = probe+1;
                break;
            }
            ++probe;
        }

        // Whole query consumed, nothing left to match.
        if (next_query == query.end())
            break;
    }
    return matched;
}

// Smart-case codepoint comparison: a lowercase `query` codepoint matches
// `candidate` regardless of the candidate's case, any other `query`
// codepoint has to match `candidate` exactly.
static bool smartcase_eq(Codepoint query, Codepoint candidate)
{
    if (iswlower(static_cast<wchar_t>(query)))
        return query == to_lower(candidate);
    return query == candidate;
}

// Result of a successful subsequence_match_smart_case() call.
struct SubseqRes
{
    // Index, counted in decoded codepoints of the searched string, of the
    // codepoint that matched the last codepoint of the subsequence.
    int max_index;
    // False once a codepoint for which is_word() is false was skipped after
    // the first matched codepoint; true otherwise.
    bool single_word;
};

// Checks whether the codepoints of `subseq` appear, in order (not necessarily
// adjacent), in `str`, comparing with smartcase_eq().
//
// Returns an empty Optional when `str` is exhausted before every codepoint of
// `subseq` has been found. Otherwise returns the codepoint index of the last
// match and whether only word codepoints were skipped between matches.
static Optional<SubseqRes> subsequence_match_smart_case(StringView str, StringView subseq)
{
    SubseqRes result{-1, true};
    int position = 0; // codepoint index of the next codepoint read from str

    auto str_it = str.begin();
    auto wanted_it = subseq.begin();
    while (wanted_it != subseq.end())
    {
        if (str_it == str.end())
            return {};

        const Codepoint wanted = utf8::read_codepoint(wanted_it, subseq.end());
        for (auto seen = utf8::read_codepoint(str_it, str.end());
             not smartcase_eq(wanted, seen);
             seen = utf8::read_codepoint(str_it, str.end()))
        {
            // Only gaps after the first match can break the "single word" property.
            if (result.max_index != -1 and result.single_word and not is_word(seen))
                result.single_word = false;

            ++position;
            if (str_it == str.end())
                return {};
        }

        result.max_index = position;
        ++position;
    }
    return result;
}

// Shared implementation behind the public constructors.
//
// candidate: the string being ranked.
// query:     the text typed by the user.
// func:      nullary predicate used as a cheap pre-filter; it is only called
//            for a non-empty query that is not longer than the candidate, and
//            a false result rejects the candidate before any string scanning.
//
// m_candidate is only assigned when the candidate is accepted: either the
// query is empty (no flags are computed in that case), or the query is a
// smart-case subsequence of the candidate.
template<typename TestFunc>
RankedMatch::RankedMatch(StringView candidate, StringView query, TestFunc func)
{
    if (candidate.empty() or query.length() > candidate.length())
        return;

    if (query.empty())
    {
        m_candidate = candidate;
        return;
    }

    if (not func())
        return;

    auto res = subsequence_match_smart_case(candidate, query);
    if (not res)
        return;

    m_candidate = candidate;
    m_max_index = res->max_index;

    if (res->single_word)
        m_flags |= Flags::SingleWord;

    // Compare the first *codepoints*, not the first bytes: smartcase_eq works
    // on codepoints, and comparing UTF-8 lead bytes would report a match for
    // distinct characters sharing a lead byte while feeding a raw (possibly
    // negative) char to iswlower. Both strings are known to be non-empty here.
    auto candidate_first_it = candidate.begin();
    auto query_first_it = query.begin();
    const Codepoint candidate_first = utf8::read_codepoint(candidate_first_it, candidate.end());
    const Codepoint query_first = utf8::read_codepoint(query_first_it, query.end());
    if (smartcase_eq(query_first, candidate_first))
        m_flags |= Flags::FirstCharMatch;

    // Exact (case sensitive, bytewise) occurrence of the query in the candidate.
    auto it = std::search(candidate.begin(), candidate.end(),
                          query.begin(), query.end());
    if (it != candidate.end())
    {
        m_flags |= Flags::Contiguous;
        if (it == candidate.begin())
        {
            m_flags |= Flags::Prefix;
            if (query.length() == candidate.length())
                m_flags |= Flags::FullMatch;
        }
    }

    m_word_boundary_match_count = count_word_boundaries_match(candidate, query);
    // NOTE(review): the count above is in codepoints while query.length() is
    // presumably a byte length, so a query containing multi-byte characters
    // would never get OnlyWordBoundary -- confirm against StringView.
    if (m_word_boundary_match_count == query.length())
        m_flags |= Flags::OnlyWordBoundary;
}

// Builds a match using precomputed letter masks as a fast rejection test:
// the candidate is only scanned when, ignoring case, it contains every letter
// class of the query, and it also contains every query bit selected by
// upper_mask (presumably the uppercase letters -- defined outside this file).
RankedMatch::RankedMatch(StringView candidate, UsedLetters candidate_letters,
                         StringView query, UsedLetters query_letters)
    : RankedMatch{candidate, query, [&] {
        if (not matches(to_lower(query_letters), to_lower(candidate_letters)))
            return false;
        return matches(query_letters & upper_mask, candidate_letters & upper_mask);
    }}
{
}


// Builds a match without any letter-mask pre-filter: the predicate always
// accepts, so the candidate is always checked with the subsequence scan.
RankedMatch::RankedMatch(StringView candidate, StringView query)
    : RankedMatch{candidate, query, [] { return true; }}
{
}

// Ordering used to rank matches: returns true when *this should be ranked
// before `other`. Both operands must convert to true (asserted).
//
// Criteria, in decreasing priority:
//   1. flags: among the flag bits that differ, the operand whose differing
//      bits have the larger integer value comes first;
//   2. unless the Prefix flag is set, the larger word boundary match count;
//   3. the smaller index of the last matched codepoint;
//   4. a lexicographic comparison of the candidates where lowercase codepoints
//      sort before non-lowercase ones and '/' sorts before everything else.
bool RankedMatch::operator<(const RankedMatch& other) const
{
    kak_assert((bool)*this and (bool)other);

    // Bits set in exactly one of the two flag sets.
    const auto diff = m_flags ^ other.m_flags;
    // flags are different, use their ordering to return the first match
    if (diff != Flags::None)
        return (int)(m_flags & diff) > (int)(other.m_flags & diff);

    // Flags are identical from here on, so testing Prefix on *this is enough.
    if (not (m_flags & Flags::Prefix) and
        m_word_boundary_match_count != other.m_word_boundary_match_count)
        return m_word_boundary_match_count > other.m_word_boundary_match_count;

    if (m_max_index != other.m_max_index)
        return m_max_index < other.m_max_index;

    // Reorder codepoints to improve matching behaviour
    // ('/' is mapped to 0 so that it compares lower than any other codepoint)
    auto order = [](Codepoint cp) { return cp == '/' ? 0 : cp; };

    auto it1 = m_candidate.begin(), it2 = other.m_candidate.begin();
    const auto end1 = m_candidate.end(), end2 = other.m_candidate.end();
    // Lower bounds handed to character_start() when rewinding below.
    auto last1 = it1, last2 = it2;
    while (true)
    {
        // find next mismatch (bytewise, no decoding needed while bytes agree)
        while (it1 != end1 and it2 != end2 and *it1 == *it2)
            ++it1, ++it2;

        // One candidate is exhausted: a strict prefix sorts first, and equal
        // candidates are not less than each other.
        if (it1 == end1 or it2 == end2)
            return it1 == end1 and it2 != end2;

        // compare codepoints
        // character_start presumably rewinds to the first byte of the codepoint
        // containing the mismatching byte, without going before last1/last2
        // -- helper defined outside this file, confirm.
        it1 = utf8::character_start(it1, last1);
        it2 = utf8::character_start(it2, last2);
        const auto cp1 = utf8::read_codepoint(it1, end1);
        const auto cp2 = utf8::read_codepoint(it2, end2);
        if (cp1 != cp2)
        {
            const bool low1 = iswlower((wchar_t)cp1);
            const bool low2 = iswlower((wchar_t)cp2);
            // Same case class: use the adjusted codepoint order; otherwise the
            // lowercase codepoint comes first.
            return low1 == low2 ? order(cp1) < order(cp2) : low1;
        }
        // Decoded codepoints were equal although bytes differed: remember how
        // far we got so the next rewind cannot step back before this point.
        last1 = it1; last2 = it2;
    }
}

// Unit tests for word boundary counting and for the RankedMatch ordering.
UnitTest test_ranked_match{[] {
    // Word boundaries: snake_case separators and camelCase humps, with
    // smart-case matching of the query.
    kak_assert(count_word_boundaries_match("run_all_tests", "rat") == 3);
    kak_assert(count_word_boundaries_match("run_all_tests", "at") == 2);
    kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "wm") == 2);
    kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "cobm") == 3);
    kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "cWBM") == 4);
    // Ordering: a candidate sorts before a longer one it is a prefix of, and
    // operator< is irreflexive.
    kak_assert(RankedMatch{"source", "so"} < RankedMatch{"source_data", "so"});
    kak_assert(not (RankedMatch{"source_data", "so"} < RankedMatch{"source", "so"}));
    kak_assert(not (RankedMatch{"source", "so"} < RankedMatch{"source", "so"}));
    // Ordering: expected ranking for pairs of candidates matching the same query.
    kak_assert(RankedMatch{"single/word", "wo"} < RankedMatch{"multiw/ord", "wo"});
    kak_assert(RankedMatch{"foo/bar/foobar", "foobar"} < RankedMatch{"foo/bar/baz", "foobar"});
    kak_assert(RankedMatch{"delete-buffer", "db"} < RankedMatch{"debug", "db"});
    kak_assert(RankedMatch{"create_task", "ct"} < RankedMatch{"constructor", "ct"});
    kak_assert(RankedMatch{"class", "cla"} < RankedMatch{"class::attr", "cla"});
    kak_assert(RankedMatch{"meta/", "meta"} < RankedMatch{"meta-a/", "meta"});
}};

// Unit test for the letter masks: lowercasing the mask of a mixed-case string
// must give the mask of the lowercase letters alone.
UnitTest test_used_letters{[]()
{
    kak_assert(used_letters("abcd") == to_lower(used_letters("abcdABCD")));
}};

}
Extract WordDB::RankedWord as RankedMatch in its own file 2015-10-22 20:49:08 +02:00			`#include "ranked_match.hh"`

Cleanup include dependencies a bit 2016-11-29 00:53:50 +01:00			`#include "flags.hh"`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`#include "unit_tests.hh"`
Cleanup include dependencies a bit 2016-11-29 00:53:50 +01:00			`#include "utf8_iterator.hh"`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00
Tweak ranked match comparison, give contiguous matches an edge 2016-09-30 23:13:01 +02:00			`#include <algorithm>`

Extract WordDB::RankedWord as RankedMatch in its own file 2015-10-22 20:49:08 +02:00			`namespace Kakoune`
			`{`

Move UsedLetters with RankedMatch 2016-03-25 21:35:57 +01:00			`UsedLetters used_letters(StringView str)`
			`{`
			`UsedLetters res = 0;`
			`for (auto c : str)`
			`{`
			`if (c >= 'a' and c <= 'z')`
Fix literal type that must be 64 bits 2016-11-22 23:20:30 +01:00			`res \|= 1uLL << (c - 'a');`
Move UsedLetters with RankedMatch 2016-03-25 21:35:57 +01:00			`else if (c >= 'A' and c <= 'Z')`
Fix literal type that must be 64 bits 2016-11-22 23:20:30 +01:00			`res \|= 1uLL << (c - 'A' + 26);`
Move UsedLetters with RankedMatch 2016-03-25 21:35:57 +01:00			`else if (c == '_')`
Fix literal type that must be 64 bits 2016-11-22 23:20:30 +01:00			`res \|= 1uLL << 53;`
Move UsedLetters with RankedMatch 2016-03-25 21:35:57 +01:00			`else if (c == '-')`
Fix literal type that must be 64 bits 2016-11-22 23:20:30 +01:00			`res \|= 1uLL << 54;`
Move UsedLetters with RankedMatch 2016-03-25 21:35:57 +01:00			`else`
Fix literal type that must be 64 bits 2016-11-22 23:20:30 +01:00			`res \|= 1uLL << 63;`
Move UsedLetters with RankedMatch 2016-03-25 21:35:57 +01:00			`}`
			`return res;`
			`}`

			`bool matches(UsedLetters query, UsedLetters letters)`
			`{`
			`return (query & letters) == query;`
			`}`

Make word insert completion work better with unicode char 2015-10-30 14:57:46 +01:00			`using Utf8It = utf8::iterator<const char*>;`

Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`static int count_word_boundaries_match(StringView candidate, StringView query)`
Extract WordDB::RankedWord as RankedMatch in its own file 2015-10-22 20:49:08 +02:00			`{`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`int count = 0;`
Fix count_word_boundaries_match 2016-02-18 00:05:08 +01:00			`Utf8It query_it{query.begin(), query};`
Make word insert completion work better with unicode char 2015-10-30 14:57:46 +01:00			`Codepoint prev = 0;`
			`for (Utf8It it{candidate.begin(), candidate}; it != candidate.end(); ++it)`
Extract WordDB::RankedWord as RankedMatch in its own file 2015-10-22 20:49:08 +02:00			`{`
Make word insert completion work better with unicode char 2015-10-30 14:57:46 +01:00			`const Codepoint c = *it;`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`const bool is_word_boundary = prev == 0 or`
Go back to libc locale and use c_regex_traits Unfortunately, cygwin does not support c++ locales. 2016-05-19 22:45:23 +02:00			`(!iswalnum((wchar_t)prev) and iswalnum((wchar_t)c)) or`
			`(iswlower((wchar_t)prev) and iswupper((wchar_t)c));`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`prev = c;`
Extract WordDB::RankedWord as RankedMatch in its own file 2015-10-22 20:49:08 +02:00
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`if (not is_word_boundary)`
			`continue;`
Extract WordDB::RankedWord as RankedMatch in its own file 2015-10-22 20:49:08 +02:00
Fix to_lower/to_upper handling to correctly support non unicode chars require a proper unicode locale setup on the system Fixes #94 2015-11-11 01:21:20 +01:00			`const Codepoint lc = to_lower(c);`
Fix count_word_boundaries_match 2016-02-18 00:05:08 +01:00			`for (auto qit = query_it; qit != query.end(); ++qit)`
Extract WordDB::RankedWord as RankedMatch in its own file 2015-10-22 20:49:08 +02:00			`{`
Make word insert completion work better with unicode char 2015-10-30 14:57:46 +01:00			`const Codepoint qc = *qit;`
Use iswlower instead of islower islower can crash with big codepoints, and is incorrect anyway. 2017-01-30 00:37:10 +01:00			`if (qc == (iswlower((wchar_t)qc) ? lc : c))`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`{`
			`++count;`
Fix count_word_boundaries_match 2016-02-18 00:05:08 +01:00			`query_it = qit+1;`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`break;`
			`}`
Extract WordDB::RankedWord as RankedMatch in its own file 2015-10-22 20:49:08 +02:00			`}`
Fix count_word_boundaries_match 2016-02-18 00:05:08 +01:00			`if (query_it == query.end())`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`break;`
			`}`
			`return count;`
			`}`
Extract WordDB::RankedWord as RankedMatch in its own file 2015-10-22 20:49:08 +02:00
Make word insert completion work better with unicode char 2015-10-30 14:57:46 +01:00			`static bool smartcase_eq(Codepoint query, Codepoint candidate)`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`{`
Use iswlower instead of islower islower can crash with big codepoints, and is incorrect anyway. 2017-01-30 00:37:10 +01:00			`return query == (iswlower((wchar_t)query) ? to_lower(candidate) : candidate);`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`}`

Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00			`struct SubseqRes`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`{`
Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00			`int max_index;`
			`bool single_word;`
			`};`

Warning fix in ranked_match.cc 2017-01-30 12:22:14 +01:00			`static Optional<SubseqRes> subsequence_match_smart_case(StringView str, StringView subseq)`
Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00			`{`
			`bool single_word = true;`
			`int max_index = -1;`
Tweak implementation of subsequence_match_smart_case Remove use of utf8 iterators and use directly utf8 functions 2016-03-25 00:45:56 +01:00			`auto it = str.begin();`
Take subsequence matches index when sorting RankedMatch 2016-02-29 00:05:51 +01:00			`int index = 0;`
Tweak subsequence_match_smart_case 2016-03-28 16:18:15 +02:00			`for (auto subseq_it = subseq.begin(); subseq_it != subseq.end();)`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`{`
			`if (it == str.end())`
Warning fix in ranked_match.cc 2017-01-30 12:22:14 +01:00			`return {};`
Tweak subsequence_match_smart_case 2016-03-28 16:18:15 +02:00			`const Codepoint c = utf8::read_codepoint(subseq_it, subseq.end());`
Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00			`while (true)`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`{`
Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00			`auto str_c = utf8::read_codepoint(it, str.end());`
			`if (smartcase_eq(c, str_c))`
			`break;`
Tweak ranked match comparison, give contiguous matches an edge 2016-09-30 23:13:01 +02:00
			`if (max_index != -1 and single_word and not is_word(str_c))`
Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00			`single_word = false;`
Tweak ranked match comparison, give contiguous matches an edge 2016-09-30 23:13:01 +02:00
Take subsequence matches index when sorting RankedMatch 2016-02-29 00:05:51 +01:00			`++index;`
Tweak implementation of subsequence_match_smart_case Remove use of utf8 iterators and use directly utf8 functions 2016-03-25 00:45:56 +01:00			`if (it == str.end())`
Warning fix in ranked_match.cc 2017-01-30 12:22:14 +01:00			`return {};`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`}`
Tweak RankedMatch, compare max match index instead of match indices sum 2016-08-30 01:30:15 +02:00			`max_index = index++;`
Extract WordDB::RankedWord as RankedMatch in its own file 2015-10-22 20:49:08 +02:00			`}`
Warning fix in ranked_match.cc 2017-01-30 12:22:14 +01:00			`return SubseqRes{max_index, single_word};`
Extract WordDB::RankedWord as RankedMatch in its own file 2015-10-22 20:49:08 +02:00			`}`

Move UsedLetters with RankedMatch 2016-03-25 21:35:57 +01:00			`template<typename TestFunc>`
			`RankedMatch::RankedMatch(StringView candidate, StringView query, TestFunc func)`
Move more logic into RankedMatch 2015-10-27 22:25:18 +01:00			`{`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`if (candidate.empty() or query.length() > candidate.length())`
			`return;`

			`if (query.empty())`
Move more logic into RankedMatch 2015-10-27 22:25:18 +01:00			`{`
			`m_candidate = candidate;`
Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00			`return;`
			`}`
Move UsedLetters with RankedMatch 2016-03-25 21:35:57 +01:00
Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00			`if (not func())`
			`return;`

			`auto res = subsequence_match_smart_case(candidate, query);`
Warning fix in ranked_match.cc 2017-01-30 12:22:14 +01:00			`if (not res)`
Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00			`return;`

			`m_candidate = candidate;`
Warning fix in ranked_match.cc 2017-01-30 12:22:14 +01:00			`m_max_index = res->max_index;`
Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00
Warning fix in ranked_match.cc 2017-01-30 12:22:14 +01:00			`if (res->single_word)`
Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00			`m_flags \|= Flags::SingleWord;`
			`if (smartcase_eq(query[0], candidate[0]))`
			`m_flags \|= Flags::FirstCharMatch;`
Tweak ranked match comparison, give contiguous matches an edge 2016-09-30 23:13:01 +02:00
			`auto it = std::search(candidate.begin(), candidate.end(),`
			`query.begin(), query.end());`
			`if (it != candidate.end())`
Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00			`{`
Tweak ranked match comparison, give contiguous matches an edge 2016-09-30 23:13:01 +02:00			`m_flags \|= Flags::Contiguous;`
			`if (it == candidate.begin())`
			`{`
			`m_flags \|= Flags::Prefix;`
			`if (query.length() == candidate.length())`
			`m_flags \|= Flags::FullMatch;`
			`}`
Move more logic into RankedMatch 2015-10-27 22:25:18 +01:00			`}`
Tweak ranked match comparison, give contiguous matches an edge 2016-09-30 23:13:01 +02:00
Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00			`m_word_boundary_match_count = count_word_boundaries_match(candidate, query);`
			`if (m_word_boundary_match_count == query.length())`
			`m_flags \|= Flags::OnlyWordBoundary;`
Move UsedLetters with RankedMatch 2016-03-25 21:35:57 +01:00			`}`
Move more logic into RankedMatch 2015-10-27 22:25:18 +01:00
Move UsedLetters with RankedMatch 2016-03-25 21:35:57 +01:00			`RankedMatch::RankedMatch(StringView candidate, UsedLetters candidate_letters,`
			`StringView query, UsedLetters query_letters)`
			`: RankedMatch{candidate, query, [&] {`
			`return matches(to_lower(query_letters), to_lower(candidate_letters)) and`
			`matches(query_letters & upper_mask, candidate_letters & upper_mask);`
			`}} {}`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00

Move UsedLetters with RankedMatch 2016-03-25 21:35:57 +01:00			`RankedMatch::RankedMatch(StringView candidate, StringView query)`
			`: RankedMatch{candidate, query, [] { return true; }}`
			`{`
Move more logic into RankedMatch 2015-10-27 22:25:18 +01:00			`}`

			`bool RankedMatch::operator<(const RankedMatch& other) const`
			`{`
Fix uninitialized value in RankedMatch 2016-03-24 23:04:56 +01:00			`kak_assert((bool)*this and (bool)other);`

Use flags and bit operations instead of bools in RankedMatch full match is now the most important flag for comparison. 2016-08-30 00:56:22 +02:00			`const auto diff = m_flags ^ other.m_flags;`
			`// flags are different, use their ordering to return the first match`
			`if (diff != Flags::None)`
			`return (int)(m_flags & diff) > (int)(other.m_flags & diff);`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00
Tweak ranked match ordering 2017-01-31 23:30:23 +01:00			`if (not (m_flags & Flags::Prefix) and`
			`m_word_boundary_match_count != other.m_word_boundary_match_count)`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`return m_word_boundary_match_count > other.m_word_boundary_match_count;`

Tweak RankedMatch, compare max match index instead of match indices sum 2016-08-30 01:30:15 +02:00			`if (m_max_index != other.m_max_index)`
			`return m_max_index < other.m_max_index;`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00
Change RankedMatch ordering to favor `/` characters This will improve matching of filenames, as 'foo/' will be sorted before 'foo-bar' due to `/` coming before `-` in the new ordering (it comes after in ascii/unicode order). Fixes #1395 2017-06-04 09:27:53 +02:00			`// Reorder codepoints to improve matching behaviour`
			`auto order = [](Codepoint cp) { return cp == '/' ? 0 : cp; };`

Only decode utf8 when strictly necessary in RankedMatch::operator< 2016-08-30 01:30:52 +02:00			`auto it1 = m_candidate.begin(), it2 = other.m_candidate.begin();`
			`const auto end1 = m_candidate.end(), end2 = other.m_candidate.end();`
Fix infinite loop when comparing RankedMatches containing invalid utf8 If we had a word containing some invalid utf8, like a wrong sequence of continuation bytes, we would infinitely loop back to the previous valid character start. Fixes #1157 2017-01-30 00:50:33 +01:00			`auto last1 = it1, last2 = it2;`
Only decode utf8 when strictly necessary in RankedMatch::operator< 2016-08-30 01:30:52 +02:00			`while (true)`
Use manual lexicographic comparison in RankedMatch::operator< 2016-03-28 15:44:49 +02:00			`{`
Only decode utf8 when strictly necessary in RankedMatch::operator< 2016-08-30 01:30:52 +02:00			`// find next mismatch`
			`while (it1 != end1 and it2 != end2 and *it1 == *it2)`
			`++it1, ++it2;`

			`if (it1 == end1 or it2 == end2)`
			`return it1 == end1 and it2 != end2;`

			`// compare codepoints`
Fix infinite loop when comparing RankedMatches containing invalid utf8 If we had a word containing some invalid utf8, like a wrong sequence of continuation bytes, we would infinitely loop back to the previous valid character start. Fixes #1157 2017-01-30 00:50:33 +01:00			`it1 = utf8::character_start(it1, last1);`
			`it2 = utf8::character_start(it2, last2);`
Only decode utf8 when strictly necessary in RankedMatch::operator< 2016-08-30 01:30:52 +02:00			`const auto cp1 = utf8::read_codepoint(it1, end1);`
Use iswlower instead of islower islower can crash with big codepoints, and is incorrect anyway. 2017-01-30 00:37:10 +01:00			`const auto cp2 = utf8::read_codepoint(it2, end2);`
Use manual lexicographic comparison in RankedMatch::operator< 2016-03-28 15:44:49 +02:00			`if (cp1 != cp2)`
			`{`
Use iswlower instead of islower islower can crash with big codepoints, and is incorrect anyway. 2017-01-30 00:37:10 +01:00			`const bool low1 = iswlower((wchar_t)cp1);`
			`const bool low2 = iswlower((wchar_t)cp2);`
Change RankedMatch ordering to favor `/` characters This will improve matching of filenames, as 'foo/' will be sorted before 'foo-bar' due to `/` coming before `-` in the new ordering (it comes after in ascii/unicode order). Fixes #1395 2017-06-04 09:27:53 +02:00			`return low1 == low2 ? order(cp1) < order(cp2) : low1;`
Use manual lexicographic comparison in RankedMatch::operator< 2016-03-28 15:44:49 +02:00			`}`
Fix infinite loop when comparing RankedMatches containing invalid utf8 If we had a word containing some invalid utf8, like a wrong sequence of continuation bytes, we would infinitely loop back to the previous valid character start. Fixes #1157 2017-01-30 00:50:33 +01:00			`last1 = it1; last2 = it2;`
Use manual lexicographic comparison in RankedMatch::operator< 2016-03-28 15:44:49 +02:00			`}`
Move more logic into RankedMatch 2015-10-27 22:25:18 +01:00			`}`

Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`UnitTest test_ranked_match{[] {`
			`kak_assert(count_word_boundaries_match("run_all_tests", "rat") == 3);`
Fix count_word_boundaries_match 2016-02-18 00:05:08 +01:00			`kak_assert(count_word_boundaries_match("run_all_tests", "at") == 2);`
			`kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "wm") == 2);`
			`kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "cobm") == 3);`
			`kak_assert(count_word_boundaries_match("countWordBoundariesMatch", "cWBM") == 4);`
Fix RankedMatch::operator< with matching prefix candidates 2016-05-17 20:40:36 +02:00			`kak_assert(RankedMatch{"source", "so"} < RankedMatch{"source_data", "so"});`
			`kak_assert(not (RankedMatch{"source_data", "so"} < RankedMatch{"source", "so"}));`
Fix RankedMatch ordering where 'a < a' was true Fixes #679 2016-05-20 19:42:01 +02:00			`kak_assert(not (RankedMatch{"source", "so"} < RankedMatch{"source", "so"}));`
Tweak RankedMatch logic, prioritize matches that are in a single word 2016-09-26 22:54:39 +02:00			`kak_assert(RankedMatch{"single/word", "wo"} < RankedMatch{"multiw/ord", "wo"});`
Tweak ranked match comparison, give contiguous matches an edge 2016-09-30 23:13:01 +02:00			`kak_assert(RankedMatch{"foo/bar/foobar", "foobar"} < RankedMatch{"foo/bar/baz", "foobar"});`
Tweak RankedMatch behaviour and fix bug in its comparison function casting TestableFlag<T> to UnderlyingType<T> was going through bool conversion... Not sure how things worked earlier. 2016-11-14 20:14:09 +01:00			`kak_assert(RankedMatch{"delete-buffer", "db"} < RankedMatch{"debug", "db"});`
			`kak_assert(RankedMatch{"create_task", "ct"} < RankedMatch{"constructor", "ct"});`
Tweak ranked match ordering 2017-01-31 23:30:23 +01:00			`kak_assert(RankedMatch{"class", "cla"} < RankedMatch{"class::attr", "cla"});`
Change RankedMatch ordering to favor `/` characters This will improve matching of filenames, as 'foo/' will be sorted before 'foo-bar' due to `/` coming before `-` in the new ordering (it comes after in ascii/unicode order). Fixes #1395 2017-06-04 09:27:53 +02:00			`kak_assert(RankedMatch{"meta/", "meta"} < RankedMatch{"meta-a/", "meta"});`
Use an heuristic based match ranking algorithm inspired by what YouCompleteMe does 2015-10-29 14:36:30 +01:00			`}};`

Move UsedLetters with RankedMatch 2016-03-25 21:35:57 +01:00			`UnitTest test_used_letters{[]()`
			`{`
			`kak_assert(used_letters("abcd") == to_lower(used_letters("abcdABCD")));`
			`}};`

Extract WordDB::RankedWord as RankedMatch in its own file 2015-10-22 20:49:08 +02:00			`}`