utf8 awareness in selectors
This commit is contained in:
parent
ed592a3005
commit
0a686573ab
160
src/selectors.cc
160
src/selectors.cc
|
@ -2,37 +2,42 @@
|
|||
|
||||
#include "string.hh"
|
||||
|
||||
#include "utf8_iterator.hh"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
||||
using utf8::Codepoint;
|
||||
using Utf8Iterator = utf8::utf8_iterator<BufferIterator>;
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
bool is_eol(char c)
|
||||
bool is_eol(Codepoint c)
|
||||
{
|
||||
return c == '\n';
|
||||
}
|
||||
|
||||
bool is_blank(char c)
|
||||
bool is_blank(Codepoint c)
|
||||
{
|
||||
return c == ' ' or c == '\t';
|
||||
}
|
||||
|
||||
template<bool punctuation_is_word = false>
|
||||
bool is_word(char c)
|
||||
bool is_word(Codepoint c)
|
||||
{
|
||||
return Kakoune::is_word(c);
|
||||
}
|
||||
|
||||
template<>
|
||||
bool is_word<true>(char c)
|
||||
bool is_word<true>(Codepoint c)
|
||||
{
|
||||
return !is_blank(c) and !is_eol(c);
|
||||
}
|
||||
|
||||
static bool is_punctuation(char c)
|
||||
static bool is_punctuation(Codepoint c)
|
||||
{
|
||||
return not (is_word(c) or is_blank(c) or is_eol(c));
|
||||
}
|
||||
|
@ -46,7 +51,7 @@ enum class CharCategories
|
|||
};
|
||||
|
||||
template<bool punctuation_is_word = false>
|
||||
CharCategories categorize(char c)
|
||||
CharCategories categorize(Codepoint c)
|
||||
{
|
||||
if (is_word(c))
|
||||
return CharCategories::Word;
|
||||
|
@ -58,20 +63,31 @@ CharCategories categorize(char c)
|
|||
: CharCategories::Punctuation;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool skip_while(BufferIterator& it, T condition)
|
||||
bool is_begin(const BufferIterator& it) { return it.is_begin(); }
|
||||
bool is_end(const BufferIterator& it) { return it.is_end(); }
|
||||
|
||||
bool is_begin(const Utf8Iterator& it) { return it.underlying_iterator().is_begin(); }
|
||||
bool is_end(const Utf8Iterator& it) { return it.underlying_iterator().is_end(); }
|
||||
|
||||
template<typename Iterator, typename T>
|
||||
bool skip_while(Iterator& it, T condition)
|
||||
{
|
||||
while (not it.is_end() and condition(*it))
|
||||
while (not is_end(it) and condition(*it))
|
||||
++it;
|
||||
return not it.is_end() and condition(*it);
|
||||
return not is_end(it) and condition(*it);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
bool skip_while_reverse(BufferIterator& it, T condition)
|
||||
template<typename Iterator, typename T>
|
||||
bool skip_while_reverse(Iterator& it, T condition)
|
||||
{
|
||||
while (not it.is_begin() and condition(*it))
|
||||
while (not is_begin(it) and condition(*it))
|
||||
--it;
|
||||
return not it.is_end() and condition(*it);
|
||||
return not is_end(it) and condition(*it);
|
||||
}
|
||||
|
||||
Selection utf8_selection(const Utf8Iterator& first, const Utf8Iterator& last)
|
||||
{
|
||||
return Selection(first.underlying_iterator(), last.underlying_iterator());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -81,13 +97,13 @@ typedef boost::regex_iterator<BufferIterator> RegexIterator;
|
|||
template<bool punctuation_is_word>
|
||||
SelectionAndCaptures select_to_next_word(const Selection& selection)
|
||||
{
|
||||
BufferIterator begin = selection.last();
|
||||
Utf8Iterator begin = selection.last();
|
||||
if (categorize<punctuation_is_word>(*begin) !=
|
||||
categorize<punctuation_is_word>(*(begin+1)))
|
||||
++begin;
|
||||
|
||||
skip_while(begin, is_eol);
|
||||
BufferIterator end = begin+1;
|
||||
Utf8Iterator end = begin+1;
|
||||
|
||||
if (not punctuation_is_word and is_punctuation(*begin))
|
||||
skip_while(end, is_punctuation);
|
||||
|
@ -96,7 +112,7 @@ SelectionAndCaptures select_to_next_word(const Selection& selection)
|
|||
|
||||
bool with_end = skip_while(end, is_blank);
|
||||
|
||||
return Selection(begin, with_end ? end : end-1);
|
||||
return utf8_selection(begin, with_end ? end : end-1);
|
||||
}
|
||||
template SelectionAndCaptures select_to_next_word<false>(const Selection&);
|
||||
template SelectionAndCaptures select_to_next_word<true>(const Selection&);
|
||||
|
@ -104,13 +120,13 @@ template SelectionAndCaptures select_to_next_word<true>(const Selection&);
|
|||
template<bool punctuation_is_word>
|
||||
SelectionAndCaptures select_to_next_word_end(const Selection& selection)
|
||||
{
|
||||
BufferIterator begin = selection.last();
|
||||
Utf8Iterator begin = selection.last();
|
||||
if (categorize<punctuation_is_word>(*begin) !=
|
||||
categorize<punctuation_is_word>(*(begin+1)))
|
||||
++begin;
|
||||
|
||||
skip_while(begin, is_eol);
|
||||
BufferIterator end = begin;
|
||||
Utf8Iterator end = begin;
|
||||
skip_while(end, is_blank);
|
||||
|
||||
bool with_end = false;
|
||||
|
@ -119,7 +135,7 @@ SelectionAndCaptures select_to_next_word_end(const Selection& selection)
|
|||
else if (is_word<punctuation_is_word>(*end))
|
||||
with_end = skip_while(end, is_word<punctuation_is_word>);
|
||||
|
||||
return Selection(begin, with_end ? end : end-1);
|
||||
return utf8_selection(begin, with_end ? end : end-1);
|
||||
}
|
||||
template SelectionAndCaptures select_to_next_word_end<false>(const Selection&);
|
||||
template SelectionAndCaptures select_to_next_word_end<true>(const Selection&);
|
||||
|
@ -127,14 +143,14 @@ template SelectionAndCaptures select_to_next_word_end<true>(const Selection&);
|
|||
template<bool punctuation_is_word>
|
||||
SelectionAndCaptures select_to_previous_word(const Selection& selection)
|
||||
{
|
||||
BufferIterator begin = selection.last();
|
||||
Utf8Iterator begin = selection.last();
|
||||
|
||||
if (categorize<punctuation_is_word>(*begin) !=
|
||||
categorize<punctuation_is_word>(*(begin-1)))
|
||||
--begin;
|
||||
|
||||
skip_while_reverse(begin, is_eol);
|
||||
BufferIterator end = begin;
|
||||
Utf8Iterator end = begin;
|
||||
skip_while_reverse(end, is_blank);
|
||||
|
||||
bool with_end = false;
|
||||
|
@ -143,30 +159,30 @@ SelectionAndCaptures select_to_previous_word(const Selection& selection)
|
|||
else if (is_word<punctuation_is_word>(*end))
|
||||
with_end = skip_while_reverse(end, is_word<punctuation_is_word>);
|
||||
|
||||
return Selection(begin, with_end ? end : end+1);
|
||||
return utf8_selection(begin, with_end ? end : end+1);
|
||||
}
|
||||
template SelectionAndCaptures select_to_previous_word<false>(const Selection&);
|
||||
template SelectionAndCaptures select_to_previous_word<true>(const Selection&);
|
||||
|
||||
SelectionAndCaptures select_line(const Selection& selection)
|
||||
{
|
||||
BufferIterator first = selection.last();
|
||||
if (*first == '\n' and not (first + 1).is_end())
|
||||
Utf8Iterator first = selection.last();
|
||||
if (*first == '\n' and not is_end(first + 1))
|
||||
++first;
|
||||
|
||||
while (not first.is_begin() and *(first - 1) != '\n')
|
||||
while (not is_begin(first) and *(first - 1) != '\n')
|
||||
--first;
|
||||
|
||||
BufferIterator last = first;
|
||||
while (not (last + 1).is_end() and *last != '\n')
|
||||
Utf8Iterator last = first;
|
||||
while (not is_end(last + 1) and *last != '\n')
|
||||
++last;
|
||||
return Selection(first, last);
|
||||
return utf8_selection(first, last);
|
||||
}
|
||||
|
||||
SelectionAndCaptures select_matching(const Selection& selection)
|
||||
{
|
||||
std::vector<char> matching_pairs = { '(', ')', '{', '}', '[', ']', '<', '>' };
|
||||
BufferIterator it = selection.last();
|
||||
Utf8Iterator it = selection.last();
|
||||
std::vector<char>::iterator match = matching_pairs.end();
|
||||
while (not is_eol(*it))
|
||||
{
|
||||
|
@ -178,19 +194,19 @@ SelectionAndCaptures select_matching(const Selection& selection)
|
|||
if (match == matching_pairs.end())
|
||||
return selection;
|
||||
|
||||
BufferIterator begin = it;
|
||||
Utf8Iterator begin = it;
|
||||
|
||||
if (((match - matching_pairs.begin()) % 2) == 0)
|
||||
{
|
||||
int level = 0;
|
||||
const char opening = *match;
|
||||
const char closing = *(match+1);
|
||||
while (not it.is_end())
|
||||
while (not is_end(it))
|
||||
{
|
||||
if (*it == opening)
|
||||
++level;
|
||||
else if (*it == closing and --level == 0)
|
||||
return Selection(begin, it);
|
||||
return utf8_selection(begin, it);
|
||||
|
||||
++it;
|
||||
}
|
||||
|
@ -200,12 +216,12 @@ SelectionAndCaptures select_matching(const Selection& selection)
|
|||
int level = 0;
|
||||
const char opening = *(match-1);
|
||||
const char closing = *match;
|
||||
while (not it.is_begin())
|
||||
while (not is_begin(it))
|
||||
{
|
||||
if (*it == closing)
|
||||
++level;
|
||||
else if (*it == opening and --level == 0)
|
||||
return Selection(begin, it);
|
||||
return utf8_selection(begin, it);
|
||||
--it;
|
||||
}
|
||||
}
|
||||
|
@ -213,12 +229,12 @@ SelectionAndCaptures select_matching(const Selection& selection)
|
|||
}
|
||||
|
||||
SelectionAndCaptures select_surrounding(const Selection& selection,
|
||||
const std::pair<char, char>& matching,
|
||||
const CodepointPair& matching,
|
||||
bool inside)
|
||||
{
|
||||
int level = 0;
|
||||
BufferIterator first = selection.last();
|
||||
while (not first.is_begin())
|
||||
Utf8Iterator first = selection.last();
|
||||
while (not is_begin(first))
|
||||
{
|
||||
if (first != selection.last() and *first == matching.second)
|
||||
++level;
|
||||
|
@ -235,8 +251,8 @@ SelectionAndCaptures select_surrounding(const Selection& selection,
|
|||
return selection;
|
||||
|
||||
level = 0;
|
||||
BufferIterator last = first + 1;
|
||||
while (not last.is_end())
|
||||
Utf8Iterator last = first + 1;
|
||||
while (not is_end(last))
|
||||
{
|
||||
if (*last == matching.first)
|
||||
++level;
|
||||
|
@ -258,64 +274,64 @@ SelectionAndCaptures select_surrounding(const Selection& selection,
|
|||
if (first != last)
|
||||
--last;
|
||||
}
|
||||
return Selection(first, last);
|
||||
return utf8_selection(first, last);
|
||||
}
|
||||
|
||||
SelectionAndCaptures select_to(const Selection& selection,
|
||||
char c, int count, bool inclusive)
|
||||
Codepoint c, int count, bool inclusive)
|
||||
{
|
||||
BufferIterator begin = selection.last();
|
||||
BufferIterator end = begin;
|
||||
Utf8Iterator begin = selection.last();
|
||||
Utf8Iterator end = begin;
|
||||
do
|
||||
{
|
||||
++end;
|
||||
skip_while(end, [c](char cur) { return not is_eol(cur) and cur != c; });
|
||||
if (end.is_end() or is_eol(*end))
|
||||
skip_while(end, [c](Codepoint cur) { return not is_eol(cur) and cur != c; });
|
||||
if (is_end(end) or is_eol(*end))
|
||||
return selection;
|
||||
}
|
||||
while (--count > 0);
|
||||
|
||||
return Selection(begin, inclusive ? end : end-1);
|
||||
return utf8_selection(begin, inclusive ? end : end-1);
|
||||
}
|
||||
|
||||
SelectionAndCaptures select_to_reverse(const Selection& selection,
|
||||
char c, int count, bool inclusive)
|
||||
Codepoint c, int count, bool inclusive)
|
||||
{
|
||||
BufferIterator begin = selection.last();
|
||||
BufferIterator end = begin;
|
||||
Utf8Iterator begin = selection.last();
|
||||
Utf8Iterator end = begin;
|
||||
do
|
||||
{
|
||||
--end;
|
||||
skip_while_reverse(end, [c](char cur) { return not is_eol(cur) and cur != c; });
|
||||
if (end.is_begin() or is_eol(*end))
|
||||
skip_while_reverse(end, [c](Codepoint cur) { return not is_eol(cur) and cur != c; });
|
||||
if (is_begin(end) or is_eol(*end))
|
||||
return selection;
|
||||
}
|
||||
while (--count > 0);
|
||||
|
||||
return Selection(begin, inclusive ? end : end+1);
|
||||
return utf8_selection(begin, inclusive ? end : end+1);
|
||||
}
|
||||
|
||||
SelectionAndCaptures select_to_eol(const Selection& selection)
|
||||
{
|
||||
BufferIterator begin = selection.last();
|
||||
BufferIterator end = begin + 1;
|
||||
skip_while(end, [](char cur) { return not is_eol(cur); });
|
||||
return Selection(begin, end-1);
|
||||
Utf8Iterator begin = selection.last();
|
||||
Utf8Iterator end = begin + 1;
|
||||
skip_while(end, [](Codepoint cur) { return not is_eol(cur); });
|
||||
return utf8_selection(begin, end-1);
|
||||
}
|
||||
|
||||
SelectionAndCaptures select_to_eol_reverse(const Selection& selection)
|
||||
{
|
||||
BufferIterator begin = selection.last();
|
||||
BufferIterator end = begin - 1;
|
||||
skip_while_reverse(end, [](char cur) { return not is_eol(cur); });
|
||||
return Selection(begin, end.is_begin() ? end : end+1);
|
||||
Utf8Iterator begin = selection.last();
|
||||
Utf8Iterator end = begin - 1;
|
||||
skip_while_reverse(end, [](Codepoint cur) { return not is_eol(cur); });
|
||||
return utf8_selection(begin, is_begin(end) ? end : end+1);
|
||||
}
|
||||
|
||||
template<bool punctuation_is_word>
|
||||
SelectionAndCaptures select_whole_word(const Selection& selection, bool inner)
|
||||
{
|
||||
BufferIterator first = selection.last();
|
||||
BufferIterator last = first;
|
||||
Utf8Iterator first = selection.last();
|
||||
Utf8Iterator last = first;
|
||||
if (is_word(*first))
|
||||
{
|
||||
if (not skip_while_reverse(first, is_word<punctuation_is_word>))
|
||||
|
@ -334,13 +350,14 @@ SelectionAndCaptures select_whole_word(const Selection& selection, bool inner)
|
|||
skip_while(last, is_word<punctuation_is_word>);
|
||||
}
|
||||
--last;
|
||||
return Selection(first, last);
|
||||
return utf8_selection(first, last);
|
||||
}
|
||||
template SelectionAndCaptures select_whole_word<false>(const Selection&, bool);
|
||||
template SelectionAndCaptures select_whole_word<true>(const Selection&, bool);
|
||||
|
||||
SelectionAndCaptures select_whole_lines(const Selection& selection)
|
||||
{
|
||||
// no need to be utf8 aware for is_eol as we only use \n as line seperator
|
||||
BufferIterator first = selection.first();
|
||||
BufferIterator last = selection.last();
|
||||
BufferIterator& to_line_start = first <= last ? first : last;
|
||||
|
@ -352,14 +369,13 @@ SelectionAndCaptures select_whole_lines(const Selection& selection)
|
|||
|
||||
skip_while(to_line_end, [](char cur) { return not is_eol(cur); });
|
||||
|
||||
|
||||
return Selection(first, last);
|
||||
}
|
||||
|
||||
SelectionAndCaptures select_whole_buffer(const Selection& selection)
|
||||
{
|
||||
const Buffer& buffer = selection.first().buffer();
|
||||
return Selection(buffer.begin(), buffer.end()-1);
|
||||
return Selection(buffer.begin(), utf8::previous(buffer.end()));
|
||||
}
|
||||
|
||||
SelectionAndCaptures select_next_match(const Selection& selection,
|
||||
|
@ -367,6 +383,8 @@ SelectionAndCaptures select_next_match(const Selection& selection,
|
|||
{
|
||||
try
|
||||
{
|
||||
// regex matching do not use Utf8Iterator as boost::regex handle utf8
|
||||
// decoding itself
|
||||
BufferIterator begin = selection.last();
|
||||
BufferIterator end = begin;
|
||||
CaptureList captures;
|
||||
|
@ -374,7 +392,7 @@ SelectionAndCaptures select_next_match(const Selection& selection,
|
|||
Regex ex(regex.begin(), regex.end());
|
||||
boost::match_results<BufferIterator> matches;
|
||||
|
||||
if (boost::regex_search(begin+1, begin.buffer().end(),
|
||||
if (boost::regex_search(utf8::next(begin), begin.buffer().end(),
|
||||
matches, ex))
|
||||
{
|
||||
begin = matches[0].first;
|
||||
|
@ -382,7 +400,7 @@ SelectionAndCaptures select_next_match(const Selection& selection,
|
|||
for (auto& match : matches)
|
||||
captures.push_back(String(match.first, match.second));
|
||||
}
|
||||
else if (boost::regex_search(begin.buffer().begin(), begin+1,
|
||||
else if (boost::regex_search(begin.buffer().begin(), utf8::next(begin),
|
||||
matches, ex))
|
||||
{
|
||||
begin = matches[0].first;
|
||||
|
@ -396,7 +414,7 @@ SelectionAndCaptures select_next_match(const Selection& selection,
|
|||
if (begin == end)
|
||||
++end;
|
||||
|
||||
return SelectionAndCaptures(begin, end - 1, std::move(captures));
|
||||
return SelectionAndCaptures(begin, utf8::previous(end), std::move(captures));
|
||||
}
|
||||
catch (boost::regex_error& err)
|
||||
{
|
||||
|
@ -427,7 +445,7 @@ SelectionAndCapturesList select_all_matches(const Selection& selection,
|
|||
captures.push_back(String(match.first, match.second));
|
||||
|
||||
result.push_back(SelectionAndCaptures(begin,
|
||||
begin == end ? end : end-1,
|
||||
begin == end ? end : utf8::previous(end),
|
||||
std::move(captures)));
|
||||
}
|
||||
return result;
|
||||
|
@ -454,7 +472,7 @@ SelectionAndCapturesList split_selection(const Selection& selection,
|
|||
{
|
||||
BufferIterator end = (*re_it)[0].first;
|
||||
|
||||
result.push_back(Selection(begin, (begin == end) ? end : end-1));
|
||||
result.push_back(Selection(begin, (begin == end) ? end : utf8::previous(end)));
|
||||
begin = (*re_it)[0].second;
|
||||
}
|
||||
result.push_back(Selection(begin, selection.last()));
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#define selectors_hh_INCLUDED
|
||||
|
||||
#include "selection.hh"
|
||||
#include "utf8.hh"
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
@ -15,14 +16,19 @@ SelectionAndCaptures select_to_previous_word(const Selection& selection);
|
|||
|
||||
SelectionAndCaptures select_line(const Selection& selection);
|
||||
SelectionAndCaptures select_matching(const Selection& selection);
|
||||
|
||||
|
||||
using CodepointPair = std::pair<utf8::Codepoint, utf8::Codepoint>;
|
||||
SelectionAndCaptures select_surrounding(const Selection& selection,
|
||||
const std::pair<char, char>& matching,
|
||||
const CodepointPair& matching,
|
||||
bool inside);
|
||||
|
||||
SelectionAndCaptures select_to(const Selection& selection,
|
||||
char c, int count, bool inclusive);
|
||||
utf8::Codepoint c,
|
||||
int count, bool inclusive);
|
||||
SelectionAndCaptures select_to_reverse(const Selection& selection,
|
||||
char c, int count, bool inclusive);
|
||||
utf8::Codepoint c,
|
||||
int count, bool inclusive);
|
||||
|
||||
SelectionAndCaptures select_to_eol(const Selection& selection);
|
||||
SelectionAndCaptures select_to_eol_reverse(const Selection& selection);
|
||||
|
|
Loading…
Reference in New Issue
Block a user