Pass flags to the regex engine to correct anchors

Previously, ^ and $ always matched at the start/end of the current search
range (and \b always matched at those range boundaries as well).

Fixes #536
This commit is contained in:
Maxime Coste 2015-12-23 21:43:07 +00:00
parent 669fccc5e9
commit 1d748a4017
6 changed files with 66 additions and 15 deletions

View File

@ -4,6 +4,9 @@
#include "buffer.hh" #include "buffer.hh"
#include "selection.hh" #include "selection.hh"
#include "utf8_iterator.hh"
#include "unicode.hh"
namespace Kakoune namespace Kakoune
{ {
@ -24,6 +27,25 @@ inline CharCount char_length(const Buffer& buffer, const Selection& range)
buffer.iterator_at(buffer.char_next(range.max()))); buffer.iterator_at(buffer.char_next(range.max())));
} }
// Tells whether coord lies at the beginning of its line, which here means
// its byte column is 0. Only the column is inspected; the line is ignored.
inline bool is_bol(ByteCoord coord)
{
    const bool at_first_column = (coord.column == 0);
    return at_first_column;
}
// Tells whether coord should be considered an end of line in buffer.
// True when either:
//  - buffer.is_end(coord) holds, or
//  - coord.column + 1 equals the length of line coord.line, i.e. coord
//    designates the last byte of that line.
// NOTE(review): presumably stored lines include their trailing newline, so
// the "last byte" is the newline itself - confirm against Buffer.
inline bool is_eol(const Buffer& buffer, ByteCoord coord)
{
    // Check the buffer end first so the line is only indexed for
    // coordinates that are not the end coordinate.
    if (buffer.is_end(coord))
        return true;

    return buffer[coord.line].length() == coord.column+1;
}
// Tells whether coord should be considered an end of word in buffer.
// The buffer end and the very first coordinate {0,0} are always reported
// as end of word; elsewhere, coord is an end of word when the previous
// codepoint is a word character and the codepoint at coord is not.
inline bool is_eow(const Buffer& buffer, ByteCoord coord)
{
    // Both edges are answered without dereferencing: at {0,0} there is no
    // previous codepoint to look at.
    const bool at_buffer_edge = buffer.is_end(coord) or coord == ByteCoord{0,0};
    if (at_buffer_edge)
        return true;

    auto current = utf8::iterator<BufferIterator>(buffer.iterator_at(coord), buffer);
    if (not is_word(*(current-1)))
        return false;
    return not is_word(*current);
}
CharCount get_column(const Buffer& buffer, CharCount get_column(const Buffer& buffer,
CharCount tabstop, ByteCoord coord); CharCount tabstop, ByteCoord coord);

View File

@ -304,7 +304,10 @@ private:
kak_assert(matches.size() % m_faces.size() == 0); kak_assert(matches.size() % m_faces.size() == 0);
using RegexIt = RegexIterator<BufferIterator>; using RegexIt = RegexIterator<BufferIterator>;
RegexIt re_it{buffer.iterator_at(range.begin), RegexIt re_it{buffer.iterator_at(range.begin),
buffer.iterator_at(range.end), m_regex}; buffer.iterator_at(range.end), m_regex,
match_flags(is_bol(range.begin),
is_eol(buffer, range.end),
is_eow(buffer, range.end))};
RegexIt re_end; RegexIt re_end;
for (; re_it != re_end; ++re_it) for (; re_it != re_end; ++re_it)
{ {

View File

@ -797,8 +797,9 @@ void keep(Context& context, NormalParams)
Vector<Selection> keep; Vector<Selection> keep;
for (auto& sel : context.selections()) for (auto& sel : context.selections())
{ {
if (regex_search(buffer.iterator_at(sel.min()), auto begin = buffer.iterator_at(sel.min());
utf8::next(buffer.iterator_at(sel.max()), buffer.end()), ex) == matching) auto end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
if (regex_search(begin, end, ex, RegexConstant::match_any) == matching)
keep.push_back(sel); keep.push_back(sel);
} }
if (keep.empty()) if (keep.empty())

View File

@ -70,6 +70,16 @@ using RegexIterator = regex_ns::regex_iterator<Iterator>;
template<typename Iterator> template<typename Iterator>
using MatchResults = regex_ns::match_results<Iterator>; using MatchResults = regex_ns::match_results<Iterator>;
namespace RegexConstant = regex_ns::regex_constants;
// Builds the regex match flags describing the boundaries of a search range.
//  bol: the range starts at a beginning of line; when false, match_not_bol
//       and match_prev_avail are both set.
//  eol: the range ends at an end of line; when false, match_not_eol is set.
//  eow: the range ends at an end of word; when false, match_not_eow is set.
// Each condition that holds contributes only match_default.
inline RegexConstant::match_flag_type match_flags(bool bol, bool eol, bool eow)
{
    RegexConstant::match_flag_type flags = RegexConstant::match_default;

    if (not bol)
        flags = flags | RegexConstant::match_not_bol | RegexConstant::match_prev_avail;
    if (not eol)
        flags = flags | RegexConstant::match_not_eol;
    if (not eow)
        flags = flags | RegexConstant::match_not_eow;

    return flags;
}
String option_to_string(const Regex& re); String option_to_string(const Regex& re);
void option_from_string(StringView str, Regex& re); void option_from_string(StringView str, Regex& re);

View File

@ -307,7 +307,8 @@ Selection select_paragraph(const Buffer& buffer, const Selection& selection, Obj
if ((flags & ObjectFlags::ToBegin) and first != buffer.begin()) if ((flags & ObjectFlags::ToBegin) and first != buffer.begin())
{ {
skip_while_reverse(first, buffer.begin(), is_eol); skip_while_reverse(first, buffer.begin(),
[](Codepoint c){ return is_eol(c); });
if (flags & ObjectFlags::ToEnd) if (flags & ObjectFlags::ToEnd)
last = first; last = first;
while (first != buffer.begin()) while (first != buffer.begin())
@ -331,7 +332,8 @@ Selection select_paragraph(const Buffer& buffer, const Selection& selection, Obj
if (last != buffer.begin() and is_eol(*last) and is_eol(*(last-1))) if (last != buffer.begin() and is_eol(*last) and is_eol(*(last-1)))
{ {
if (not (flags & ObjectFlags::Inner)) if (not (flags & ObjectFlags::Inner))
skip_while(last, buffer.end(), is_eol); skip_while(last, buffer.end(),
[](Codepoint c){ return is_eol(c); });
break; break;
} }
++last; ++last;
@ -570,8 +572,12 @@ void select_all_matches(SelectionList& selections, const Regex& regex, unsigned
auto& buffer = selections.buffer(); auto& buffer = selections.buffer();
for (auto& sel : selections) for (auto& sel : selections)
{ {
auto sel_beg = buffer.iterator_at(sel.min());
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end()); auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
RegexIt re_it(buffer.iterator_at(sel.min()), sel_end, regex); const auto flags = match_flags(is_bol(sel_beg.coord()),
is_eol(buffer, sel_end.coord()),
is_eow(buffer, sel_end.coord()));
RegexIt re_it(sel_beg, sel_end, regex, flags);
RegexIt re_end; RegexIt re_end;
for (; re_it != re_end; ++re_it) for (; re_it != re_end; ++re_it)
@ -613,7 +619,11 @@ void split_selections(SelectionList& selections, const Regex& regex, unsigned ca
{ {
auto begin = buffer.iterator_at(sel.min()); auto begin = buffer.iterator_at(sel.min());
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end()); auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
RegexIt re_it(begin, sel_end, regex); const auto flags = match_flags(is_bol(begin.coord()),
is_eol(buffer, sel_end.coord()),
is_eow(buffer, sel_end.coord()));
RegexIt re_it(begin, sel_end, regex, flags);
RegexIt re_end; RegexIt re_end;
for (; re_it != re_end; ++re_it) for (; re_it != re_end; ++re_it)

View File

@ -54,7 +54,7 @@ Selection select_to_next_word(const Buffer& buffer, const Selection& selection)
if (categorize<word_type>(*begin) != categorize<word_type>(*(begin+1))) if (categorize<word_type>(*begin) != categorize<word_type>(*(begin+1)))
++begin; ++begin;
skip_while(begin, buffer.end(), is_eol); skip_while(begin, buffer.end(), [](Codepoint c){ return is_eol(c); });
if (begin == buffer.end()) if (begin == buffer.end())
return selection; return selection;
Utf8Iterator end = begin+1; Utf8Iterator end = begin+1;
@ -78,7 +78,7 @@ Selection select_to_next_word_end(const Buffer& buffer, const Selection& selecti
if (categorize<word_type>(*begin) != categorize<word_type>(*(begin+1))) if (categorize<word_type>(*begin) != categorize<word_type>(*(begin+1)))
++begin; ++begin;
skip_while(begin, buffer.end(), is_eol); skip_while(begin, buffer.end(), [](Codepoint c){ return is_eol(c); });
if (begin == buffer.end()) if (begin == buffer.end())
return selection; return selection;
Utf8Iterator end = begin; Utf8Iterator end = begin;
@ -101,7 +101,7 @@ Selection select_to_previous_word(const Buffer& buffer, const Selection& selecti
if (categorize<word_type>(*begin) != categorize<word_type>(*(begin-1))) if (categorize<word_type>(*begin) != categorize<word_type>(*(begin-1)))
--begin; --begin;
skip_while_reverse(begin, buffer.begin(), is_eol); skip_while_reverse(begin, buffer.begin(), [](Codepoint c){ return is_eol(c); });
Utf8Iterator end = begin; Utf8Iterator end = begin;
skip_while_reverse(end, buffer.begin(), is_horizontal_blank); skip_while_reverse(end, buffer.begin(), is_horizontal_blank);
@ -231,12 +231,16 @@ void select_buffer(SelectionList& selections);
enum Direction { Forward, Backward }; enum Direction { Forward, Backward };
inline bool find_last_match(BufferIterator begin, const BufferIterator& end, inline bool find_last_match(const Buffer& buffer, const BufferIterator& pos,
MatchResults<BufferIterator>& res, MatchResults<BufferIterator>& res,
const Regex& regex) const Regex& regex)
{ {
MatchResults<BufferIterator> matches; MatchResults<BufferIterator> matches;
while (regex_search(begin, end, matches, regex)) const bool is_pos_eol = is_eol(buffer, pos.coord());
const bool is_pos_eow = is_eow(buffer, pos.coord());
auto begin = buffer.begin();
while (regex_search(begin, pos, matches, regex,
match_flags(is_bol(begin.coord()), is_pos_eol, is_pos_eow)))
{ {
if (begin == matches[0].second) if (begin == matches[0].second)
break; break;
@ -252,11 +256,12 @@ bool find_match_in_buffer(const Buffer& buffer, const BufferIterator pos,
const Regex& ex) const Regex& ex)
{ {
if (direction == Forward) if (direction == Forward)
return (regex_search(pos, buffer.end(), matches, ex) or return (regex_search(pos, buffer.end(), matches, ex,
match_flags(is_bol(pos.coord()), true, true)) or
regex_search(buffer.begin(), buffer.end(), matches, ex)); regex_search(buffer.begin(), buffer.end(), matches, ex));
else else
return (find_last_match(buffer.begin(), pos, matches, ex) or return (find_last_match(buffer, pos, matches, ex) or
find_last_match(buffer.begin(), buffer.end(), matches, ex)); find_last_match(buffer, buffer.end(), matches, ex));
} }
inline BufferIterator ensure_char_start(const Buffer& buffer, const BufferIterator& it) inline BufferIterator ensure_char_start(const Buffer& buffer, const BufferIterator& it)