Pass flags to the regex engine to correct anchors

Previously, ^ and $ matched at the start and end of the current search range
(and \b always matched at the range's begin/end as well).

Fixes #536
This commit is contained in:
Maxime Coste 2015-12-23 21:43:07 +00:00
parent 669fccc5e9
commit 1d748a4017
6 changed files with 66 additions and 15 deletions

View File

@ -4,6 +4,9 @@
#include "buffer.hh"
#include "selection.hh"
#include "utf8_iterator.hh"
#include "unicode.hh"
namespace Kakoune
{
@ -24,6 +27,25 @@ inline CharCount char_length(const Buffer& buffer, const Selection& range)
buffer.iterator_at(buffer.char_next(range.max())));
}
// Tells whether coord sits at the beginning of a line, i.e. whether its
// column is 0.
inline bool is_bol(ByteCoord coord)
{
    const bool at_line_start = (coord.column == 0);
    return at_line_start;
}
// Tells whether coord is at an end of line in buffer.
//
// This holds when buffer.is_end(coord) is true, or when coord.column is the
// index of the last byte of line coord.line (the line length equals
// coord.column+1).
inline bool is_eol(const Buffer& buffer, ByteCoord coord)
{
    if (buffer.is_end(coord))
        return true;

    // Only index the line once we know coord is not the buffer end.
    return buffer[coord.line].length() == coord.column+1;
}
// Tells whether coord is at an end of word in buffer.
//
// The buffer end and the coordinate {0,0} are both reported as word ends
// without looking at any character. Anywhere else, coord is a word end when
// the codepoint just before it is a word character and the codepoint at
// coord is not.
inline bool is_eow(const Buffer& buffer, ByteCoord coord)
{
    if (buffer.is_end(coord))
        return true;
    if (coord == ByteCoord{0,0})
        return true;

    auto here = utf8::iterator<BufferIterator>(buffer.iterator_at(coord), buffer);

    // The current codepoint is only inspected when the previous one is a
    // word character.
    if (not is_word(*(here-1)))
        return false;
    return not is_word(*here);
}
CharCount get_column(const Buffer& buffer,
CharCount tabstop, ByteCoord coord);

View File

@ -304,7 +304,10 @@ private:
kak_assert(matches.size() % m_faces.size() == 0);
using RegexIt = RegexIterator<BufferIterator>;
RegexIt re_it{buffer.iterator_at(range.begin),
buffer.iterator_at(range.end), m_regex};
buffer.iterator_at(range.end), m_regex,
match_flags(is_bol(range.begin),
is_eol(buffer, range.end),
is_eow(buffer, range.end))};
RegexIt re_end;
for (; re_it != re_end; ++re_it)
{

View File

@ -797,8 +797,9 @@ void keep(Context& context, NormalParams)
Vector<Selection> keep;
for (auto& sel : context.selections())
{
if (regex_search(buffer.iterator_at(sel.min()),
utf8::next(buffer.iterator_at(sel.max()), buffer.end()), ex) == matching)
auto begin = buffer.iterator_at(sel.min());
auto end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
if (regex_search(begin, end, ex, RegexConstant::match_any) == matching)
keep.push_back(sel);
}
if (keep.empty())

View File

@ -70,6 +70,16 @@ using RegexIterator = regex_ns::regex_iterator<Iterator>;
template<typename Iterator>
using MatchResults = regex_ns::match_results<Iterator>;
namespace RegexConstant = regex_ns::regex_constants;
// Builds the regex match flags describing the boundaries of a searched range.
//
// bol: the range begins at a beginning of line; when false, match_not_bol
//      and match_prev_avail are set.
// eol: the range ends at an end of line; when false, match_not_eol is set.
// eow: the range ends at an end of word; when false, match_not_eow is set.
//
// When all three are true the result is match_default.
inline RegexConstant::match_flag_type match_flags(bool bol, bool eol, bool eow)
{
    RegexConstant::match_flag_type flags = RegexConstant::match_default;

    if (not bol)
        flags = flags | RegexConstant::match_not_bol |
                        RegexConstant::match_prev_avail;
    if (not eol)
        flags = flags | RegexConstant::match_not_eol;
    if (not eow)
        flags = flags | RegexConstant::match_not_eow;

    return flags;
}
String option_to_string(const Regex& re);
void option_from_string(StringView str, Regex& re);

View File

@ -307,7 +307,8 @@ Selection select_paragraph(const Buffer& buffer, const Selection& selection, Obj
if ((flags & ObjectFlags::ToBegin) and first != buffer.begin())
{
skip_while_reverse(first, buffer.begin(), is_eol);
skip_while_reverse(first, buffer.begin(),
[](Codepoint c){ return is_eol(c); });
if (flags & ObjectFlags::ToEnd)
last = first;
while (first != buffer.begin())
@ -331,7 +332,8 @@ Selection select_paragraph(const Buffer& buffer, const Selection& selection, Obj
if (last != buffer.begin() and is_eol(*last) and is_eol(*(last-1)))
{
if (not (flags & ObjectFlags::Inner))
skip_while(last, buffer.end(), is_eol);
skip_while(last, buffer.end(),
[](Codepoint c){ return is_eol(c); });
break;
}
++last;
@ -570,8 +572,12 @@ void select_all_matches(SelectionList& selections, const Regex& regex, unsigned
auto& buffer = selections.buffer();
for (auto& sel : selections)
{
auto sel_beg = buffer.iterator_at(sel.min());
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
RegexIt re_it(buffer.iterator_at(sel.min()), sel_end, regex);
const auto flags = match_flags(is_bol(sel_beg.coord()),
is_eol(buffer, sel_end.coord()),
is_eow(buffer, sel_end.coord()));
RegexIt re_it(sel_beg, sel_end, regex, flags);
RegexIt re_end;
for (; re_it != re_end; ++re_it)
@ -613,7 +619,11 @@ void split_selections(SelectionList& selections, const Regex& regex, unsigned ca
{
auto begin = buffer.iterator_at(sel.min());
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
RegexIt re_it(begin, sel_end, regex);
const auto flags = match_flags(is_bol(begin.coord()),
is_eol(buffer, sel_end.coord()),
is_eow(buffer, sel_end.coord()));
RegexIt re_it(begin, sel_end, regex, flags);
RegexIt re_end;
for (; re_it != re_end; ++re_it)

View File

@ -54,7 +54,7 @@ Selection select_to_next_word(const Buffer& buffer, const Selection& selection)
if (categorize<word_type>(*begin) != categorize<word_type>(*(begin+1)))
++begin;
skip_while(begin, buffer.end(), is_eol);
skip_while(begin, buffer.end(), [](Codepoint c){ return is_eol(c); });
if (begin == buffer.end())
return selection;
Utf8Iterator end = begin+1;
@ -78,7 +78,7 @@ Selection select_to_next_word_end(const Buffer& buffer, const Selection& selecti
if (categorize<word_type>(*begin) != categorize<word_type>(*(begin+1)))
++begin;
skip_while(begin, buffer.end(), is_eol);
skip_while(begin, buffer.end(), [](Codepoint c){ return is_eol(c); });
if (begin == buffer.end())
return selection;
Utf8Iterator end = begin;
@ -101,7 +101,7 @@ Selection select_to_previous_word(const Buffer& buffer, const Selection& selecti
if (categorize<word_type>(*begin) != categorize<word_type>(*(begin-1)))
--begin;
skip_while_reverse(begin, buffer.begin(), is_eol);
skip_while_reverse(begin, buffer.begin(), [](Codepoint c){ return is_eol(c); });
Utf8Iterator end = begin;
skip_while_reverse(end, buffer.begin(), is_horizontal_blank);
@ -231,12 +231,16 @@ void select_buffer(SelectionList& selections);
enum Direction { Forward, Backward };
inline bool find_last_match(BufferIterator begin, const BufferIterator& end,
inline bool find_last_match(const Buffer& buffer, const BufferIterator& pos,
MatchResults<BufferIterator>& res,
const Regex& regex)
{
MatchResults<BufferIterator> matches;
while (regex_search(begin, end, matches, regex))
const bool is_pos_eol = is_eol(buffer, pos.coord());
const bool is_pos_eow = is_eow(buffer, pos.coord());
auto begin = buffer.begin();
while (regex_search(begin, pos, matches, regex,
match_flags(is_bol(begin.coord()), is_pos_eol, is_pos_eow)))
{
if (begin == matches[0].second)
break;
@ -252,11 +256,12 @@ bool find_match_in_buffer(const Buffer& buffer, const BufferIterator pos,
const Regex& ex)
{
if (direction == Forward)
return (regex_search(pos, buffer.end(), matches, ex) or
return (regex_search(pos, buffer.end(), matches, ex,
match_flags(is_bol(pos.coord()), true, true)) or
regex_search(buffer.begin(), buffer.end(), matches, ex));
else
return (find_last_match(buffer.begin(), pos, matches, ex) or
find_last_match(buffer.begin(), buffer.end(), matches, ex));
return (find_last_match(buffer, pos, matches, ex) or
find_last_match(buffer, buffer.end(), matches, ex));
}
inline BufferIterator ensure_char_start(const Buffer& buffer, const BufferIterator& it)