From 1d748a401726999959c9bc69e819c2fbbd97a565 Mon Sep 17 00:00:00 2001
From: Maxime Coste
Date: Wed, 23 Dec 2015 21:43:07 +0000
Subject: [PATCH] Pass flags to the regex engine to correct anchors

Current behaviour was matching ^ $ for the current search start/end
(and \b was always matching begin/end as well).

Fixes #536
---
Review notes (commentary only; this section is not part of any hunk):

  * NOTE(review): line breaks, indentation, blank context lines and the
    `<...>` template argument lists in this copy were reconstructed from
    a flattened rendering of the patch. Hunk line counts and the diffstat
    were re-checked, but exact whitespace should be verified against
    commit 1d748a40 before applying.
  * is_bol(coord) is true when coord.column == 0.
  * is_eol(buffer, coord) is true at the buffer end, or when coord is the
    last byte of its line (line length == coord.column + 1).
  * is_eow(buffer, coord) is true at the buffer end and at {0,0};
    otherwise it requires the previous codepoint to be a word character
    and the current one not to be.
  * match_flags(bol, eol, eow): each `false` argument contributes the
    corresponding match_not_bol / match_not_eol / match_not_eow flag.
    Because `|` binds tighter than `?:`, match_prev_avail is part of the
    `bol == false` branch only.
  * keep() in normal.cc passes only RegexConstant::match_any; it does not
    use match_flags().
  * The bare `is_eol` predicate arguments are wrapped in lambdas taking a
    Codepoint - presumably because the new (Buffer, ByteCoord) overload
    makes the unqualified name ambiguous as a function argument; confirm.

 src/buffer_utils.hh | 22 ++++++++++++++++++++++
 src/highlighters.cc |  5 ++++-
 src/normal.cc       |  5 +++--
 src/regex.hh        | 10 ++++++++++
 src/selectors.cc    | 18 ++++++++++++++----
 src/selectors.hh    | 21 +++++++++++++--------
 6 files changed, 66 insertions(+), 15 deletions(-)

diff --git a/src/buffer_utils.hh b/src/buffer_utils.hh
index 3851ee65..8e7fe5dc 100644
--- a/src/buffer_utils.hh
+++ b/src/buffer_utils.hh
@@ -4,6 +4,9 @@
 #include "buffer.hh"
 #include "selection.hh"
 
+#include "utf8_iterator.hh"
+#include "unicode.hh"
+
 namespace Kakoune
 {
 
@@ -24,6 +27,25 @@ inline CharCount char_length(const Buffer& buffer, const Selection& range)
                           buffer.iterator_at(buffer.char_next(range.max())));
 }
 
+inline bool is_bol(ByteCoord coord)
+{
+    return coord.column == 0;
+}
+
+inline bool is_eol(const Buffer& buffer, ByteCoord coord)
+{
+    return buffer.is_end(coord) or buffer[coord.line].length() == coord.column+1;
+}
+
+inline bool is_eow(const Buffer& buffer, ByteCoord coord)
+{
+    if (buffer.is_end(coord) or coord == ByteCoord{0,0})
+        return true;
+
+    auto it = utf8::iterator<BufferIterator>(buffer.iterator_at(coord), buffer);
+    return is_word(*(it-1)) and not is_word(*it);
+}
+
 CharCount get_column(const Buffer& buffer,
                      CharCount tabstop, ByteCoord coord);
 
diff --git a/src/highlighters.cc b/src/highlighters.cc
index 8b0d669d..899f725f 100644
--- a/src/highlighters.cc
+++ b/src/highlighters.cc
@@ -304,7 +304,10 @@ private:
         kak_assert(matches.size() % m_faces.size() == 0);
         using RegexIt = RegexIterator<BufferIterator>;
         RegexIt re_it{buffer.iterator_at(range.begin),
-                      buffer.iterator_at(range.end), m_regex};
+                      buffer.iterator_at(range.end), m_regex,
+                      match_flags(is_bol(range.begin),
+                                  is_eol(buffer, range.end),
+                                  is_eow(buffer, range.end))};
         RegexIt re_end;
         for (; re_it != re_end; ++re_it)
         {
diff --git a/src/normal.cc b/src/normal.cc
index d5d0b4d3..54e7a558 100644
--- a/src/normal.cc
+++ b/src/normal.cc
@@ -797,8 +797,9 @@ void keep(Context& context, NormalParams)
         Vector<Selection> keep;
         for (auto& sel : context.selections())
         {
-            if (regex_search(buffer.iterator_at(sel.min()),
-                             utf8::next(buffer.iterator_at(sel.max()), buffer.end()), ex) == matching)
+            auto begin = buffer.iterator_at(sel.min());
+            auto end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
+            if (regex_search(begin, end, ex, RegexConstant::match_any) == matching)
                 keep.push_back(sel);
         }
         if (keep.empty())
diff --git a/src/regex.hh b/src/regex.hh
index c4eab808..93c5d462 100644
--- a/src/regex.hh
+++ b/src/regex.hh
@@ -70,6 +70,16 @@ using RegexIterator = regex_ns::regex_iterator<Iterator>;
 template<typename Iterator>
 using MatchResults = regex_ns::match_results<Iterator>;
 
+namespace RegexConstant = regex_ns::regex_constants;
+
+inline RegexConstant::match_flag_type match_flags(bool bol, bool eol, bool eow)
+{
+    return (bol ? RegexConstant::match_default : RegexConstant::match_not_bol |
+                                                 RegexConstant::match_prev_avail) |
+           (eol ? RegexConstant::match_default : RegexConstant::match_not_eol) |
+           (eow ? RegexConstant::match_default : RegexConstant::match_not_eow);
+}
+
 String option_to_string(const Regex& re);
 void option_from_string(StringView str, Regex& re);
 
diff --git a/src/selectors.cc b/src/selectors.cc
index c86d2749..f7ee4be5 100644
--- a/src/selectors.cc
+++ b/src/selectors.cc
@@ -307,7 +307,8 @@ Selection select_paragraph(const Buffer& buffer, const Selection& selection, Obj
 
     if ((flags & ObjectFlags::ToBegin) and first != buffer.begin())
     {
-        skip_while_reverse(first, buffer.begin(), is_eol);
+        skip_while_reverse(first, buffer.begin(),
+                           [](Codepoint c){ return is_eol(c); });
         if (flags & ObjectFlags::ToEnd)
             last = first;
         while (first != buffer.begin())
@@ -331,7 +332,8 @@ Selection select_paragraph(const Buffer& buffer, const Selection& selection, Obj
             if (last != buffer.begin() and is_eol(*last) and is_eol(*(last-1)))
             {
                 if (not (flags & ObjectFlags::Inner))
-                    skip_while(last, buffer.end(), is_eol);
+                    skip_while(last, buffer.end(),
+                               [](Codepoint c){ return is_eol(c); });
                 break;
             }
             ++last;
@@ -570,8 +572,12 @@ void select_all_matches(SelectionList& selections, const Regex& regex, unsigned
     auto& buffer = selections.buffer();
     for (auto& sel : selections)
     {
+        auto sel_beg = buffer.iterator_at(sel.min());
         auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
-        RegexIt re_it(buffer.iterator_at(sel.min()), sel_end, regex);
+        const auto flags = match_flags(is_bol(sel_beg.coord()),
+                                       is_eol(buffer, sel_end.coord()),
+                                       is_eow(buffer, sel_end.coord()));
+        RegexIt re_it(sel_beg, sel_end, regex, flags);
         RegexIt re_end;
 
         for (; re_it != re_end; ++re_it)
@@ -613,7 +619,11 @@ void split_selections(SelectionList& selections, const Regex& regex, unsigned ca
     {
         auto begin = buffer.iterator_at(sel.min());
         auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
-        RegexIt re_it(begin, sel_end, regex);
+        const auto flags = match_flags(is_bol(begin.coord()),
+                                       is_eol(buffer, sel_end.coord()),
+                                       is_eow(buffer, sel_end.coord()));
+
+        RegexIt re_it(begin, sel_end, regex, flags);
         RegexIt re_end;
 
         for (; re_it != re_end; ++re_it)
diff --git a/src/selectors.hh b/src/selectors.hh
index fff025fc..442a93c8 100644
--- a/src/selectors.hh
+++ b/src/selectors.hh
@@ -54,7 +54,7 @@ Selection select_to_next_word(const Buffer& buffer, const Selection& selection)
     if (categorize(*begin) != categorize(*(begin+1)))
         ++begin;
 
-    skip_while(begin, buffer.end(), is_eol);
+    skip_while(begin, buffer.end(), [](Codepoint c){ return is_eol(c); });
     if (begin == buffer.end())
         return selection;
     Utf8Iterator end = begin+1;
@@ -78,7 +78,7 @@ Selection select_to_next_word_end(const Buffer& buffer, const Selection& selecti
     if (categorize(*begin) != categorize(*(begin+1)))
         ++begin;
 
-    skip_while(begin, buffer.end(), is_eol);
+    skip_while(begin, buffer.end(), [](Codepoint c){ return is_eol(c); });
     if (begin == buffer.end())
         return selection;
     Utf8Iterator end = begin;
@@ -101,7 +101,7 @@ Selection select_to_previous_word(const Buffer& buffer, const Selection& selecti
     if (categorize(*begin) != categorize(*(begin-1)))
         --begin;
 
-    skip_while_reverse(begin, buffer.begin(), is_eol);
+    skip_while_reverse(begin, buffer.begin(), [](Codepoint c){ return is_eol(c); });
     Utf8Iterator end = begin;
     skip_while_reverse(end, buffer.begin(), is_horizontal_blank);
 
@@ -231,12 +231,16 @@ void select_buffer(SelectionList& selections);
 
 enum Direction { Forward, Backward };
 
-inline bool find_last_match(BufferIterator begin, const BufferIterator& end,
+inline bool find_last_match(const Buffer& buffer, const BufferIterator& pos,
                             MatchResults<BufferIterator>& res,
                             const Regex& regex)
 {
     MatchResults<BufferIterator> matches;
-    while (regex_search(begin, end, matches, regex))
+    const bool is_pos_eol = is_eol(buffer, pos.coord());
+    const bool is_pos_eow = is_eow(buffer, pos.coord());
+    auto begin = buffer.begin();
+    while (regex_search(begin, pos, matches, regex,
+                        match_flags(is_bol(begin.coord()), is_pos_eol, is_pos_eow)))
     {
         if (begin == matches[0].second)
             break;
@@ -252,11 +256,12 @@ bool find_match_in_buffer(const Buffer& buffer, const BufferIterator pos,
                           const Regex& ex)
 {
     if (direction == Forward)
-        return (regex_search(pos, buffer.end(), matches, ex) or
+        return (regex_search(pos, buffer.end(), matches, ex,
+                             match_flags(is_bol(pos.coord()), true, true)) or
                 regex_search(buffer.begin(), buffer.end(), matches, ex));
     else
-        return (find_last_match(buffer.begin(), pos, matches, ex) or
-                find_last_match(buffer.begin(), buffer.end(), matches, ex));
+        return (find_last_match(buffer, pos, matches, ex) or
+                find_last_match(buffer, buffer.end(), matches, ex));
 }
 
 inline BufferIterator ensure_char_start(const Buffer& buffer, const BufferIterator& it)