Refactor RegexIterator to use a Sentinel

This commit is contained in:
Maxime Coste 2018-12-19 21:47:44 +11:00
parent 3babd0685c
commit 566268d7bc
4 changed files with 52 additions and 83 deletions

View File

@ -361,20 +361,17 @@ private:
void add_matches(const Buffer& buffer, MatchList& matches, BufferRange range)
{
kak_assert(matches.size() % m_faces.size() == 0);
using RegexIt = RegexIterator<BufferIterator>;
RegexIt re_it{get_iterator(buffer, range.begin),
for (auto&& match : RegexIterator{get_iterator(buffer, range.begin),
get_iterator(buffer, range.end),
buffer.begin(), buffer.end(), m_regex,
match_flags(is_bol(range.begin),
is_eol(buffer, range.end),
is_bow(buffer, range.begin),
is_eow(buffer, range.end))};
RegexIt re_end;
for (; re_it != re_end; ++re_it)
is_eow(buffer, range.end))})
{
for (auto& face : m_faces)
{
const auto& sub = (*re_it)[face.first];
const auto& sub = match[face.first];
matches.push_back({sub.first.coord(), sub.second.coord()});
}
}
@ -1647,9 +1644,8 @@ using RegexMatchList = Vector<RegexMatch, MemoryDomain::Regions>;
void append_matches(const Buffer& buffer, LineCount line, RegexMatchList& matches, const Regex& regex, bool capture)
{
auto l = buffer[line];
for (RegexIterator<const char*> it{l.begin(), l.end(), regex}, end{}; it != end; ++it)
for (auto&& m : RegexIterator{l.begin(), l.end(), regex})
{
auto& m = *it;
const bool with_capture = capture and m[1].matched and
m[0].second - m[0].first < std::numeric_limits<uint16_t>::max();
matches.push_back({

View File

@ -168,77 +168,58 @@ template<typename Iterator, MatchDirection direction = MatchDirection::Forward>
struct RegexIterator
{
using ValueType = MatchResults<Iterator>;
struct Sentinel{};
struct It
{
It(RegexIterator& base) : m_base(base), m_valid{m_base.next()} {}
const ValueType& operator*() const { kak_assert(m_valid); return m_base.m_results; }
const ValueType* operator->() const { kak_assert(m_valid); return &m_base.m_results; }
It& operator++() { m_valid = m_base.next(); return *this; }
RegexIterator& m_base;
bool m_valid;
};
friend bool operator==(const It& lhs, Sentinel) { return not lhs.m_valid; }
friend bool operator!=(const It& lhs, Sentinel) { return lhs.m_valid; }
RegexIterator() = default;
RegexIterator(Iterator begin, Iterator end,
Iterator subject_begin, Iterator subject_end,
const Regex& re,
RegexExecFlags flags = RegexExecFlags::None)
: m_program{re.impl()}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
const Regex& re, RegexExecFlags flags = RegexExecFlags::None)
: m_vm{*re.impl()}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
m_begin{std::move(begin)}, m_end{std::move(end)},
m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
m_flags{flags}
{
next();
}
m_flags{flags} {}
RegexIterator(const Iterator& begin, const Iterator& end, const Regex& re,
RegexExecFlags flags = RegexExecFlags::None)
: RegexIterator{begin, end, begin, end, re, flags} {}
const ValueType& operator*() const { kak_assert(m_program); return m_results; }
const ValueType* operator->() const { kak_assert(m_program); return &m_results; }
RegexIterator& operator++()
{
next();
return *this;
}
friend bool operator==(const RegexIterator& lhs, const RegexIterator& rhs)
{
if (lhs.m_program == nullptr and rhs.m_program == nullptr)
return true;
return lhs.m_program == rhs.m_program and
lhs.m_next_pos == rhs.m_next_pos and
lhs.m_end == rhs.m_end and
lhs.m_flags == rhs.m_flags and
lhs.m_results == rhs.m_results;
}
friend bool operator!=(const RegexIterator& lhs, const RegexIterator& rhs)
{
return not (lhs == rhs);
}
RegexIterator begin() { return *this; }
RegexIterator end() { return {}; }
It begin() { return {*this}; }
Sentinel end() const { return {}; }
private:
void next()
bool next()
{
kak_assert(m_program);
auto additional_flags = RegexExecFlags::Search;
if (m_results.size() and m_results[0].first == m_results[0].second)
additional_flags |= RegexExecFlags::NotInitialNull;
ThreadedRegexVM<Iterator, direction> vm{*m_program};
constexpr bool forward = direction == MatchDirection::Forward;
if (vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos,
if (not m_vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos,
m_subject_begin, m_subject_end, m_flags | additional_flags))
{
return false;
m_results.values().clear();
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(m_results.values()));
std::move(m_vm.captures().begin(), m_vm.captures().end(), std::back_inserter(m_results.values()));
m_next_pos = (direction == MatchDirection::Forward) ? m_results[0].second : m_results[0].first;
}
else
m_program = nullptr;
return true;
}
const CompiledRegex* m_program = nullptr;
ThreadedRegexVM<Iterator, direction> m_vm;
MatchResults<Iterator> m_results;
Iterator m_next_pos{};
const Iterator m_begin{};

View File

@ -305,7 +305,7 @@ find_opening(Iterator pos, const Container& container,
pos = res[0].first;
using RegexIt = RegexIterator<Iterator, MatchDirection::Backward>;
for (auto match : RegexIt{container.begin(), pos, container.begin(), container.end(), opening})
for (auto&& match : RegexIt{container.begin(), pos, container.begin(), container.end(), opening})
{
if (nestable)
{
@ -923,8 +923,6 @@ Selection find_next_match(const Context& context, const Selection& sel, const Re
template Selection find_next_match<MatchDirection::Forward>(const Context&, const Selection&, const Regex&, bool&);
template Selection find_next_match<MatchDirection::Backward>(const Context&, const Selection&, const Regex&, bool&);
using RegexIt = RegexIterator<BufferIterator>;
void select_all_matches(SelectionList& selections, const Regex& regex, int capture)
{
const int mark_count = (int)regex.mark_count();
@ -937,21 +935,19 @@ void select_all_matches(SelectionList& selections, const Regex& regex, int captu
{
auto sel_beg = buffer.iterator_at(sel.min());
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
RegexIt re_it(sel_beg, sel_end, regex, match_flags(buffer, sel_beg, sel_end));
RegexIt re_end;
for (; re_it != re_end; ++re_it)
for (auto&& match : RegexIterator{sel_beg, sel_end, regex, match_flags(buffer, sel_beg, sel_end)})
{
auto begin = (*re_it)[capture].first;
auto begin = match[capture].first;
if (begin == sel_end)
continue;
auto end = (*re_it)[capture].second;
auto end = match[capture].second;
CaptureList captures;
captures.reserve(mark_count);
for (const auto& match : *re_it)
captures.push_back(buffer.string(match.first.coord(),
match.second.coord()));
for (const auto& submatch : match)
captures.push_back(buffer.string(submatch.first.coord(),
submatch.second.coord()));
result.push_back(
keep_direction({ begin.coord(),
@ -981,12 +977,9 @@ void split_selections(SelectionList& selections, const Regex& regex, int capture
auto begin = buffer.iterator_at(sel.min());
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
RegexIt re_it(begin, sel_end, regex, match_flags(buffer, begin, sel_end));
RegexIt re_end;
for (; re_it != re_end; ++re_it)
for (auto&& match : RegexIterator{begin, sel_end, regex, match_flags(buffer, begin, sel_end)})
{
BufferIterator end = (*re_it)[capture].first;
BufferIterator end = match[capture].first;
if (end == buf_end)
continue;
@ -995,7 +988,7 @@ void split_selections(SelectionList& selections, const Regex& regex, int capture
auto sel_end = (begin == end) ? end : utf8::previous(end, begin);
result.push_back(keep_direction({ begin.coord(), sel_end.coord() }, sel));
}
begin = (*re_it)[capture].second;
begin = match[capture].second;
}
if (begin.coord() <= sel.max())
result.push_back(keep_direction({ begin.coord(), sel.max() }, sel));

View File

@ -136,10 +136,9 @@ Vector<String> generate_env(StringView cmdline, const Context& context, const Sh
static const Regex re(R"(\bkak_(\w+)\b)");
Vector<String> kak_env;
for (RegexIterator<const char*> it{cmdline.begin(), cmdline.end(), re}, end;
it != end; ++it)
for (auto&& match : RegexIterator{cmdline.begin(), cmdline.end(), re})
{
StringView name{(*it)[1].first, (*it)[1].second};
StringView name{match[1].first, match[1].second};
auto match_name = [&](const String& s) {
return s.substr(0_byte, name.length()) == name and