Refactor RegexIterator to use a Sentinel

This commit is contained in:
Maxime Coste 2018-12-19 21:47:44 +11:00
parent 3babd0685c
commit 566268d7bc
4 changed files with 52 additions and 83 deletions

View File

@ -361,20 +361,17 @@ private:
void add_matches(const Buffer& buffer, MatchList& matches, BufferRange range) void add_matches(const Buffer& buffer, MatchList& matches, BufferRange range)
{ {
kak_assert(matches.size() % m_faces.size() == 0); kak_assert(matches.size() % m_faces.size() == 0);
using RegexIt = RegexIterator<BufferIterator>; for (auto&& match : RegexIterator{get_iterator(buffer, range.begin),
RegexIt re_it{get_iterator(buffer, range.begin),
get_iterator(buffer, range.end), get_iterator(buffer, range.end),
buffer.begin(), buffer.end(), m_regex, buffer.begin(), buffer.end(), m_regex,
match_flags(is_bol(range.begin), match_flags(is_bol(range.begin),
is_eol(buffer, range.end), is_eol(buffer, range.end),
is_bow(buffer, range.begin), is_bow(buffer, range.begin),
is_eow(buffer, range.end))}; is_eow(buffer, range.end))})
RegexIt re_end;
for (; re_it != re_end; ++re_it)
{ {
for (auto& face : m_faces) for (auto& face : m_faces)
{ {
const auto& sub = (*re_it)[face.first]; const auto& sub = match[face.first];
matches.push_back({sub.first.coord(), sub.second.coord()}); matches.push_back({sub.first.coord(), sub.second.coord()});
} }
} }
@ -1647,9 +1644,8 @@ using RegexMatchList = Vector<RegexMatch, MemoryDomain::Regions>;
void append_matches(const Buffer& buffer, LineCount line, RegexMatchList& matches, const Regex& regex, bool capture) void append_matches(const Buffer& buffer, LineCount line, RegexMatchList& matches, const Regex& regex, bool capture)
{ {
auto l = buffer[line]; auto l = buffer[line];
for (RegexIterator<const char*> it{l.begin(), l.end(), regex}, end{}; it != end; ++it) for (auto&& m : RegexIterator{l.begin(), l.end(), regex})
{ {
auto& m = *it;
const bool with_capture = capture and m[1].matched and const bool with_capture = capture and m[1].matched and
m[0].second - m[0].first < std::numeric_limits<uint16_t>::max(); m[0].second - m[0].first < std::numeric_limits<uint16_t>::max();
matches.push_back({ matches.push_back({

View File

@ -168,77 +168,58 @@ template<typename Iterator, MatchDirection direction = MatchDirection::Forward>
struct RegexIterator struct RegexIterator
{ {
using ValueType = MatchResults<Iterator>; using ValueType = MatchResults<Iterator>;
struct Sentinel{};
struct It
{
It(RegexIterator& base) : m_base(base), m_valid{m_base.next()} {}
const ValueType& operator*() const { kak_assert(m_valid); return m_base.m_results; }
const ValueType* operator->() const { kak_assert(m_valid); return &m_base.m_results; }
It& operator++() { m_valid = m_base.next(); return *this; }
RegexIterator& m_base;
bool m_valid;
};
friend bool operator==(const It& lhs, Sentinel) { return not lhs.m_valid; }
friend bool operator!=(const It& lhs, Sentinel) { return lhs.m_valid; }
RegexIterator() = default;
RegexIterator(Iterator begin, Iterator end, RegexIterator(Iterator begin, Iterator end,
Iterator subject_begin, Iterator subject_end, Iterator subject_begin, Iterator subject_end,
const Regex& re, const Regex& re, RegexExecFlags flags = RegexExecFlags::None)
RegexExecFlags flags = RegexExecFlags::None) : m_vm{*re.impl()}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
: m_program{re.impl()}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
m_begin{std::move(begin)}, m_end{std::move(end)}, m_begin{std::move(begin)}, m_end{std::move(end)},
m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)}, m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
m_flags{flags} m_flags{flags} {}
{
next();
}
RegexIterator(const Iterator& begin, const Iterator& end, const Regex& re, RegexIterator(const Iterator& begin, const Iterator& end, const Regex& re,
RegexExecFlags flags = RegexExecFlags::None) RegexExecFlags flags = RegexExecFlags::None)
: RegexIterator{begin, end, begin, end, re, flags} {} : RegexIterator{begin, end, begin, end, re, flags} {}
const ValueType& operator*() const { kak_assert(m_program); return m_results; } It begin() { return {*this}; }
const ValueType* operator->() const { kak_assert(m_program); return &m_results; } Sentinel end() const { return {}; }
RegexIterator& operator++()
{
next();
return *this;
}
friend bool operator==(const RegexIterator& lhs, const RegexIterator& rhs)
{
if (lhs.m_program == nullptr and rhs.m_program == nullptr)
return true;
return lhs.m_program == rhs.m_program and
lhs.m_next_pos == rhs.m_next_pos and
lhs.m_end == rhs.m_end and
lhs.m_flags == rhs.m_flags and
lhs.m_results == rhs.m_results;
}
friend bool operator!=(const RegexIterator& lhs, const RegexIterator& rhs)
{
return not (lhs == rhs);
}
RegexIterator begin() { return *this; }
RegexIterator end() { return {}; }
private: private:
void next() bool next()
{ {
kak_assert(m_program);
auto additional_flags = RegexExecFlags::Search; auto additional_flags = RegexExecFlags::Search;
if (m_results.size() and m_results[0].first == m_results[0].second) if (m_results.size() and m_results[0].first == m_results[0].second)
additional_flags |= RegexExecFlags::NotInitialNull; additional_flags |= RegexExecFlags::NotInitialNull;
ThreadedRegexVM<Iterator, direction> vm{*m_program};
constexpr bool forward = direction == MatchDirection::Forward; constexpr bool forward = direction == MatchDirection::Forward;
if (vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos, if (not m_vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos,
m_subject_begin, m_subject_end, m_flags | additional_flags)) m_subject_begin, m_subject_end, m_flags | additional_flags))
{ return false;
m_results.values().clear(); m_results.values().clear();
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(m_results.values())); std::move(m_vm.captures().begin(), m_vm.captures().end(), std::back_inserter(m_results.values()));
m_next_pos = (direction == MatchDirection::Forward) ? m_results[0].second : m_results[0].first; m_next_pos = (direction == MatchDirection::Forward) ? m_results[0].second : m_results[0].first;
} return true;
else
m_program = nullptr;
} }
const CompiledRegex* m_program = nullptr; ThreadedRegexVM<Iterator, direction> m_vm;
MatchResults<Iterator> m_results; MatchResults<Iterator> m_results;
Iterator m_next_pos{}; Iterator m_next_pos{};
const Iterator m_begin{}; const Iterator m_begin{};

View File

@ -305,7 +305,7 @@ find_opening(Iterator pos, const Container& container,
pos = res[0].first; pos = res[0].first;
using RegexIt = RegexIterator<Iterator, MatchDirection::Backward>; using RegexIt = RegexIterator<Iterator, MatchDirection::Backward>;
for (auto match : RegexIt{container.begin(), pos, container.begin(), container.end(), opening}) for (auto&& match : RegexIt{container.begin(), pos, container.begin(), container.end(), opening})
{ {
if (nestable) if (nestable)
{ {
@ -923,8 +923,6 @@ Selection find_next_match(const Context& context, const Selection& sel, const Re
template Selection find_next_match<MatchDirection::Forward>(const Context&, const Selection&, const Regex&, bool&); template Selection find_next_match<MatchDirection::Forward>(const Context&, const Selection&, const Regex&, bool&);
template Selection find_next_match<MatchDirection::Backward>(const Context&, const Selection&, const Regex&, bool&); template Selection find_next_match<MatchDirection::Backward>(const Context&, const Selection&, const Regex&, bool&);
using RegexIt = RegexIterator<BufferIterator>;
void select_all_matches(SelectionList& selections, const Regex& regex, int capture) void select_all_matches(SelectionList& selections, const Regex& regex, int capture)
{ {
const int mark_count = (int)regex.mark_count(); const int mark_count = (int)regex.mark_count();
@ -937,21 +935,19 @@ void select_all_matches(SelectionList& selections, const Regex& regex, int captu
{ {
auto sel_beg = buffer.iterator_at(sel.min()); auto sel_beg = buffer.iterator_at(sel.min());
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end()); auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
RegexIt re_it(sel_beg, sel_end, regex, match_flags(buffer, sel_beg, sel_end));
RegexIt re_end;
for (; re_it != re_end; ++re_it) for (auto&& match : RegexIterator{sel_beg, sel_end, regex, match_flags(buffer, sel_beg, sel_end)})
{ {
auto begin = (*re_it)[capture].first; auto begin = match[capture].first;
if (begin == sel_end) if (begin == sel_end)
continue; continue;
auto end = (*re_it)[capture].second; auto end = match[capture].second;
CaptureList captures; CaptureList captures;
captures.reserve(mark_count); captures.reserve(mark_count);
for (const auto& match : *re_it) for (const auto& submatch : match)
captures.push_back(buffer.string(match.first.coord(), captures.push_back(buffer.string(submatch.first.coord(),
match.second.coord())); submatch.second.coord()));
result.push_back( result.push_back(
keep_direction({ begin.coord(), keep_direction({ begin.coord(),
@ -981,12 +977,9 @@ void split_selections(SelectionList& selections, const Regex& regex, int capture
auto begin = buffer.iterator_at(sel.min()); auto begin = buffer.iterator_at(sel.min());
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end()); auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
RegexIt re_it(begin, sel_end, regex, match_flags(buffer, begin, sel_end)); for (auto&& match : RegexIterator{begin, sel_end, regex, match_flags(buffer, begin, sel_end)})
RegexIt re_end;
for (; re_it != re_end; ++re_it)
{ {
BufferIterator end = (*re_it)[capture].first; BufferIterator end = match[capture].first;
if (end == buf_end) if (end == buf_end)
continue; continue;
@ -995,7 +988,7 @@ void split_selections(SelectionList& selections, const Regex& regex, int capture
auto sel_end = (begin == end) ? end : utf8::previous(end, begin); auto sel_end = (begin == end) ? end : utf8::previous(end, begin);
result.push_back(keep_direction({ begin.coord(), sel_end.coord() }, sel)); result.push_back(keep_direction({ begin.coord(), sel_end.coord() }, sel));
} }
begin = (*re_it)[capture].second; begin = match[capture].second;
} }
if (begin.coord() <= sel.max()) if (begin.coord() <= sel.max())
result.push_back(keep_direction({ begin.coord(), sel.max() }, sel)); result.push_back(keep_direction({ begin.coord(), sel.max() }, sel));

View File

@ -136,10 +136,9 @@ Vector<String> generate_env(StringView cmdline, const Context& context, const Sh
static const Regex re(R"(\bkak_(\w+)\b)"); static const Regex re(R"(\bkak_(\w+)\b)");
Vector<String> kak_env; Vector<String> kak_env;
for (RegexIterator<const char*> it{cmdline.begin(), cmdline.end(), re}, end; for (auto&& match : RegexIterator{cmdline.begin(), cmdline.end(), re})
it != end; ++it)
{ {
StringView name{(*it)[1].first, (*it)[1].second}; StringView name{match[1].first, match[1].second};
auto match_name = [&](const String& s) { auto match_name = [&](const String& s) {
return s.substr(0_byte, name.length()) == name and return s.substr(0_byte, name.length()) == name and