Refactor RegexIterator to use a Sentinel
This commit is contained in:
parent
3babd0685c
commit
566268d7bc
|
@ -361,20 +361,17 @@ private:
|
||||||
void add_matches(const Buffer& buffer, MatchList& matches, BufferRange range)
|
void add_matches(const Buffer& buffer, MatchList& matches, BufferRange range)
|
||||||
{
|
{
|
||||||
kak_assert(matches.size() % m_faces.size() == 0);
|
kak_assert(matches.size() % m_faces.size() == 0);
|
||||||
using RegexIt = RegexIterator<BufferIterator>;
|
for (auto&& match : RegexIterator{get_iterator(buffer, range.begin),
|
||||||
RegexIt re_it{get_iterator(buffer, range.begin),
|
get_iterator(buffer, range.end),
|
||||||
get_iterator(buffer, range.end),
|
buffer.begin(), buffer.end(), m_regex,
|
||||||
buffer.begin(), buffer.end(), m_regex,
|
match_flags(is_bol(range.begin),
|
||||||
match_flags(is_bol(range.begin),
|
is_eol(buffer, range.end),
|
||||||
is_eol(buffer, range.end),
|
is_bow(buffer, range.begin),
|
||||||
is_bow(buffer, range.begin),
|
is_eow(buffer, range.end))})
|
||||||
is_eow(buffer, range.end))};
|
|
||||||
RegexIt re_end;
|
|
||||||
for (; re_it != re_end; ++re_it)
|
|
||||||
{
|
{
|
||||||
for (auto& face : m_faces)
|
for (auto& face : m_faces)
|
||||||
{
|
{
|
||||||
const auto& sub = (*re_it)[face.first];
|
const auto& sub = match[face.first];
|
||||||
matches.push_back({sub.first.coord(), sub.second.coord()});
|
matches.push_back({sub.first.coord(), sub.second.coord()});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1647,9 +1644,8 @@ using RegexMatchList = Vector<RegexMatch, MemoryDomain::Regions>;
|
||||||
void append_matches(const Buffer& buffer, LineCount line, RegexMatchList& matches, const Regex& regex, bool capture)
|
void append_matches(const Buffer& buffer, LineCount line, RegexMatchList& matches, const Regex& regex, bool capture)
|
||||||
{
|
{
|
||||||
auto l = buffer[line];
|
auto l = buffer[line];
|
||||||
for (RegexIterator<const char*> it{l.begin(), l.end(), regex}, end{}; it != end; ++it)
|
for (auto&& m : RegexIterator{l.begin(), l.end(), regex})
|
||||||
{
|
{
|
||||||
auto& m = *it;
|
|
||||||
const bool with_capture = capture and m[1].matched and
|
const bool with_capture = capture and m[1].matched and
|
||||||
m[0].second - m[0].first < std::numeric_limits<uint16_t>::max();
|
m[0].second - m[0].first < std::numeric_limits<uint16_t>::max();
|
||||||
matches.push_back({
|
matches.push_back({
|
||||||
|
|
81
src/regex.hh
81
src/regex.hh
|
@ -168,77 +168,58 @@ template<typename Iterator, MatchDirection direction = MatchDirection::Forward>
|
||||||
struct RegexIterator
|
struct RegexIterator
|
||||||
{
|
{
|
||||||
using ValueType = MatchResults<Iterator>;
|
using ValueType = MatchResults<Iterator>;
|
||||||
|
struct Sentinel{};
|
||||||
|
struct It
|
||||||
|
{
|
||||||
|
It(RegexIterator& base) : m_base(base), m_valid{m_base.next()} {}
|
||||||
|
|
||||||
|
const ValueType& operator*() const { kak_assert(m_valid); return m_base.m_results; }
|
||||||
|
const ValueType* operator->() const { kak_assert(m_valid); return &m_base.m_results; }
|
||||||
|
|
||||||
|
It& operator++() { m_valid = m_base.next(); return *this; }
|
||||||
|
|
||||||
|
RegexIterator& m_base;
|
||||||
|
bool m_valid;
|
||||||
|
};
|
||||||
|
|
||||||
|
friend bool operator==(const It& lhs, Sentinel) { return not lhs.m_valid; }
|
||||||
|
friend bool operator!=(const It& lhs, Sentinel) { return lhs.m_valid; }
|
||||||
|
|
||||||
RegexIterator() = default;
|
|
||||||
RegexIterator(Iterator begin, Iterator end,
|
RegexIterator(Iterator begin, Iterator end,
|
||||||
Iterator subject_begin, Iterator subject_end,
|
Iterator subject_begin, Iterator subject_end,
|
||||||
const Regex& re,
|
const Regex& re, RegexExecFlags flags = RegexExecFlags::None)
|
||||||
RegexExecFlags flags = RegexExecFlags::None)
|
: m_vm{*re.impl()}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
|
||||||
: m_program{re.impl()}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
|
|
||||||
m_begin{std::move(begin)}, m_end{std::move(end)},
|
m_begin{std::move(begin)}, m_end{std::move(end)},
|
||||||
m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
|
m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
|
||||||
m_flags{flags}
|
m_flags{flags} {}
|
||||||
{
|
|
||||||
next();
|
|
||||||
}
|
|
||||||
|
|
||||||
RegexIterator(const Iterator& begin, const Iterator& end, const Regex& re,
|
RegexIterator(const Iterator& begin, const Iterator& end, const Regex& re,
|
||||||
RegexExecFlags flags = RegexExecFlags::None)
|
RegexExecFlags flags = RegexExecFlags::None)
|
||||||
: RegexIterator{begin, end, begin, end, re, flags} {}
|
: RegexIterator{begin, end, begin, end, re, flags} {}
|
||||||
|
|
||||||
const ValueType& operator*() const { kak_assert(m_program); return m_results; }
|
It begin() { return {*this}; }
|
||||||
const ValueType* operator->() const { kak_assert(m_program); return &m_results; }
|
Sentinel end() const { return {}; }
|
||||||
|
|
||||||
RegexIterator& operator++()
|
|
||||||
{
|
|
||||||
next();
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
friend bool operator==(const RegexIterator& lhs, const RegexIterator& rhs)
|
|
||||||
{
|
|
||||||
if (lhs.m_program == nullptr and rhs.m_program == nullptr)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
return lhs.m_program == rhs.m_program and
|
|
||||||
lhs.m_next_pos == rhs.m_next_pos and
|
|
||||||
lhs.m_end == rhs.m_end and
|
|
||||||
lhs.m_flags == rhs.m_flags and
|
|
||||||
lhs.m_results == rhs.m_results;
|
|
||||||
}
|
|
||||||
|
|
||||||
friend bool operator!=(const RegexIterator& lhs, const RegexIterator& rhs)
|
|
||||||
{
|
|
||||||
return not (lhs == rhs);
|
|
||||||
}
|
|
||||||
|
|
||||||
RegexIterator begin() { return *this; }
|
|
||||||
RegexIterator end() { return {}; }
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void next()
|
bool next()
|
||||||
{
|
{
|
||||||
kak_assert(m_program);
|
|
||||||
|
|
||||||
auto additional_flags = RegexExecFlags::Search;
|
auto additional_flags = RegexExecFlags::Search;
|
||||||
if (m_results.size() and m_results[0].first == m_results[0].second)
|
if (m_results.size() and m_results[0].first == m_results[0].second)
|
||||||
additional_flags |= RegexExecFlags::NotInitialNull;
|
additional_flags |= RegexExecFlags::NotInitialNull;
|
||||||
|
|
||||||
ThreadedRegexVM<Iterator, direction> vm{*m_program};
|
|
||||||
constexpr bool forward = direction == MatchDirection::Forward;
|
constexpr bool forward = direction == MatchDirection::Forward;
|
||||||
|
|
||||||
if (vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos,
|
if (not m_vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos,
|
||||||
m_subject_begin, m_subject_end, m_flags | additional_flags))
|
m_subject_begin, m_subject_end, m_flags | additional_flags))
|
||||||
{
|
return false;
|
||||||
m_results.values().clear();
|
|
||||||
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(m_results.values()));
|
m_results.values().clear();
|
||||||
m_next_pos = (direction == MatchDirection::Forward) ? m_results[0].second : m_results[0].first;
|
std::move(m_vm.captures().begin(), m_vm.captures().end(), std::back_inserter(m_results.values()));
|
||||||
}
|
m_next_pos = (direction == MatchDirection::Forward) ? m_results[0].second : m_results[0].first;
|
||||||
else
|
return true;
|
||||||
m_program = nullptr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const CompiledRegex* m_program = nullptr;
|
ThreadedRegexVM<Iterator, direction> m_vm;
|
||||||
MatchResults<Iterator> m_results;
|
MatchResults<Iterator> m_results;
|
||||||
Iterator m_next_pos{};
|
Iterator m_next_pos{};
|
||||||
const Iterator m_begin{};
|
const Iterator m_begin{};
|
||||||
|
|
|
@ -305,7 +305,7 @@ find_opening(Iterator pos, const Container& container,
|
||||||
pos = res[0].first;
|
pos = res[0].first;
|
||||||
|
|
||||||
using RegexIt = RegexIterator<Iterator, MatchDirection::Backward>;
|
using RegexIt = RegexIterator<Iterator, MatchDirection::Backward>;
|
||||||
for (auto match : RegexIt{container.begin(), pos, container.begin(), container.end(), opening})
|
for (auto&& match : RegexIt{container.begin(), pos, container.begin(), container.end(), opening})
|
||||||
{
|
{
|
||||||
if (nestable)
|
if (nestable)
|
||||||
{
|
{
|
||||||
|
@ -923,8 +923,6 @@ Selection find_next_match(const Context& context, const Selection& sel, const Re
|
||||||
template Selection find_next_match<MatchDirection::Forward>(const Context&, const Selection&, const Regex&, bool&);
|
template Selection find_next_match<MatchDirection::Forward>(const Context&, const Selection&, const Regex&, bool&);
|
||||||
template Selection find_next_match<MatchDirection::Backward>(const Context&, const Selection&, const Regex&, bool&);
|
template Selection find_next_match<MatchDirection::Backward>(const Context&, const Selection&, const Regex&, bool&);
|
||||||
|
|
||||||
using RegexIt = RegexIterator<BufferIterator>;
|
|
||||||
|
|
||||||
void select_all_matches(SelectionList& selections, const Regex& regex, int capture)
|
void select_all_matches(SelectionList& selections, const Regex& regex, int capture)
|
||||||
{
|
{
|
||||||
const int mark_count = (int)regex.mark_count();
|
const int mark_count = (int)regex.mark_count();
|
||||||
|
@ -937,21 +935,19 @@ void select_all_matches(SelectionList& selections, const Regex& regex, int captu
|
||||||
{
|
{
|
||||||
auto sel_beg = buffer.iterator_at(sel.min());
|
auto sel_beg = buffer.iterator_at(sel.min());
|
||||||
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
||||||
RegexIt re_it(sel_beg, sel_end, regex, match_flags(buffer, sel_beg, sel_end));
|
|
||||||
RegexIt re_end;
|
|
||||||
|
|
||||||
for (; re_it != re_end; ++re_it)
|
for (auto&& match : RegexIterator{sel_beg, sel_end, regex, match_flags(buffer, sel_beg, sel_end)})
|
||||||
{
|
{
|
||||||
auto begin = (*re_it)[capture].first;
|
auto begin = match[capture].first;
|
||||||
if (begin == sel_end)
|
if (begin == sel_end)
|
||||||
continue;
|
continue;
|
||||||
auto end = (*re_it)[capture].second;
|
auto end = match[capture].second;
|
||||||
|
|
||||||
CaptureList captures;
|
CaptureList captures;
|
||||||
captures.reserve(mark_count);
|
captures.reserve(mark_count);
|
||||||
for (const auto& match : *re_it)
|
for (const auto& submatch : match)
|
||||||
captures.push_back(buffer.string(match.first.coord(),
|
captures.push_back(buffer.string(submatch.first.coord(),
|
||||||
match.second.coord()));
|
submatch.second.coord()));
|
||||||
|
|
||||||
result.push_back(
|
result.push_back(
|
||||||
keep_direction({ begin.coord(),
|
keep_direction({ begin.coord(),
|
||||||
|
@ -981,12 +977,9 @@ void split_selections(SelectionList& selections, const Regex& regex, int capture
|
||||||
auto begin = buffer.iterator_at(sel.min());
|
auto begin = buffer.iterator_at(sel.min());
|
||||||
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
||||||
|
|
||||||
RegexIt re_it(begin, sel_end, regex, match_flags(buffer, begin, sel_end));
|
for (auto&& match : RegexIterator{begin, sel_end, regex, match_flags(buffer, begin, sel_end)})
|
||||||
RegexIt re_end;
|
|
||||||
|
|
||||||
for (; re_it != re_end; ++re_it)
|
|
||||||
{
|
{
|
||||||
BufferIterator end = (*re_it)[capture].first;
|
BufferIterator end = match[capture].first;
|
||||||
if (end == buf_end)
|
if (end == buf_end)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -995,7 +988,7 @@ void split_selections(SelectionList& selections, const Regex& regex, int capture
|
||||||
auto sel_end = (begin == end) ? end : utf8::previous(end, begin);
|
auto sel_end = (begin == end) ? end : utf8::previous(end, begin);
|
||||||
result.push_back(keep_direction({ begin.coord(), sel_end.coord() }, sel));
|
result.push_back(keep_direction({ begin.coord(), sel_end.coord() }, sel));
|
||||||
}
|
}
|
||||||
begin = (*re_it)[capture].second;
|
begin = match[capture].second;
|
||||||
}
|
}
|
||||||
if (begin.coord() <= sel.max())
|
if (begin.coord() <= sel.max())
|
||||||
result.push_back(keep_direction({ begin.coord(), sel.max() }, sel));
|
result.push_back(keep_direction({ begin.coord(), sel.max() }, sel));
|
||||||
|
|
|
@ -136,10 +136,9 @@ Vector<String> generate_env(StringView cmdline, const Context& context, const Sh
|
||||||
static const Regex re(R"(\bkak_(\w+)\b)");
|
static const Regex re(R"(\bkak_(\w+)\b)");
|
||||||
|
|
||||||
Vector<String> kak_env;
|
Vector<String> kak_env;
|
||||||
for (RegexIterator<const char*> it{cmdline.begin(), cmdline.end(), re}, end;
|
for (auto&& match : RegexIterator{cmdline.begin(), cmdline.end(), re})
|
||||||
it != end; ++it)
|
|
||||||
{
|
{
|
||||||
StringView name{(*it)[1].first, (*it)[1].second};
|
StringView name{match[1].first, match[1].second};
|
||||||
|
|
||||||
auto match_name = [&](const String& s) {
|
auto match_name = [&](const String& s) {
|
||||||
return s.substr(0_byte, name.length()) == name and
|
return s.substr(0_byte, name.length()) == name and
|
||||||
|
|
Loading…
Reference in New Issue
Block a user