Support re-using the same ThreadedRegexVM for multiple iterations

This should reduce the number of allocations, as the memory allocated
for the thread stack and the saves can be re-used between runs instead
of being discarded and reallocated for every new RegexIterator.
This commit is contained in:
Maxime Coste 2018-12-20 23:07:50 +11:00
parent 30897fd820
commit 8c2603ab3c
3 changed files with 34 additions and 29 deletions

View File

@ -1641,10 +1641,16 @@ struct RegexMatch
}; };
using RegexMatchList = Vector<RegexMatch, MemoryDomain::Regions>; using RegexMatchList = Vector<RegexMatch, MemoryDomain::Regions>;
void append_matches(const Buffer& buffer, LineCount line, RegexMatchList& matches, const Regex& regex, bool capture)
void insert_matches(const Buffer& buffer, RegexMatchList& matches, const Regex& regex, bool capture, LineRange range)
{ {
auto l = buffer[line]; size_t pivot = matches.size();
for (auto&& m : RegexIterator{l.begin(), l.end(), regex}) capture = capture and regex.mark_count() > 0;
ThreadedRegexVM<const char*, MatchDirection::Forward> vm{*regex.impl()};
for (auto line = range.begin; line < range.end; ++line)
{
const StringView l = buffer[line];
for (auto&& m : RegexIterator{l.begin(), l.end(), vm})
{ {
const bool with_capture = capture and m[1].matched and const bool with_capture = capture and m[1].matched and
m[0].second - m[0].first < std::numeric_limits<uint16_t>::max(); m[0].second - m[0].first < std::numeric_limits<uint16_t>::max();
@ -1658,13 +1664,6 @@ void append_matches(const Buffer& buffer, LineCount line, RegexMatchList& matche
} }
} }
void insert_matches(const Buffer& buffer, RegexMatchList& matches, const Regex& regex, bool capture, LineRange range)
{
size_t pivot = matches.size();
capture = capture and regex.mark_count() > 0;
for (auto line = range.begin; line < range.end; ++line)
append_matches(buffer, line, matches, regex, capture);
auto pos = std::lower_bound(matches.begin(), matches.begin() + pivot, range.begin, auto pos = std::lower_bound(matches.begin(), matches.begin() + pivot, range.begin,
[](const RegexMatch& m, LineCount l) { return m.line < l; }); [](const RegexMatch& m, LineCount l) { return m.line < l; });
kak_assert(pos == matches.begin() + pivot or pos->line >= range.end); // We should not have had matches for range kak_assert(pos == matches.begin() + pivot or pos->line >= range.end); // We should not have had matches for range

View File

@ -164,7 +164,8 @@ bool backward_regex_search(It begin, It end, It subject_begin, It subject_end,
String option_to_string(const Regex& re); String option_to_string(const Regex& re);
Regex option_from_string(Meta::Type<Regex>, StringView str); Regex option_from_string(Meta::Type<Regex>, StringView str);
template<typename Iterator, MatchDirection direction = MatchDirection::Forward> template<typename Iterator, MatchDirection direction = MatchDirection::Forward,
typename VmArg = const Regex>
struct RegexIterator struct RegexIterator
{ {
using ValueType = MatchResults<Iterator>; using ValueType = MatchResults<Iterator>;
@ -177,25 +178,24 @@ struct RegexIterator
const ValueType* operator->() const { kak_assert(m_valid); return &m_base.m_results; } const ValueType* operator->() const { kak_assert(m_valid); return &m_base.m_results; }
It& operator++() { m_valid = m_base.next(); return *this; } It& operator++() { m_valid = m_base.next(); return *this; }
bool operator==(Sentinel) const { return not m_valid; }
bool operator!=(Sentinel) const { return m_valid; }
RegexIterator& m_base; RegexIterator& m_base;
bool m_valid; bool m_valid;
}; };
friend bool operator==(const It& lhs, Sentinel) { return not lhs.m_valid; }
friend bool operator!=(const It& lhs, Sentinel) { return lhs.m_valid; }
RegexIterator(Iterator begin, Iterator end, RegexIterator(Iterator begin, Iterator end,
Iterator subject_begin, Iterator subject_end, Iterator subject_begin, Iterator subject_end,
const Regex& re, RegexExecFlags flags = RegexExecFlags::None) VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None)
: m_vm{*re.impl()}, m_next_pos{direction == MatchDirection::Forward ? begin : end}, : m_vm{make_vm(vm_arg)}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
m_begin{std::move(begin)}, m_end{std::move(end)}, m_begin{std::move(begin)}, m_end{std::move(end)},
m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)}, m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
m_flags{flags} {} m_flags{flags} {}
RegexIterator(const Iterator& begin, const Iterator& end, const Regex& re, RegexIterator(const Iterator& begin, const Iterator& end,
RegexExecFlags flags = RegexExecFlags::None) VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None)
: RegexIterator{begin, end, begin, end, re, flags} {} : RegexIterator{begin, end, begin, end, vm_arg, flags} {}
It begin() { return {*this}; } It begin() { return {*this}; }
Sentinel end() const { return {}; } Sentinel end() const { return {}; }
@ -219,7 +219,11 @@ private:
return true; return true;
} }
ThreadedRegexVM<Iterator, direction> m_vm; using RegexVM = ThreadedRegexVM<Iterator, direction>;
static RegexVM& make_vm(RegexVM& vm) { return vm; }
static RegexVM make_vm(const Regex& regex) { return {*regex.impl()}; }
decltype(make_vm(std::declval<VmArg&>())) m_vm;
MatchResults<Iterator> m_results; MatchResults<Iterator> m_results;
Iterator m_next_pos{}; Iterator m_next_pos{};
const Iterator m_begin{}; const Iterator m_begin{};

View File

@ -931,12 +931,13 @@ void select_all_matches(SelectionList& selections, const Regex& regex, int captu
Vector<Selection> result; Vector<Selection> result;
auto& buffer = selections.buffer(); auto& buffer = selections.buffer();
ThreadedRegexVM<BufferIterator, MatchDirection::Forward> vm{*regex.impl()};
for (auto& sel : selections) for (auto& sel : selections)
{ {
auto sel_beg = buffer.iterator_at(sel.min()); auto sel_beg = buffer.iterator_at(sel.min());
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end()); auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
for (auto&& match : RegexIterator{sel_beg, sel_end, regex, match_flags(buffer, sel_beg, sel_end)}) for (auto&& match : RegexIterator{sel_beg, sel_end, vm, match_flags(buffer, sel_beg, sel_end)})
{ {
auto begin = match[capture].first; auto begin = match[capture].first;
if (begin == sel_end) if (begin == sel_end)
@ -972,12 +973,13 @@ void split_selections(SelectionList& selections, const Regex& regex, int capture
auto& buffer = selections.buffer(); auto& buffer = selections.buffer();
auto buf_end = buffer.end(); auto buf_end = buffer.end();
auto buf_begin = buffer.begin(); auto buf_begin = buffer.begin();
ThreadedRegexVM<BufferIterator, MatchDirection::Forward> vm{*regex.impl()};
for (auto& sel : selections) for (auto& sel : selections)
{ {
auto begin = buffer.iterator_at(sel.min()); auto begin = buffer.iterator_at(sel.min());
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end()); auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
for (auto&& match : RegexIterator{begin, sel_end, regex, match_flags(buffer, begin, sel_end)}) for (auto&& match : RegexIterator{begin, sel_end, vm, match_flags(buffer, begin, sel_end)})
{ {
BufferIterator end = match[capture].first; BufferIterator end = match[capture].first;
if (end == buf_end) if (end == buf_end)