From 8c2603ab3ca58e24ca227bd42e0550adb74fa95f Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Thu, 20 Dec 2018 23:07:50 +1100 Subject: [PATCH] Support re-using the same ThreadedRegexVM for multiple iterations This should reduce the number of allocations as the memory allocated for the thread stack and the saves can be re-used between runs instead of being cleared every time. --- src/highlighters.cc | 33 ++++++++++++++++----------------- src/regex.hh | 24 ++++++++++++++---------- src/selectors.cc | 6 ++++-- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/src/highlighters.cc b/src/highlighters.cc index 1d2624c3..c8b537ba 100644 --- a/src/highlighters.cc +++ b/src/highlighters.cc @@ -1641,29 +1641,28 @@ struct RegexMatch }; using RegexMatchList = Vector; -void append_matches(const Buffer& buffer, LineCount line, RegexMatchList& matches, const Regex& regex, bool capture) -{ - auto l = buffer[line]; - for (auto&& m : RegexIterator{l.begin(), l.end(), regex}) - { - const bool with_capture = capture and m[1].matched and - m[0].second - m[0].first < std::numeric_limits::max(); - matches.push_back({ - line, - (int)(m[0].first - l.begin()), - (int)(m[0].second - l.begin()), - (uint16_t)(with_capture ? m[1].first - m[0].first : 0), - (uint16_t)(with_capture ? m[1].second - m[1].first : 0) - }); - } -} void insert_matches(const Buffer& buffer, RegexMatchList& matches, const Regex& regex, bool capture, LineRange range) { size_t pivot = matches.size(); capture = capture and regex.mark_count() > 0; + ThreadedRegexVM vm{*regex.impl()}; for (auto line = range.begin; line < range.end; ++line) - append_matches(buffer, line, matches, regex, capture); + { + const StringView l = buffer[line]; + for (auto&& m : RegexIterator{l.begin(), l.end(), vm}) + { + const bool with_capture = capture and m[1].matched and + m[0].second - m[0].first < std::numeric_limits::max(); + matches.push_back({ + line, + (int)(m[0].first - l.begin()), + (int)(m[0].second - l.begin()), + (uint16_t)(with_capture ? m[1].first - m[0].first : 0), + (uint16_t)(with_capture ? m[1].second - m[1].first : 0) + }); + } + } auto pos = std::lower_bound(matches.begin(), matches.begin() + pivot, range.begin, [](const RegexMatch& m, LineCount l) { return m.line < l; }); diff --git a/src/regex.hh b/src/regex.hh index 4aa916c9..c0a1fea0 100644 --- a/src/regex.hh +++ b/src/regex.hh @@ -164,7 +164,8 @@ bool backward_regex_search(It begin, It end, It subject_begin, It subject_end, String option_to_string(const Regex& re); Regex option_from_string(Meta::Type, StringView str); -template +template struct RegexIterator { using ValueType = MatchResults; @@ -177,25 +178,24 @@ struct RegexIterator const ValueType* operator->() const { kak_assert(m_valid); return &m_base.m_results; } It& operator++() { m_valid = m_base.next(); return *this; } + bool operator==(Sentinel) const { return not m_valid; } + bool operator!=(Sentinel) const { return m_valid; } RegexIterator& m_base; bool m_valid; }; - friend bool operator==(const It& lhs, Sentinel) { return not lhs.m_valid; } - friend bool operator!=(const It& lhs, Sentinel) { return lhs.m_valid; } - RegexIterator(Iterator begin, Iterator end, Iterator subject_begin, Iterator subject_end, - const Regex& re, RegexExecFlags flags = RegexExecFlags::None) - : m_vm{*re.impl()}, m_next_pos{direction == MatchDirection::Forward ? begin : end}, + VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None) + : m_vm{make_vm(vm_arg)}, m_next_pos{direction == MatchDirection::Forward ? begin : end}, m_begin{std::move(begin)}, m_end{std::move(end)}, m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)}, m_flags{flags} {} - RegexIterator(const Iterator& begin, const Iterator& end, const Regex& re, - RegexExecFlags flags = RegexExecFlags::None) - : RegexIterator{begin, end, begin, end, re, flags} {} + RegexIterator(const Iterator& begin, const Iterator& end, + VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None) + : RegexIterator{begin, end, begin, end, vm_arg, flags} {} It begin() { return {*this}; } Sentinel end() const { return {}; } @@ -219,7 +219,11 @@ private: return true; } - ThreadedRegexVM m_vm; + using RegexVM = ThreadedRegexVM; + static RegexVM& make_vm(RegexVM& vm) { return vm; } + static RegexVM make_vm(const Regex& regex) { return {*regex.impl()}; } + + decltype(make_vm(std::declval())) m_vm; MatchResults m_results; Iterator m_next_pos{}; const Iterator m_begin{}; diff --git a/src/selectors.cc b/src/selectors.cc index fc661c08..06c64546 100644 --- a/src/selectors.cc +++ b/src/selectors.cc @@ -931,12 +931,13 @@ void select_all_matches(SelectionList& selections, const Regex& regex, int captu Vector result; auto& buffer = selections.buffer(); + ThreadedRegexVM vm{*regex.impl()}; for (auto& sel : selections) { auto sel_beg = buffer.iterator_at(sel.min()); auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end()); - for (auto&& match : RegexIterator{sel_beg, sel_end, regex, match_flags(buffer, sel_beg, sel_end)}) + for (auto&& match : RegexIterator{sel_beg, sel_end, vm, match_flags(buffer, sel_beg, sel_end)}) { auto begin = match[capture].first; if (begin == sel_end) @@ -972,12 +973,13 @@ void split_selections(SelectionList& selections, const Regex& regex, int capture auto& buffer = selections.buffer(); auto buf_end = buffer.end(); auto buf_begin = buffer.begin(); + ThreadedRegexVM vm{*regex.impl()}; for (auto& sel : selections) { auto begin = buffer.iterator_at(sel.min()); auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end()); - for (auto&& match : RegexIterator{begin, sel_end, regex, match_flags(buffer, begin, sel_end)}) + for (auto&& match : RegexIterator{begin, sel_end, vm, match_flags(buffer, begin, sel_end)}) { BufferIterator end = match[capture].first; if (end == buf_end)