Support re-using the same ThreadedRegexVM for multiple iterations
This should reduce the number of allocations, as the memory allocated for the thread stack and the saves can be re-used between runs instead of being cleared every time.
This commit is contained in:
parent
30897fd820
commit
8c2603ab3c
|
@ -1641,29 +1641,28 @@ struct RegexMatch
|
||||||
};
|
};
|
||||||
|
|
||||||
using RegexMatchList = Vector<RegexMatch, MemoryDomain::Regions>;
|
using RegexMatchList = Vector<RegexMatch, MemoryDomain::Regions>;
|
||||||
void append_matches(const Buffer& buffer, LineCount line, RegexMatchList& matches, const Regex& regex, bool capture)
|
|
||||||
{
|
|
||||||
auto l = buffer[line];
|
|
||||||
for (auto&& m : RegexIterator{l.begin(), l.end(), regex})
|
|
||||||
{
|
|
||||||
const bool with_capture = capture and m[1].matched and
|
|
||||||
m[0].second - m[0].first < std::numeric_limits<uint16_t>::max();
|
|
||||||
matches.push_back({
|
|
||||||
line,
|
|
||||||
(int)(m[0].first - l.begin()),
|
|
||||||
(int)(m[0].second - l.begin()),
|
|
||||||
(uint16_t)(with_capture ? m[1].first - m[0].first : 0),
|
|
||||||
(uint16_t)(with_capture ? m[1].second - m[1].first : 0)
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void insert_matches(const Buffer& buffer, RegexMatchList& matches, const Regex& regex, bool capture, LineRange range)
|
void insert_matches(const Buffer& buffer, RegexMatchList& matches, const Regex& regex, bool capture, LineRange range)
|
||||||
{
|
{
|
||||||
size_t pivot = matches.size();
|
size_t pivot = matches.size();
|
||||||
capture = capture and regex.mark_count() > 0;
|
capture = capture and regex.mark_count() > 0;
|
||||||
|
ThreadedRegexVM<const char*, MatchDirection::Forward> vm{*regex.impl()};
|
||||||
for (auto line = range.begin; line < range.end; ++line)
|
for (auto line = range.begin; line < range.end; ++line)
|
||||||
append_matches(buffer, line, matches, regex, capture);
|
{
|
||||||
|
const StringView l = buffer[line];
|
||||||
|
for (auto&& m : RegexIterator{l.begin(), l.end(), vm})
|
||||||
|
{
|
||||||
|
const bool with_capture = capture and m[1].matched and
|
||||||
|
m[0].second - m[0].first < std::numeric_limits<uint16_t>::max();
|
||||||
|
matches.push_back({
|
||||||
|
line,
|
||||||
|
(int)(m[0].first - l.begin()),
|
||||||
|
(int)(m[0].second - l.begin()),
|
||||||
|
(uint16_t)(with_capture ? m[1].first - m[0].first : 0),
|
||||||
|
(uint16_t)(with_capture ? m[1].second - m[1].first : 0)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
auto pos = std::lower_bound(matches.begin(), matches.begin() + pivot, range.begin,
|
auto pos = std::lower_bound(matches.begin(), matches.begin() + pivot, range.begin,
|
||||||
[](const RegexMatch& m, LineCount l) { return m.line < l; });
|
[](const RegexMatch& m, LineCount l) { return m.line < l; });
|
||||||
|
|
24
src/regex.hh
24
src/regex.hh
|
@ -164,7 +164,8 @@ bool backward_regex_search(It begin, It end, It subject_begin, It subject_end,
|
||||||
String option_to_string(const Regex& re);
|
String option_to_string(const Regex& re);
|
||||||
Regex option_from_string(Meta::Type<Regex>, StringView str);
|
Regex option_from_string(Meta::Type<Regex>, StringView str);
|
||||||
|
|
||||||
template<typename Iterator, MatchDirection direction = MatchDirection::Forward>
|
template<typename Iterator, MatchDirection direction = MatchDirection::Forward,
|
||||||
|
typename VmArg = const Regex>
|
||||||
struct RegexIterator
|
struct RegexIterator
|
||||||
{
|
{
|
||||||
using ValueType = MatchResults<Iterator>;
|
using ValueType = MatchResults<Iterator>;
|
||||||
|
@ -177,25 +178,24 @@ struct RegexIterator
|
||||||
const ValueType* operator->() const { kak_assert(m_valid); return &m_base.m_results; }
|
const ValueType* operator->() const { kak_assert(m_valid); return &m_base.m_results; }
|
||||||
|
|
||||||
It& operator++() { m_valid = m_base.next(); return *this; }
|
It& operator++() { m_valid = m_base.next(); return *this; }
|
||||||
|
bool operator==(Sentinel) const { return not m_valid; }
|
||||||
|
bool operator!=(Sentinel) const { return m_valid; }
|
||||||
|
|
||||||
RegexIterator& m_base;
|
RegexIterator& m_base;
|
||||||
bool m_valid;
|
bool m_valid;
|
||||||
};
|
};
|
||||||
|
|
||||||
friend bool operator==(const It& lhs, Sentinel) { return not lhs.m_valid; }
|
|
||||||
friend bool operator!=(const It& lhs, Sentinel) { return lhs.m_valid; }
|
|
||||||
|
|
||||||
RegexIterator(Iterator begin, Iterator end,
|
RegexIterator(Iterator begin, Iterator end,
|
||||||
Iterator subject_begin, Iterator subject_end,
|
Iterator subject_begin, Iterator subject_end,
|
||||||
const Regex& re, RegexExecFlags flags = RegexExecFlags::None)
|
VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None)
|
||||||
: m_vm{*re.impl()}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
|
: m_vm{make_vm(vm_arg)}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
|
||||||
m_begin{std::move(begin)}, m_end{std::move(end)},
|
m_begin{std::move(begin)}, m_end{std::move(end)},
|
||||||
m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
|
m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
|
||||||
m_flags{flags} {}
|
m_flags{flags} {}
|
||||||
|
|
||||||
RegexIterator(const Iterator& begin, const Iterator& end, const Regex& re,
|
RegexIterator(const Iterator& begin, const Iterator& end,
|
||||||
RegexExecFlags flags = RegexExecFlags::None)
|
VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None)
|
||||||
: RegexIterator{begin, end, begin, end, re, flags} {}
|
: RegexIterator{begin, end, begin, end, vm_arg, flags} {}
|
||||||
|
|
||||||
It begin() { return {*this}; }
|
It begin() { return {*this}; }
|
||||||
Sentinel end() const { return {}; }
|
Sentinel end() const { return {}; }
|
||||||
|
@ -219,7 +219,11 @@ private:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
ThreadedRegexVM<Iterator, direction> m_vm;
|
using RegexVM = ThreadedRegexVM<Iterator, direction>;
|
||||||
|
static RegexVM& make_vm(RegexVM& vm) { return vm; }
|
||||||
|
static RegexVM make_vm(const Regex& regex) { return {*regex.impl()}; }
|
||||||
|
|
||||||
|
decltype(make_vm(std::declval<VmArg&>())) m_vm;
|
||||||
MatchResults<Iterator> m_results;
|
MatchResults<Iterator> m_results;
|
||||||
Iterator m_next_pos{};
|
Iterator m_next_pos{};
|
||||||
const Iterator m_begin{};
|
const Iterator m_begin{};
|
||||||
|
|
|
@ -931,12 +931,13 @@ void select_all_matches(SelectionList& selections, const Regex& regex, int captu
|
||||||
|
|
||||||
Vector<Selection> result;
|
Vector<Selection> result;
|
||||||
auto& buffer = selections.buffer();
|
auto& buffer = selections.buffer();
|
||||||
|
ThreadedRegexVM<BufferIterator, MatchDirection::Forward> vm{*regex.impl()};
|
||||||
for (auto& sel : selections)
|
for (auto& sel : selections)
|
||||||
{
|
{
|
||||||
auto sel_beg = buffer.iterator_at(sel.min());
|
auto sel_beg = buffer.iterator_at(sel.min());
|
||||||
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
||||||
|
|
||||||
for (auto&& match : RegexIterator{sel_beg, sel_end, regex, match_flags(buffer, sel_beg, sel_end)})
|
for (auto&& match : RegexIterator{sel_beg, sel_end, vm, match_flags(buffer, sel_beg, sel_end)})
|
||||||
{
|
{
|
||||||
auto begin = match[capture].first;
|
auto begin = match[capture].first;
|
||||||
if (begin == sel_end)
|
if (begin == sel_end)
|
||||||
|
@ -972,12 +973,13 @@ void split_selections(SelectionList& selections, const Regex& regex, int capture
|
||||||
auto& buffer = selections.buffer();
|
auto& buffer = selections.buffer();
|
||||||
auto buf_end = buffer.end();
|
auto buf_end = buffer.end();
|
||||||
auto buf_begin = buffer.begin();
|
auto buf_begin = buffer.begin();
|
||||||
|
ThreadedRegexVM<BufferIterator, MatchDirection::Forward> vm{*regex.impl()};
|
||||||
for (auto& sel : selections)
|
for (auto& sel : selections)
|
||||||
{
|
{
|
||||||
auto begin = buffer.iterator_at(sel.min());
|
auto begin = buffer.iterator_at(sel.min());
|
||||||
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
||||||
|
|
||||||
for (auto&& match : RegexIterator{begin, sel_end, regex, match_flags(buffer, begin, sel_end)})
|
for (auto&& match : RegexIterator{begin, sel_end, vm, match_flags(buffer, begin, sel_end)})
|
||||||
{
|
{
|
||||||
BufferIterator end = match[capture].first;
|
BufferIterator end = match[capture].first;
|
||||||
if (end == buf_end)
|
if (end == buf_end)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user