Fix quadratic runtime when updating region highlighter matches
Running %sYeti<ret>casdf on file [example.journal.txt](https://github.com/mawww/kakoune/issues/4685#issuecomment-1193243588) can cause noticeable lag. This is because we insert text at 6000 selections, which means we need to update highlighters in those lines. The runtime for updating range highlighters is quadratic in the number of selections: for each selection, we call on_new_range(), which calls add_matches(), which calls std::rotate(), which needs needs linear time. Fix the quadratic runtime by calling std::inplace_merge() once instead of repeatedly calling std::rotate(). This is works because ranges are already sorted. I used this script to benchmark the improvements. (In hindsight I could have just used "-ui json" instead of tmux). #!/bin/sh set -ex N=${1:-100} kak=${2:-./kak.opt} for i in $(seq "$N") do echo -n "\ 2022-02-06 * Earth expense:electronics:audio 116.7 USD liability:card -116.7 USD 2022-02-06 * Blue Yeti USB Microphone expense:electronics:audio 116.7 USD liability:card -116.7 USD " done > big-journal.ledger echo > .empty-tmux.conf 'set -sg escape-time 5' test_tmux() { tmux -S .tmux-socket -f .empty-tmux.conf "$@" } test_tmux new-session -d "$kak" big-journal.ledger test_tmux send-keys '%sYeti' Enter c 1234567890 sleep .2 test_tmux send-keys Escape while ! test_tmux capture-pane -p | grep 123 do sleep .1 done test_tmux send-keys ':wq' Enter while test_tmux ls do sleep .1 done rm -f .tmux-socket .empty-tmux.conf This script's runtime used to grow super-linearly but now it grows linearly: kak.old kak.new N=10000 1.142 0.897 N=20000 2.879 1.400 Detailed results: $ hyperfine -w 1 './bench.sh 10000 ./kak.opt.'{old,new} Benchmark 1: ./bench.sh 10000 ./kak.opt.old Time (mean ± σ): 1.142 s ± 0.072 s [User: 0.252 s, System: 0.059 s] Range (min … max): 1.060 s … 1.242 s 10 runs Benchmark 2: ./bench.sh 10000 ./kak.opt.new Time (mean ± σ): 897.2 ms ± 19.3 ms [User: 241.6 ms, System: 57.4 ms] Range (min … max): 853.9 ms … 923.6 ms 10 runs Summary './bench.sh 10000 ./kak.opt.new' ran 1.27 ± 0.09 times faster than './bench.sh 10000 ./kak.opt.old' $ hyperfine -w 1 './bench.sh 20000 ./kak.opt.'{old,new} Benchmark 1: ./bench.sh 20000 ./kak.opt.old Time (mean ± σ): 2.879 s ± 0.065 s [User: 0.553 s, System: 0.126 s] Range (min … max): 2.768 s … 2.963 s 10 runs Benchmark 2: ./bench.sh 20000 ./kak.opt.new Time (mean ± σ): 1.400 s ± 0.018 s [User: 0.428 s, System: 0.083 s] Range (min … max): 1.374 s … 1.429 s 10 runs Summary './bench.sh 20000 ./kak.opt.new' ran 2.06 ± 0.05 times faster than '../repro.sh 20000 ./kak.opt.old'
This commit is contained in:
parent
a33ec8dc80
commit
803873c91c
|
@ -2210,11 +2210,57 @@ private:
|
||||||
m_regexes.insert({key, Regex{str, flags}});
|
m_regexes.insert({key, Regex{str, flags}});
|
||||||
}
|
}
|
||||||
|
|
||||||
void add_matches(const Buffer& buffer, LineRange range, Cache& cache) const
|
class MatchAdder
|
||||||
{
|
{
|
||||||
for (auto& [key, regex] : m_regexes)
|
public:
|
||||||
cache.matches[key];
|
MatchAdder(RegionsHighlighter& region, const Buffer& buffer, Cache& cache) : m_buffer(buffer)
|
||||||
|
{
|
||||||
|
for (auto& [key, regex] : region.m_regexes)
|
||||||
|
cache.matches[key];
|
||||||
|
for (auto& [key, regex] : region.m_regexes)
|
||||||
|
m_matchers.push_back(Matcher{cache.matches.get(key), regex});
|
||||||
|
}
|
||||||
|
|
||||||
|
~MatchAdder()
|
||||||
|
{
|
||||||
|
// Move new matches into position.
|
||||||
|
for (auto& [matches, regex, pivot, vm] : m_matchers)
|
||||||
|
std::inplace_merge(matches.begin(), matches.begin() + pivot, matches.end(),
|
||||||
|
[](const auto& lhs, const auto& rhs) { return lhs.line < rhs.line; });
|
||||||
|
}
|
||||||
|
|
||||||
|
void add(LineRange range)
|
||||||
|
{
|
||||||
|
for (auto line = range.begin; line < range.end; ++line)
|
||||||
|
{
|
||||||
|
const StringView l = m_buffer[line];
|
||||||
|
const auto flags = RegexExecFlags::NotEndOfLine; // buffer line already ends with \n
|
||||||
|
|
||||||
|
for (auto& [matches, regex, pivot, vm] : m_matchers)
|
||||||
|
{
|
||||||
|
auto extra_flags = RegexExecFlags::None;
|
||||||
|
auto pos = l.begin();
|
||||||
|
while (vm.exec(pos, l.end(), l.begin(), l.end(), flags | extra_flags))
|
||||||
|
{
|
||||||
|
ConstArrayView<const char*> captures = vm.captures();
|
||||||
|
const bool with_capture = regex.mark_count() > 0 and captures[2] != nullptr and
|
||||||
|
captures[1] - captures[0] < std::numeric_limits<uint16_t>::max();
|
||||||
|
matches.push_back({
|
||||||
|
line,
|
||||||
|
(int)(captures[0] - l.begin()),
|
||||||
|
(int)(captures[1] - l.begin()),
|
||||||
|
(uint16_t)(with_capture ? captures[2] - captures[0] : 0),
|
||||||
|
(uint16_t)(with_capture ? captures[3] - captures[2] : 0)
|
||||||
|
});
|
||||||
|
pos = captures[1];
|
||||||
|
|
||||||
|
extra_flags = (captures[0] == captures[1]) ? RegexExecFlags::NotInitialNull : RegexExecFlags::None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
struct Matcher
|
struct Matcher
|
||||||
{
|
{
|
||||||
RegexMatchList& matches;
|
RegexMatchList& matches;
|
||||||
|
@ -2222,48 +2268,10 @@ private:
|
||||||
size_t pivot = matches.size();
|
size_t pivot = matches.size();
|
||||||
ThreadedRegexVM<const char*, RegexMode::Forward | RegexMode::Search> vm{*regex.impl()};
|
ThreadedRegexVM<const char*, RegexMode::Forward | RegexMode::Search> vm{*regex.impl()};
|
||||||
};
|
};
|
||||||
Vector<Matcher> matchers;
|
|
||||||
for (auto& [key, regex] : m_regexes)
|
|
||||||
matchers.push_back(Matcher{cache.matches.get(key), regex});
|
|
||||||
|
|
||||||
for (auto line = range.begin; line < range.end; ++line)
|
const Buffer& m_buffer;
|
||||||
{
|
Vector<Matcher> m_matchers;
|
||||||
const StringView l = buffer[line];
|
};
|
||||||
const auto flags = RegexExecFlags::NotEndOfLine; // buffer line already ends with \n
|
|
||||||
|
|
||||||
for (auto& [matches, regex, pivot, vm] : matchers)
|
|
||||||
{
|
|
||||||
auto extra_flags = RegexExecFlags::None;
|
|
||||||
auto pos = l.begin();
|
|
||||||
while (vm.exec(pos, l.end(), l.begin(), l.end(), flags | extra_flags))
|
|
||||||
{
|
|
||||||
ConstArrayView<const char*> captures = vm.captures();
|
|
||||||
const bool with_capture = regex.mark_count() > 0 and captures[2] != nullptr and
|
|
||||||
captures[1] - captures[0] < std::numeric_limits<uint16_t>::max();
|
|
||||||
matches.push_back({
|
|
||||||
line,
|
|
||||||
(int)(captures[0] - l.begin()),
|
|
||||||
(int)(captures[1] - l.begin()),
|
|
||||||
(uint16_t)(with_capture ? captures[2] - captures[0] : 0),
|
|
||||||
(uint16_t)(with_capture ? captures[3] - captures[2] : 0)
|
|
||||||
});
|
|
||||||
pos = captures[1];
|
|
||||||
|
|
||||||
extra_flags = (captures[0] == captures[1]) ? RegexExecFlags::NotInitialNull : RegexExecFlags::None;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto& [matches, regex, pivot, vm] : matchers)
|
|
||||||
{
|
|
||||||
auto pos = std::lower_bound(matches.begin(), matches.begin() + pivot, range.begin,
|
|
||||||
[](const RegexMatch& m, LineCount l) { return m.line < l; });
|
|
||||||
kak_assert(pos == matches.begin() + pivot or pos->line >= range.end); // We should not have had matches for range
|
|
||||||
|
|
||||||
// Move new matches into position.
|
|
||||||
std::rotate(pos, matches.begin() + pivot, matches.end());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void update_changed_lines(const Buffer& buffer, ConstArrayView<LineModification> modifs, Cache& cache)
|
void update_changed_lines(const Buffer& buffer, ConstArrayView<LineModification> modifs, Cache& cache)
|
||||||
{
|
{
|
||||||
|
@ -2299,7 +2307,6 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool update_matches(Cache& cache, const Buffer& buffer, LineRange range)
|
bool update_matches(Cache& cache, const Buffer& buffer, LineRange range)
|
||||||
{
|
{
|
||||||
const size_t buffer_timestamp = buffer.timestamp();
|
const size_t buffer_timestamp = buffer.timestamp();
|
||||||
|
@ -2315,7 +2322,7 @@ private:
|
||||||
add_regex(region->m_recurse, region->match_capture());
|
add_regex(region->m_recurse, region->match_capture());
|
||||||
}
|
}
|
||||||
|
|
||||||
add_matches(buffer, range, cache);
|
MatchAdder{*this, buffer, cache}.add(range);
|
||||||
cache.ranges.reset(range);
|
cache.ranges.reset(range);
|
||||||
cache.buffer_timestamp = buffer_timestamp;
|
cache.buffer_timestamp = buffer_timestamp;
|
||||||
cache.regions_timestamp = m_regions_timestamp;
|
cache.regions_timestamp = m_regions_timestamp;
|
||||||
|
@ -2333,10 +2340,11 @@ private:
|
||||||
modified = true;
|
modified = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
cache.ranges.add_range(range, [&, this](const LineRange& range) {
|
MatchAdder matches{*this, buffer, cache};
|
||||||
|
cache.ranges.add_range(range, [&](const LineRange& range) {
|
||||||
if (range.begin == range.end)
|
if (range.begin == range.end)
|
||||||
return;
|
return;
|
||||||
add_matches(buffer, range, cache);
|
matches.add(range);
|
||||||
modified = true;
|
modified = true;
|
||||||
});
|
});
|
||||||
return modified;
|
return modified;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user