From e140df8f0857125f40f9338450f73ff1ac50664d Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Mon, 15 May 2023 20:38:10 +1000 Subject: [PATCH] Add an idle callback to be called regularly while regex matching This paves the way towards being able to cancel long regex matching operations --- src/regex.hh | 53 ++++++++++++++++++++++++++++------------------- src/regex_impl.hh | 13 ++++++++++-- 2 files changed, 43 insertions(+), 23 deletions(-) diff --git a/src/regex.hh b/src/regex.hh index a235470d..d735aaf0 100644 --- a/src/regex.hh +++ b/src/regex.hh @@ -114,19 +114,24 @@ inline RegexExecFlags match_flags(bool bol, bool eol, bool bow, bool eow) (eow ? RegexExecFlags::None : RegexExecFlags::NotEndOfWord); } -template -bool regex_match(It begin, It end, const Regex& re) +struct NoopIdle +{ + void operator()() {} +}; + +template +bool regex_match(It begin, It end, const Regex& re, IdleFunc&& idle_func = {}) { ThreadedRegexVM vm{*re.impl()}; - return vm.exec(begin, end, begin, end, RegexExecFlags::None); + return vm.exec(begin, end, begin, end, RegexExecFlags::None, idle_func); } -template -bool regex_match(It begin, It end, MatchResults& res, const Regex& re) +template +bool regex_match(It begin, It end, MatchResults& res, const Regex& re, IdleFunc&& idle_func = {}) { res.values().clear(); ThreadedRegexVM vm{*re.impl()}; - if (vm.exec(begin, end, begin, end, RegexExecFlags::None)) + if (vm.exec(begin, end, begin, end, RegexExecFlags::None, idle_func)) { std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values())); return true; @@ -134,22 +139,23 @@ bool regex_match(It begin, It end, MatchResults& res, const Regex& re) return false; } -template +template bool regex_search(It begin, It end, It subject_begin, It subject_end, const Regex& re, - RegexExecFlags flags = RegexExecFlags::None) + RegexExecFlags flags = RegexExecFlags::None, IdleFunc&& idle_func = {}) { ThreadedRegexVM vm{*re.impl()}; - return vm.exec(begin, end, subject_begin, subject_end, flags); + return vm.exec(begin, end, subject_begin, subject_end, flags, idle_func); } -template +template bool regex_search(It begin, It end, It subject_begin, It subject_end, MatchResults& res, const Regex& re, - RegexExecFlags flags = RegexExecFlags::None) + RegexExecFlags flags = RegexExecFlags::None, + IdleFunc&& idle_func = {}) { res.values().clear(); ThreadedRegexVM vm{*re.impl()}; - if (vm.exec(begin, end, subject_begin, subject_end, flags)) + if (vm.exec(begin, end, subject_begin, subject_end, flags, idle_func)) { std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values())); return true; @@ -157,19 +163,20 @@ bool regex_search(It begin, It end, It subject_begin, It subject_end, return false; } -template +template bool backward_regex_search(It begin, It end, It subject_begin, It subject_end, MatchResults& res, const Regex& re, - RegexExecFlags flags = RegexExecFlags::None) + RegexExecFlags flags = RegexExecFlags::None, + IdleFunc&& idle_func = {}) { - return regex_search(begin, end, subject_begin, subject_end, res, re, flags); + return regex_search(begin, end, subject_begin, subject_end, res, re, flags, idle_func); } String option_to_string(const Regex& re); Regex option_from_string(Meta::Type, StringView str); template + typename VmArg = const Regex, typename IdleFunc = NoopIdle> struct RegexIterator { static_assert(has_direction(mode)); @@ -193,15 +200,17 @@ struct RegexIterator RegexIterator(Iterator begin, Iterator end, Iterator subject_begin, Iterator subject_end, - VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None) + VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None, + IdleFunc idle_func = {}) : m_vm{make_vm(vm_arg)}, m_next_pos{forward ? begin : end}, m_begin{std::move(begin)}, m_end{std::move(end)}, m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)}, - m_flags{flags} {} + m_flags{flags}, m_idle_func{idle_func} {} RegexIterator(const Iterator& begin, const Iterator& end, - VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None) - : RegexIterator{begin, end, begin, end, vm_arg, flags} {} + VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None, + IdleFunc idle_func = {}) + : RegexIterator{begin, end, begin, end, vm_arg, flags, idle_func} {} It begin() { return {*this}; } Sentinel end() const { return {}; } @@ -214,7 +223,8 @@ private: additional_flags |= RegexExecFlags::NotInitialNull; if (not m_vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos, - m_subject_begin, m_subject_end, m_flags | additional_flags)) + m_subject_begin, m_subject_end, m_flags | additional_flags, + m_idle_func)) return false; m_results.values().clear(); @@ -236,6 +246,7 @@ private: const Iterator m_subject_begin{}; const Iterator m_subject_end{}; const RegexExecFlags m_flags = RegexExecFlags::None; + IdleFunc m_idle_func; }; } diff --git a/src/regex_impl.hh b/src/regex_impl.hh index 57f07c8b..4ba0e233 100644 --- a/src/regex_impl.hh +++ b/src/regex_impl.hh @@ -243,6 +243,13 @@ public: bool exec(const Iterator& begin, const Iterator& end, const Iterator& subject_begin, const Iterator& subject_end, RegexExecFlags flags) + { + return exec(begin, end, subject_begin, subject_end, flags, []{}); + } + + bool exec(const Iterator& begin, const Iterator& end, + const Iterator& subject_begin, const Iterator& subject_end, + RegexExecFlags flags, auto&& idle_func) { if (flags & RegexExecFlags::NotInitialNull and begin == end) return false; @@ -274,7 +281,7 @@ public: } } - return exec_program(std::move(start), config); + return exec_program(std::move(start), config, idle_func); } ArrayView captures() const @@ -453,7 +460,7 @@ private: return failed(); } - bool exec_program(Iterator pos, const ExecConfig& config) + bool exec_program(Iterator pos, const ExecConfig& config, auto&& idle_func) { kak_assert(m_threads.current_is_empty() and m_threads.next_is_empty()); release_saves(m_captures); @@ -473,6 +480,8 @@ private: { if (++current_step == 0) { + idle_func(); + // We wrapped, avoid potential collision on inst.last_step by resetting them ConstArrayView instructions{m_program.instructions}; instructions = forward ? instructions.subrange(0, m_program.first_backward_inst)