Add an idle callback to be called regularly while regex matching

This paves the way towards being able to cancel long regex matching
operations
This commit is contained in:
Maxime Coste 2023-05-15 20:38:10 +10:00
parent 19b4149d47
commit e140df8f08
2 changed files with 43 additions and 23 deletions

View File

@ -114,19 +114,24 @@ inline RegexExecFlags match_flags(bool bol, bool eol, bool bow, bool eow)
(eow ? RegexExecFlags::None : RegexExecFlags::NotEndOfWord); (eow ? RegexExecFlags::None : RegexExecFlags::NotEndOfWord);
} }
template<typename It> struct NoopIdle
bool regex_match(It begin, It end, const Regex& re) {
void operator()() {}
};
template<typename It, typename IdleFunc = NoopIdle>
bool regex_match(It begin, It end, const Regex& re, IdleFunc&& idle_func = {})
{ {
ThreadedRegexVM<It, RegexMode::Forward | RegexMode::AnyMatch | RegexMode::NoSaves> vm{*re.impl()}; ThreadedRegexVM<It, RegexMode::Forward | RegexMode::AnyMatch | RegexMode::NoSaves> vm{*re.impl()};
return vm.exec(begin, end, begin, end, RegexExecFlags::None); return vm.exec(begin, end, begin, end, RegexExecFlags::None, idle_func);
} }
template<typename It> template<typename It, typename IdleFunc = NoopIdle>
bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re) bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re, IdleFunc&& idle_func = {})
{ {
res.values().clear(); res.values().clear();
ThreadedRegexVM<It, RegexMode::Forward> vm{*re.impl()}; ThreadedRegexVM<It, RegexMode::Forward> vm{*re.impl()};
if (vm.exec(begin, end, begin, end, RegexExecFlags::None)) if (vm.exec(begin, end, begin, end, RegexExecFlags::None, idle_func))
{ {
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values())); std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
return true; return true;
@ -134,22 +139,23 @@ bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
return false; return false;
} }
template<typename It> template<typename It, typename IdleFunc = NoopIdle>
bool regex_search(It begin, It end, It subject_begin, It subject_end, const Regex& re, bool regex_search(It begin, It end, It subject_begin, It subject_end, const Regex& re,
RegexExecFlags flags = RegexExecFlags::None) RegexExecFlags flags = RegexExecFlags::None, IdleFunc&& idle_func = {})
{ {
ThreadedRegexVM<It, RegexMode::Forward | RegexMode::Search | RegexMode::AnyMatch | RegexMode::NoSaves> vm{*re.impl()}; ThreadedRegexVM<It, RegexMode::Forward | RegexMode::Search | RegexMode::AnyMatch | RegexMode::NoSaves> vm{*re.impl()};
return vm.exec(begin, end, subject_begin, subject_end, flags); return vm.exec(begin, end, subject_begin, subject_end, flags, idle_func);
} }
template<typename It, RegexMode mode = RegexMode::Forward> template<typename It, RegexMode mode = RegexMode::Forward, typename IdleFunc = NoopIdle>
bool regex_search(It begin, It end, It subject_begin, It subject_end, bool regex_search(It begin, It end, It subject_begin, It subject_end,
MatchResults<It>& res, const Regex& re, MatchResults<It>& res, const Regex& re,
RegexExecFlags flags = RegexExecFlags::None) RegexExecFlags flags = RegexExecFlags::None,
IdleFunc&& idle_func = {})
{ {
res.values().clear(); res.values().clear();
ThreadedRegexVM<It, mode | RegexMode::Search> vm{*re.impl()}; ThreadedRegexVM<It, mode | RegexMode::Search> vm{*re.impl()};
if (vm.exec(begin, end, subject_begin, subject_end, flags)) if (vm.exec(begin, end, subject_begin, subject_end, flags, idle_func))
{ {
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values())); std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
return true; return true;
@ -157,19 +163,20 @@ bool regex_search(It begin, It end, It subject_begin, It subject_end,
return false; return false;
} }
template<typename It> template<typename It, typename IdleFunc = NoopIdle>
bool backward_regex_search(It begin, It end, It subject_begin, It subject_end, bool backward_regex_search(It begin, It end, It subject_begin, It subject_end,
MatchResults<It>& res, const Regex& re, MatchResults<It>& res, const Regex& re,
RegexExecFlags flags = RegexExecFlags::None) RegexExecFlags flags = RegexExecFlags::None,
IdleFunc&& idle_func = {})
{ {
return regex_search<It, RegexMode::Backward>(begin, end, subject_begin, subject_end, res, re, flags); return regex_search<It, RegexMode::Backward>(begin, end, subject_begin, subject_end, res, re, flags, idle_func);
} }
String option_to_string(const Regex& re); String option_to_string(const Regex& re);
Regex option_from_string(Meta::Type<Regex>, StringView str); Regex option_from_string(Meta::Type<Regex>, StringView str);
template<typename Iterator, RegexMode mode = RegexMode::Forward, template<typename Iterator, RegexMode mode = RegexMode::Forward,
typename VmArg = const Regex> typename VmArg = const Regex, typename IdleFunc = NoopIdle>
struct RegexIterator struct RegexIterator
{ {
static_assert(has_direction(mode)); static_assert(has_direction(mode));
@ -193,15 +200,17 @@ struct RegexIterator
RegexIterator(Iterator begin, Iterator end, RegexIterator(Iterator begin, Iterator end,
Iterator subject_begin, Iterator subject_end, Iterator subject_begin, Iterator subject_end,
VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None) VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None,
IdleFunc idle_func = {})
: m_vm{make_vm(vm_arg)}, m_next_pos{forward ? begin : end}, : m_vm{make_vm(vm_arg)}, m_next_pos{forward ? begin : end},
m_begin{std::move(begin)}, m_end{std::move(end)}, m_begin{std::move(begin)}, m_end{std::move(end)},
m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)}, m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
m_flags{flags} {} m_flags{flags}, m_idle_func{idle_func} {}
RegexIterator(const Iterator& begin, const Iterator& end, RegexIterator(const Iterator& begin, const Iterator& end,
VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None) VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None,
: RegexIterator{begin, end, begin, end, vm_arg, flags} {} IdleFunc idle_func = {})
: RegexIterator{begin, end, begin, end, vm_arg, flags, idle_func} {}
It begin() { return {*this}; } It begin() { return {*this}; }
Sentinel end() const { return {}; } Sentinel end() const { return {}; }
@ -214,7 +223,8 @@ private:
additional_flags |= RegexExecFlags::NotInitialNull; additional_flags |= RegexExecFlags::NotInitialNull;
if (not m_vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos, if (not m_vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos,
m_subject_begin, m_subject_end, m_flags | additional_flags)) m_subject_begin, m_subject_end, m_flags | additional_flags,
m_idle_func))
return false; return false;
m_results.values().clear(); m_results.values().clear();
@ -236,6 +246,7 @@ private:
const Iterator m_subject_begin{}; const Iterator m_subject_begin{};
const Iterator m_subject_end{}; const Iterator m_subject_end{};
const RegexExecFlags m_flags = RegexExecFlags::None; const RegexExecFlags m_flags = RegexExecFlags::None;
IdleFunc m_idle_func;
}; };
} }

View File

@ -243,6 +243,13 @@ public:
bool exec(const Iterator& begin, const Iterator& end, bool exec(const Iterator& begin, const Iterator& end,
const Iterator& subject_begin, const Iterator& subject_end, const Iterator& subject_begin, const Iterator& subject_end,
RegexExecFlags flags) RegexExecFlags flags)
{
return exec(begin, end, subject_begin, subject_end, flags, []{});
}
bool exec(const Iterator& begin, const Iterator& end,
const Iterator& subject_begin, const Iterator& subject_end,
RegexExecFlags flags, auto&& idle_func)
{ {
if (flags & RegexExecFlags::NotInitialNull and begin == end) if (flags & RegexExecFlags::NotInitialNull and begin == end)
return false; return false;
@ -274,7 +281,7 @@ public:
} }
} }
return exec_program(std::move(start), config); return exec_program(std::move(start), config, idle_func);
} }
ArrayView<const Iterator> captures() const ArrayView<const Iterator> captures() const
@ -453,7 +460,7 @@ private:
return failed(); return failed();
} }
bool exec_program(Iterator pos, const ExecConfig& config) bool exec_program(Iterator pos, const ExecConfig& config, auto&& idle_func)
{ {
kak_assert(m_threads.current_is_empty() and m_threads.next_is_empty()); kak_assert(m_threads.current_is_empty() and m_threads.next_is_empty());
release_saves(m_captures); release_saves(m_captures);
@ -473,6 +480,8 @@ private:
{ {
if (++current_step == 0) if (++current_step == 0)
{ {
idle_func();
// We wrapped, avoid potential collision on inst.last_step by resetting them // We wrapped, avoid potential collision on inst.last_step by resetting them
ConstArrayView<CompiledRegex::Instruction> instructions{m_program.instructions}; ConstArrayView<CompiledRegex::Instruction> instructions{m_program.instructions};
instructions = forward ? instructions.subrange(0, m_program.first_backward_inst) instructions = forward ? instructions.subrange(0, m_program.first_backward_inst)