Add an idle callback to be called regularly while regex matching

This paves the way towards being able to cancel long regex matching
operations
This commit is contained in:
Maxime Coste 2023-05-15 20:38:10 +10:00
parent 19b4149d47
commit e140df8f08
2 changed files with 43 additions and 23 deletions

View File

@ -114,19 +114,24 @@ inline RegexExecFlags match_flags(bool bol, bool eol, bool bow, bool eow)
(eow ? RegexExecFlags::None : RegexExecFlags::NotEndOfWord);
}
template<typename It>
bool regex_match(It begin, It end, const Regex& re)
struct NoopIdle
{
void operator()() {}
};
template<typename It, typename IdleFunc = NoopIdle>
bool regex_match(It begin, It end, const Regex& re, IdleFunc&& idle_func = {})
{
ThreadedRegexVM<It, RegexMode::Forward | RegexMode::AnyMatch | RegexMode::NoSaves> vm{*re.impl()};
return vm.exec(begin, end, begin, end, RegexExecFlags::None);
return vm.exec(begin, end, begin, end, RegexExecFlags::None, idle_func);
}
template<typename It>
bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
template<typename It, typename IdleFunc = NoopIdle>
bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re, IdleFunc&& idle_func = {})
{
res.values().clear();
ThreadedRegexVM<It, RegexMode::Forward> vm{*re.impl()};
if (vm.exec(begin, end, begin, end, RegexExecFlags::None))
if (vm.exec(begin, end, begin, end, RegexExecFlags::None, idle_func))
{
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
return true;
@ -134,22 +139,23 @@ bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
return false;
}
template<typename It>
template<typename It, typename IdleFunc = NoopIdle>
bool regex_search(It begin, It end, It subject_begin, It subject_end, const Regex& re,
RegexExecFlags flags = RegexExecFlags::None)
RegexExecFlags flags = RegexExecFlags::None, IdleFunc&& idle_func = {})
{
ThreadedRegexVM<It, RegexMode::Forward | RegexMode::Search | RegexMode::AnyMatch | RegexMode::NoSaves> vm{*re.impl()};
return vm.exec(begin, end, subject_begin, subject_end, flags);
return vm.exec(begin, end, subject_begin, subject_end, flags, idle_func);
}
template<typename It, RegexMode mode = RegexMode::Forward>
template<typename It, RegexMode mode = RegexMode::Forward, typename IdleFunc = NoopIdle>
bool regex_search(It begin, It end, It subject_begin, It subject_end,
MatchResults<It>& res, const Regex& re,
RegexExecFlags flags = RegexExecFlags::None)
RegexExecFlags flags = RegexExecFlags::None,
IdleFunc&& idle_func = {})
{
res.values().clear();
ThreadedRegexVM<It, mode | RegexMode::Search> vm{*re.impl()};
if (vm.exec(begin, end, subject_begin, subject_end, flags))
if (vm.exec(begin, end, subject_begin, subject_end, flags, idle_func))
{
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
return true;
@ -157,19 +163,20 @@ bool regex_search(It begin, It end, It subject_begin, It subject_end,
return false;
}
template<typename It>
template<typename It, typename IdleFunc = NoopIdle>
bool backward_regex_search(It begin, It end, It subject_begin, It subject_end,
MatchResults<It>& res, const Regex& re,
RegexExecFlags flags = RegexExecFlags::None)
RegexExecFlags flags = RegexExecFlags::None,
IdleFunc&& idle_func = {})
{
return regex_search<It, RegexMode::Backward>(begin, end, subject_begin, subject_end, res, re, flags);
return regex_search<It, RegexMode::Backward>(begin, end, subject_begin, subject_end, res, re, flags, idle_func);
}
String option_to_string(const Regex& re);
Regex option_from_string(Meta::Type<Regex>, StringView str);
template<typename Iterator, RegexMode mode = RegexMode::Forward,
typename VmArg = const Regex>
typename VmArg = const Regex, typename IdleFunc = NoopIdle>
struct RegexIterator
{
static_assert(has_direction(mode));
@ -193,15 +200,17 @@ struct RegexIterator
RegexIterator(Iterator begin, Iterator end,
Iterator subject_begin, Iterator subject_end,
VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None)
VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None,
IdleFunc idle_func = {})
: m_vm{make_vm(vm_arg)}, m_next_pos{forward ? begin : end},
m_begin{std::move(begin)}, m_end{std::move(end)},
m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
m_flags{flags} {}
m_flags{flags}, m_idle_func{idle_func} {}
RegexIterator(const Iterator& begin, const Iterator& end,
VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None)
: RegexIterator{begin, end, begin, end, vm_arg, flags} {}
VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None,
IdleFunc idle_func = {})
: RegexIterator{begin, end, begin, end, vm_arg, flags, idle_func} {}
It begin() { return {*this}; }
Sentinel end() const { return {}; }
@ -214,7 +223,8 @@ private:
additional_flags |= RegexExecFlags::NotInitialNull;
if (not m_vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos,
m_subject_begin, m_subject_end, m_flags | additional_flags))
m_subject_begin, m_subject_end, m_flags | additional_flags,
m_idle_func))
return false;
m_results.values().clear();
@ -236,6 +246,7 @@ private:
const Iterator m_subject_begin{};
const Iterator m_subject_end{};
const RegexExecFlags m_flags = RegexExecFlags::None;
IdleFunc m_idle_func;
};
}

View File

@ -243,6 +243,13 @@ public:
bool exec(const Iterator& begin, const Iterator& end,
const Iterator& subject_begin, const Iterator& subject_end,
RegexExecFlags flags)
{
return exec(begin, end, subject_begin, subject_end, flags, []{});
}
bool exec(const Iterator& begin, const Iterator& end,
const Iterator& subject_begin, const Iterator& subject_end,
RegexExecFlags flags, auto&& idle_func)
{
if (flags & RegexExecFlags::NotInitialNull and begin == end)
return false;
@ -274,7 +281,7 @@ public:
}
}
return exec_program(std::move(start), config);
return exec_program(std::move(start), config, idle_func);
}
ArrayView<const Iterator> captures() const
@ -453,7 +460,7 @@ private:
return failed();
}
bool exec_program(Iterator pos, const ExecConfig& config)
bool exec_program(Iterator pos, const ExecConfig& config, auto&& idle_func)
{
kak_assert(m_threads.current_is_empty() and m_threads.next_is_empty());
release_saves(m_captures);
@ -473,6 +480,8 @@ private:
{
if (++current_step == 0)
{
idle_func();
// We wrapped, avoid potential collision on inst.last_step by resetting them
ConstArrayView<CompiledRegex::Instruction> instructions{m_program.instructions};
instructions = forward ? instructions.subrange(0, m_program.first_backward_inst)