Regex: Add a NoSaves RegexExecFlags to disable saving positions

This commit is contained in:
Maxime Coste 2017-10-03 19:07:44 +08:00
parent 119bc38254
commit 54da8098ae

View File

@ -57,7 +57,8 @@ enum class RegexExecFlags
NotEndOfWord = 1 << 4, NotEndOfWord = 1 << 4,
NotBeginOfSubject = 1 << 5, NotBeginOfSubject = 1 << 5,
NotInitialNull = 1 << 6, NotInitialNull = 1 << 6,
AnyMatch = 1 << 7 AnyMatch = 1 << 7,
NoSaves = 1 << 8,
}; };
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; } constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
@ -124,6 +125,7 @@ struct ThreadedRegexVM
auto parent = thread.inst + sizeof(CompiledRegex::Offset); auto parent = thread.inst + sizeof(CompiledRegex::Offset);
auto child = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst); auto child = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst);
thread.inst = parent; thread.inst = parent;
if (thread.saves)
++thread.saves->refcount; ++thread.saves->refcount;
m_current_threads.push_back({child, thread.saves}); m_current_threads.push_back({child, thread.saves});
break; break;
@ -133,12 +135,16 @@ struct ThreadedRegexVM
auto parent = thread.inst + sizeof(CompiledRegex::Offset); auto parent = thread.inst + sizeof(CompiledRegex::Offset);
auto child = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst); auto child = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst);
thread.inst = child; thread.inst = child;
if (thread.saves)
++thread.saves->refcount; ++thread.saves->refcount;
m_current_threads.push_back({parent, thread.saves}); m_current_threads.push_back({parent, thread.saves});
break; break;
} }
case CompiledRegex::Save: case CompiledRegex::Save:
{ {
if (thread.saves == nullptr)
break;
const char index = *thread.inst++; const char index = *thread.inst++;
if (thread.saves->refcount > 1) if (thread.saves->refcount > 1)
{ {
@ -213,23 +219,29 @@ struct ThreadedRegexVM
bool exec(Iterator begin, Iterator end, RegexExecFlags flags) bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
{ {
m_begin = begin;
m_end = end;
m_flags = flags;
bool found_match = false; bool found_match = false;
m_current_threads.clear(); m_current_threads.clear();
m_next_threads.clear(); m_next_threads.clear();
const auto start_offset = (flags & RegexExecFlags::Search) ? 0 : CompiledRegex::search_prefix_size; Saves* initial_saves = nullptr;
if (not (m_flags & RegexExecFlags::NoSaves))
{
m_saves.push_back(std::make_unique<Saves>(Saves{1, Vector<Iterator>(m_program.save_count, Iterator{})})); m_saves.push_back(std::make_unique<Saves>(Saves{1, Vector<Iterator>(m_program.save_count, Iterator{})}));
m_current_threads.push_back({m_program.bytecode.data() + start_offset, m_saves.back().get()}); initial_saves = m_saves.back().get();
}
m_begin = begin; const auto start_offset = (flags & RegexExecFlags::Search) ? 0 : CompiledRegex::search_prefix_size;
m_end = end; m_current_threads.push_back({m_program.bytecode.data() + start_offset, initial_saves});
m_flags = flags;
if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end) if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
return false; return false;
auto release_saves = [this](Saves* saves) { auto release_saves = [this](Saves* saves) {
if (--saves->refcount == 0) if (saves and --saves->refcount == 0)
m_free_saves.push_back(saves); m_free_saves.push_back(saves);
}; };
@ -249,7 +261,9 @@ struct ThreadedRegexVM
continue; continue;
} }
if (thread.saves)
m_captures = std::move(thread.saves->pos); m_captures = std::move(thread.saves->pos);
if (flags & RegexExecFlags::AnyMatch) if (flags & RegexExecFlags::AnyMatch)
return true; return true;
@ -283,6 +297,7 @@ struct ThreadedRegexVM
m_current_threads.pop_back(); m_current_threads.pop_back();
if (step(thread) == StepResult::Matched) if (step(thread) == StepResult::Matched)
{ {
if (thread.saves)
m_captures = std::move(thread.saves->pos); m_captures = std::move(thread.saves->pos);
return true; return true;
} }
@ -330,7 +345,8 @@ template<typename It>
bool regex_match(It begin, It end, const CompiledRegex& re, RegexExecFlags flags = RegexExecFlags::None) bool regex_match(It begin, It end, const CompiledRegex& re, RegexExecFlags flags = RegexExecFlags::None)
{ {
ThreadedRegexVM<It> vm{re}; ThreadedRegexVM<It> vm{re};
return vm.exec(begin, end, (RegexExecFlags)(flags & ~(RegexExecFlags::Search)) | RegexExecFlags::AnyMatch); return vm.exec(begin, end, (RegexExecFlags)(flags & ~(RegexExecFlags::Search)) |
RegexExecFlags::AnyMatch | RegexExecFlags::NoSaves);
} }
template<typename It> template<typename It>
@ -351,7 +367,7 @@ bool regex_search(It begin, It end, const CompiledRegex& re,
RegexExecFlags flags = RegexExecFlags::None) RegexExecFlags flags = RegexExecFlags::None)
{ {
ThreadedRegexVM<It> vm{re}; ThreadedRegexVM<It> vm{re};
return vm.exec(begin, end, flags | RegexExecFlags::Search | RegexExecFlags::AnyMatch); return vm.exec(begin, end, flags | RegexExecFlags::Search | RegexExecFlags::AnyMatch | RegexExecFlags::NoSaves);
} }
template<typename It> template<typename It>