Regex: Make ThreadedRegexVM a proper class, define a proper interface

This commit is contained in:
Maxime Coste 2017-10-06 19:30:46 +08:00
parent 3b69dda04e
commit 236751cb84
2 changed files with 63 additions and 53 deletions

View File

@ -931,7 +931,7 @@ auto test_regex = UnitTest{[]{
{ {
TestVM vm{R"(^(foo|qux|baz)+(bar)?baz$)"}; TestVM vm{R"(^(foo|qux|baz)+(bar)?baz$)"};
kak_assert(vm.exec("fooquxbarbaz")); kak_assert(vm.exec("fooquxbarbaz"));
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "qux"); kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "qux");
kak_assert(not vm.exec("fooquxbarbaze")); kak_assert(not vm.exec("fooquxbarbaze"));
kak_assert(not vm.exec("quxbar")); kak_assert(not vm.exec("quxbar"));
kak_assert(not vm.exec("blahblah")); kak_assert(not vm.exec("blahblah"));
@ -942,7 +942,7 @@ auto test_regex = UnitTest{[]{
{ {
TestVM vm{R"(.*\b(foo|bar)\b.*)"}; TestVM vm{R"(.*\b(foo|bar)\b.*)"};
kak_assert(vm.exec("qux foo baz")); kak_assert(vm.exec("qux foo baz"));
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "foo"); kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "foo");
kak_assert(not vm.exec("quxfoobaz")); kak_assert(not vm.exec("quxfoobaz"));
kak_assert(vm.exec("bar")); kak_assert(vm.exec("bar"));
kak_assert(not vm.exec("foobar")); kak_assert(not vm.exec("foobar"));
@ -988,11 +988,11 @@ auto test_regex = UnitTest{[]{
{ {
TestVM vm{R"(f.*a(.*o))"}; TestVM vm{R"(f.*a(.*o))"};
kak_assert(vm.exec("blahfoobarfoobaz", false, true)); kak_assert(vm.exec("blahfoobarfoobaz", false, true));
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "foobarfoo"); kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foobarfoo");
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "rfoo"); kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "rfoo");
kak_assert(vm.exec("mais que fais la police", false, true)); kak_assert(vm.exec("mais que fais la police", false, true));
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "fais la po"); kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "fais la po");
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == " po"); kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == " po");
} }
{ {
@ -1006,13 +1006,13 @@ auto test_regex = UnitTest{[]{
{ {
TestVM vm{R"((a{3,5})a+)"}; TestVM vm{R"((a{3,5})a+)"};
kak_assert(vm.exec("aaaaaa", true, true)); kak_assert(vm.exec("aaaaaa", true, true));
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaaaa"); kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "aaaaa");
} }
{ {
TestVM vm{R"((a{3,5}?)a+)"}; TestVM vm{R"((a{3,5}?)a+)"};
kak_assert(vm.exec("aaaaaa", true, true)); kak_assert(vm.exec("aaaaaa", true, true));
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaa"); kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "aaa");
} }
{ {
@ -1052,20 +1052,20 @@ auto test_regex = UnitTest{[]{
{ {
TestVM vm{R"(foo\Kbar)"}; TestVM vm{R"(foo\Kbar)"};
kak_assert(vm.exec("foobar", true, true)); kak_assert(vm.exec("foobar", true, true));
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "bar"); kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "bar");
kak_assert(not vm.exec("bar", true, true)); kak_assert(not vm.exec("bar", true, true));
} }
{ {
TestVM vm{R"((fo+?).*)"}; TestVM vm{R"((fo+?).*)"};
kak_assert(vm.exec("foooo", true, true)); kak_assert(vm.exec("foooo", true, true));
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "fo"); kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "fo");
} }
{ {
TestVM vm{R"((?=foo).)"}; TestVM vm{R"((?=foo).)"};
kak_assert(vm.exec("barfoo", false, true)); kak_assert(vm.exec("barfoo", false, true));
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "f"); kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "f");
} }
{ {
@ -1111,7 +1111,7 @@ auto test_regex = UnitTest{[]{
{ {
TestVM vm{R"($)"}; TestVM vm{R"($)"};
kak_assert(vm.exec("foo\n", false, true)); kak_assert(vm.exec("foo\n", false, true));
kak_assert(*vm.m_captures->pos[0] == '\n'); kak_assert(*vm.captures()[0] == '\n');
} }
}}; }};

View File

@ -66,12 +66,14 @@ enum class RegexExecFlags
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; } constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
template<typename Iterator> template<typename Iterator>
struct ThreadedRegexVM class ThreadedRegexVM
{ {
public:
ThreadedRegexVM(const CompiledRegex& program) ThreadedRegexVM(const CompiledRegex& program)
: m_program{program} { kak_assert(m_program); } : m_program{program} { kak_assert(m_program); }
ThreadedRegexVM(const ThreadedRegexVM&) = delete; ThreadedRegexVM(const ThreadedRegexVM&) = delete;
ThreadedRegexVM& operator=(const ThreadedRegexVM&) = delete;
~ThreadedRegexVM() ~ThreadedRegexVM()
{ {
@ -83,6 +85,52 @@ struct ThreadedRegexVM
} }
} }
// Run the compiled program over [begin, end) under the given flags.
// Returns true as soon as one exec_from() attempt succeeds, false otherwise.
// begin/end/flags are stored in m_begin/m_end/m_flags for the duration of the run.
bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
{
m_begin = begin;
m_end = end;
m_flags = flags;
// With NotInitialNull set, an empty subject is rejected up front.
if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
return false;
// Both thread lists are created once here and handed to every exec_from() attempt.
Vector<Thread> current_threads, next_threads;
// When NoSaves is set, exec_from() receives a null saves pointer instead of a fresh one.
const bool no_saves = (m_flags & RegexExecFlags::NoSaves);
Utf8It start{m_begin, m_begin, m_end};
// Optional start-character table taken from the program; nullptr when the program has none.
const bool* start_chars = m_program.start_chars ? m_program.start_chars->map : nullptr;
// In Search mode the start position is first moved by to_next_start()
// (presumably to the next position whose character may begin a match — helper not fully visible here).
if (flags & RegexExecFlags::Search)
to_next_start(start, end, start_chars);
// First attempt, from the initial start position.
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
current_threads, next_threads))
return true;
// Without Search only that single attempt is made.
if (not (flags & RegexExecFlags::Search))
return false;
// Search mode: step past the failed start, re-run to_next_start(), and retry
// until start compares equal to end.
do
{
to_next_start(++start, end, start_chars);
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
current_threads, next_threads))
return true;
}
while (start != end);
return false;
}
// Read-only view of the capture positions currently held by the VM.
// Yields an empty view while m_captures is null; otherwise the view spans
// m_program.save_count entries starting at m_captures->pos.
ArrayView<const Iterator> captures() const
{
    // Nothing recorded: hand back an empty view rather than touching m_captures.
    if (not m_captures)
        return {};
    return { m_captures->pos, m_program.save_count };
}
private:
struct Saves struct Saves
{ {
int refcount; int refcount;
@ -325,44 +373,6 @@ struct ThreadedRegexVM
++start; ++start;
} }
// Run the compiled program over [begin, end) under the given flags.
// Returns true as soon as one exec_from() attempt succeeds, false otherwise.
// begin/end/flags are stored in m_begin/m_end/m_flags for the duration of the run.
bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
{
m_begin = begin;
m_end = end;
m_flags = flags;
// With NotInitialNull set, an empty subject is rejected up front.
if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
return false;
// Both thread lists are created once here and handed to every exec_from() attempt.
Vector<Thread> current_threads, next_threads;
// When NoSaves is set, exec_from() receives a null saves pointer instead of a fresh one.
const bool no_saves = (m_flags & RegexExecFlags::NoSaves);
Utf8It start{m_begin, m_begin, m_end};
// Optional start-character table taken from the program; nullptr when the program has none.
const bool* start_chars = m_program.start_chars ? m_program.start_chars->map : nullptr;
// In Search mode the start position is first moved by to_next_start()
// (presumably to the next position whose character may begin a match — helper not fully visible here).
if (flags & RegexExecFlags::Search)
to_next_start(start, end, start_chars);
// First attempt, from the initial start position.
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
current_threads, next_threads))
return true;
// Without Search only that single attempt is made.
if (not (flags & RegexExecFlags::Search))
return false;
// Search mode: step past the failed start, re-run to_next_start(), and retry
// until start compares equal to end.
do
{
to_next_start(++start, end, start_chars);
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
current_threads, next_threads))
return true;
}
while (start != end);
return false;
}
bool is_line_start(const Utf8It& pos) const bool is_line_start(const Utf8It& pos) const
{ {
return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or
@ -409,7 +419,7 @@ bool regex_match(It begin, It end, Vector<It>& captures, const CompiledRegex& re
ThreadedRegexVM<It> vm{re}; ThreadedRegexVM<It> vm{re};
if (vm.exec(begin, end, flags & ~(RegexExecFlags::Search))) if (vm.exec(begin, end, flags & ~(RegexExecFlags::Search)))
{ {
std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures)); std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures));
return true; return true;
} }
return false; return false;
@ -430,7 +440,7 @@ bool regex_search(It begin, It end, Vector<It>& captures, const CompiledRegex& r
ThreadedRegexVM<It> vm{re}; ThreadedRegexVM<It> vm{re};
if (vm.exec(begin, end, flags | RegexExecFlags::Search)) if (vm.exec(begin, end, flags | RegexExecFlags::Search))
{ {
std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures)); std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures));
return true; return true;
} }
return false; return false;