Regex: Make ThreadedRegexVM a proper class, define a proper interface

This commit is contained in:
Maxime Coste 2017-10-06 19:30:46 +08:00
parent 3b69dda04e
commit 236751cb84
2 changed files with 63 additions and 53 deletions

View File

@ -931,7 +931,7 @@ auto test_regex = UnitTest{[]{
{
TestVM vm{R"(^(foo|qux|baz)+(bar)?baz$)"};
kak_assert(vm.exec("fooquxbarbaz"));
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "qux");
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "qux");
kak_assert(not vm.exec("fooquxbarbaze"));
kak_assert(not vm.exec("quxbar"));
kak_assert(not vm.exec("blahblah"));
@ -942,7 +942,7 @@ auto test_regex = UnitTest{[]{
{
TestVM vm{R"(.*\b(foo|bar)\b.*)"};
kak_assert(vm.exec("qux foo baz"));
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "foo");
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "foo");
kak_assert(not vm.exec("quxfoobaz"));
kak_assert(vm.exec("bar"));
kak_assert(not vm.exec("foobar"));
@ -988,11 +988,11 @@ auto test_regex = UnitTest{[]{
{
TestVM vm{R"(f.*a(.*o))"};
kak_assert(vm.exec("blahfoobarfoobaz", false, true));
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "foobarfoo");
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "rfoo");
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foobarfoo");
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "rfoo");
kak_assert(vm.exec("mais que fais la police", false, true));
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "fais la po");
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == " po");
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "fais la po");
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == " po");
}
{
@ -1006,13 +1006,13 @@ auto test_regex = UnitTest{[]{
{
TestVM vm{R"((a{3,5})a+)"};
kak_assert(vm.exec("aaaaaa", true, true));
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaaaa");
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "aaaaa");
}
{
TestVM vm{R"((a{3,5}?)a+)"};
kak_assert(vm.exec("aaaaaa", true, true));
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaa");
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "aaa");
}
{
@ -1052,20 +1052,20 @@ auto test_regex = UnitTest{[]{
{
TestVM vm{R"(foo\Kbar)"};
kak_assert(vm.exec("foobar", true, true));
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "bar");
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "bar");
kak_assert(not vm.exec("bar", true, true));
}
{
TestVM vm{R"((fo+?).*)"};
kak_assert(vm.exec("foooo", true, true));
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "fo");
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "fo");
}
{
TestVM vm{R"((?=foo).)"};
kak_assert(vm.exec("barfoo", false, true));
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "f");
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "f");
}
{
@ -1111,7 +1111,7 @@ auto test_regex = UnitTest{[]{
{
TestVM vm{R"($)"};
kak_assert(vm.exec("foo\n", false, true));
kak_assert(*vm.m_captures->pos[0] == '\n');
kak_assert(*vm.captures()[0] == '\n');
}
}};

View File

@ -66,12 +66,14 @@ enum class RegexExecFlags
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
template<typename Iterator>
struct ThreadedRegexVM
class ThreadedRegexVM
{
public:
// Build a VM that executes the given compiled program.
// The program is stored in m_program and checked with kak_assert right away
// (presumably this rejects an empty/uncompiled program — confirm against
// CompiledRegex's bool conversion).
ThreadedRegexVM(const CompiledRegex& program)
: m_program{program} { kak_assert(m_program); }
// Copy construction and copy assignment are explicitly disabled.
ThreadedRegexVM(const ThreadedRegexVM&) = delete;
ThreadedRegexVM& operator=(const ThreadedRegexVM&) = delete;
~ThreadedRegexVM()
{
@ -83,6 +85,52 @@ struct ThreadedRegexVM
}
}
// Run the program on the range [begin, end) according to flags.
//
// begin, end and flags are recorded in m_begin, m_end and m_flags before
// anything else happens. Returns true as soon as one call to exec_from
// succeeds, false otherwise.
//
// Flags handled directly here:
//  - NotInitialNull: an empty range is rejected immediately.
//  - NoSaves: exec_from receives nullptr instead of a fresh Saves object.
//  - Search: when set, further start positions are tried after the first
//    attempt fails; when unset, only a single attempt is made.
bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
{
m_begin = begin;
m_end = end;
m_flags = flags;
if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
return false;
// Both thread lists are declared once and handed to every exec_from call below.
Vector<Thread> current_threads, next_threads;
const bool no_saves = (m_flags & RegexExecFlags::NoSaves);
Utf8It start{m_begin, m_begin, m_end};
// Optional start-character map taken from the program; nullptr when the program has none.
const bool* start_chars = m_program.start_chars ? m_program.start_chars->map : nullptr;
// NOTE(review): to_next_start presumably advances start to the next position
// whose character is allowed by start_chars — confirm against its definition.
if (flags & RegexExecFlags::Search)
to_next_start(start, end, start_chars);
// First attempt, from the (possibly advanced) initial position.
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
current_threads, next_threads))
return true;
if (not (flags & RegexExecFlags::Search))
return false;
// Search mode: step past the failed position and retry until start reaches end.
// The body runs at least once, so start is incremented before it is compared to end.
do
{
to_next_start(++start, end, start_chars);
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
current_threads, next_threads))
return true;
}
while (start != end);
return false;
}
// Read-only view of the capture positions held in m_captures.
// The view covers m_program.save_count iterators starting at m_captures->pos;
// an empty view is returned when m_captures is null.
// The unit tests read entries [0], [1] as the bounds of the whole match and
// [2], [3] as the bounds of the first capture group.
ArrayView<const Iterator> captures() const
{
if (m_captures)
return { m_captures->pos, m_program.save_count };
return {};
}
private:
struct Saves
{
int refcount;
@ -325,44 +373,6 @@ struct ThreadedRegexVM
++start;
}
// Run the program on the range [begin, end) according to flags.
//
// begin, end and flags are recorded in m_begin, m_end and m_flags before
// anything else happens. Returns true as soon as one call to exec_from
// succeeds, false otherwise.
//
// Flags handled directly here:
//  - NotInitialNull: an empty range is rejected immediately.
//  - NoSaves: exec_from receives nullptr instead of a fresh Saves object.
//  - Search: when set, further start positions are tried after the first
//    attempt fails; when unset, only a single attempt is made.
bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
{
m_begin = begin;
m_end = end;
m_flags = flags;
if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
return false;
// Both thread lists are declared once and handed to every exec_from call below.
Vector<Thread> current_threads, next_threads;
const bool no_saves = (m_flags & RegexExecFlags::NoSaves);
Utf8It start{m_begin, m_begin, m_end};
// Optional start-character map taken from the program; nullptr when the program has none.
const bool* start_chars = m_program.start_chars ? m_program.start_chars->map : nullptr;
// NOTE(review): to_next_start presumably advances start to the next position
// whose character is allowed by start_chars — confirm against its definition.
if (flags & RegexExecFlags::Search)
to_next_start(start, end, start_chars);
// First attempt, from the (possibly advanced) initial position.
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
current_threads, next_threads))
return true;
if (not (flags & RegexExecFlags::Search))
return false;
// Search mode: step past the failed position and retry until start reaches end.
// The body runs at least once, so start is incremented before it is compared to end.
do
{
to_next_start(++start, end, start_chars);
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
current_threads, next_threads))
return true;
}
while (start != end);
return false;
}
bool is_line_start(const Utf8It& pos) const
{
return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or
@ -409,7 +419,7 @@ bool regex_match(It begin, It end, Vector<It>& captures, const CompiledRegex& re
ThreadedRegexVM<It> vm{re};
if (vm.exec(begin, end, flags & ~(RegexExecFlags::Search)))
{
std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures));
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures));
return true;
}
return false;
@ -430,7 +440,7 @@ bool regex_search(It begin, It end, Vector<It>& captures, const CompiledRegex& r
ThreadedRegexVM<It> vm{re};
if (vm.exec(begin, end, flags | RegexExecFlags::Search))
{
std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures));
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures));
return true;
}
return false;