Regex: Make ThreadedRegexVM a proper class, define a proper interface
This commit is contained in:
parent
3b69dda04e
commit
236751cb84
|
@ -931,7 +931,7 @@ auto test_regex = UnitTest{[]{
|
|||
{
|
||||
TestVM vm{R"(^(foo|qux|baz)+(bar)?baz$)"};
|
||||
kak_assert(vm.exec("fooquxbarbaz"));
|
||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "qux");
|
||||
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "qux");
|
||||
kak_assert(not vm.exec("fooquxbarbaze"));
|
||||
kak_assert(not vm.exec("quxbar"));
|
||||
kak_assert(not vm.exec("blahblah"));
|
||||
|
@ -942,7 +942,7 @@ auto test_regex = UnitTest{[]{
|
|||
{
|
||||
TestVM vm{R"(.*\b(foo|bar)\b.*)"};
|
||||
kak_assert(vm.exec("qux foo baz"));
|
||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "foo");
|
||||
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "foo");
|
||||
kak_assert(not vm.exec("quxfoobaz"));
|
||||
kak_assert(vm.exec("bar"));
|
||||
kak_assert(not vm.exec("foobar"));
|
||||
|
@ -988,11 +988,11 @@ auto test_regex = UnitTest{[]{
|
|||
{
|
||||
TestVM vm{R"(f.*a(.*o))"};
|
||||
kak_assert(vm.exec("blahfoobarfoobaz", false, true));
|
||||
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "foobarfoo");
|
||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "rfoo");
|
||||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foobarfoo");
|
||||
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "rfoo");
|
||||
kak_assert(vm.exec("mais que fais la police", false, true));
|
||||
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "fais la po");
|
||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == " po");
|
||||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "fais la po");
|
||||
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == " po");
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -1006,13 +1006,13 @@ auto test_regex = UnitTest{[]{
|
|||
{
|
||||
TestVM vm{R"((a{3,5})a+)"};
|
||||
kak_assert(vm.exec("aaaaaa", true, true));
|
||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaaaa");
|
||||
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "aaaaa");
|
||||
}
|
||||
|
||||
{
|
||||
TestVM vm{R"((a{3,5}?)a+)"};
|
||||
kak_assert(vm.exec("aaaaaa", true, true));
|
||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaa");
|
||||
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "aaa");
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -1052,20 +1052,20 @@ auto test_regex = UnitTest{[]{
|
|||
{
|
||||
TestVM vm{R"(foo\Kbar)"};
|
||||
kak_assert(vm.exec("foobar", true, true));
|
||||
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "bar");
|
||||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "bar");
|
||||
kak_assert(not vm.exec("bar", true, true));
|
||||
}
|
||||
|
||||
{
|
||||
TestVM vm{R"((fo+?).*)"};
|
||||
kak_assert(vm.exec("foooo", true, true));
|
||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "fo");
|
||||
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "fo");
|
||||
}
|
||||
|
||||
{
|
||||
TestVM vm{R"((?=foo).)"};
|
||||
kak_assert(vm.exec("barfoo", false, true));
|
||||
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "f");
|
||||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "f");
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -1111,7 +1111,7 @@ auto test_regex = UnitTest{[]{
|
|||
{
|
||||
TestVM vm{R"($)"};
|
||||
kak_assert(vm.exec("foo\n", false, true));
|
||||
kak_assert(*vm.m_captures->pos[0] == '\n');
|
||||
kak_assert(*vm.captures()[0] == '\n');
|
||||
}
|
||||
}};
|
||||
|
||||
|
|
|
@ -66,12 +66,14 @@ enum class RegexExecFlags
|
|||
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
|
||||
|
||||
template<typename Iterator>
|
||||
struct ThreadedRegexVM
|
||||
class ThreadedRegexVM
|
||||
{
|
||||
public:
|
||||
// Builds a VM around an already compiled regex: m_program is initialized from
// `program`, and the assertion requires that m_program evaluates to true.
ThreadedRegexVM(const CompiledRegex& program)
|
||||
: m_program{program} { kak_assert(m_program); }
|
||||
|
||||
// Copy construction and copy assignment are both disabled.
ThreadedRegexVM(const ThreadedRegexVM&) = delete;
|
||||
ThreadedRegexVM& operator=(const ThreadedRegexVM&) = delete;
|
||||
|
||||
~ThreadedRegexVM()
|
||||
{
|
||||
|
@ -83,6 +85,52 @@ struct ThreadedRegexVM
|
|||
}
|
||||
}
|
||||
|
||||
// Runs the compiled program against the range [begin, end) under `flags`.
//
// begin, end and flags are stored in m_begin, m_end and m_flags before any
// matching is attempted.
//
// Returns true as soon as one call to exec_from() succeeds, false otherwise:
//  - NotInitialNull set and an empty range: false, without attempting a match.
//  - Search not set: a single attempt starting at begin.
//  - Search set: an attempt at each candidate start produced by
//    to_next_start(), until the start position reaches end.
bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
|
||||
{
|
||||
m_begin = begin;
|
||||
m_end = end;
|
||||
m_flags = flags;
|
||||
|
||||
if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
|
||||
return false;
|
||||
|
||||
// Both thread lists are created once here and handed to every exec_from() call.
Vector<Thread> current_threads, next_threads;
|
||||
|
||||
// With NoSaves, exec_from() is given nullptr instead of a new saves object.
const bool no_saves = (m_flags & RegexExecFlags::NoSaves);
|
||||
Utf8It start{m_begin, m_begin, m_end};
|
||||
|
||||
// nullptr when the program carries no start_chars table.
const bool* start_chars = m_program.start_chars ? m_program.start_chars->map : nullptr;
|
||||
|
||||
// NOTE(review): to_next_start() is not visible here; presumably it advances
// `start` to the next position allowed by start_chars - confirm.
if (flags & RegexExecFlags::Search)
|
||||
to_next_start(start, end, start_chars);
|
||||
|
||||
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
|
||||
current_threads, next_threads))
|
||||
return true;
|
||||
|
||||
// Without Search there is exactly one attempt, made above.
if (not (flags & RegexExecFlags::Search))
|
||||
return false;
|
||||
|
||||
// NOTE(review): `start` is incremented before `start != end` is tested. If
// `start` can already equal `end` on entry, this relies on Utf8It's increment
// being safe at the end of the range - confirm.
do
|
||||
{
|
||||
to_next_start(++start, end, start_chars);
|
||||
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
|
||||
current_threads, next_threads))
|
||||
return true;
|
||||
}
|
||||
while (start != end);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read-only view of the capture positions.
//
// When m_captures is set, the view covers m_program.save_count iterators
// starting at m_captures->pos; otherwise the view is empty.
// NOTE(review): callers appear to read entries 2k and 2k+1 as the begin/end
// of group k, with 0/1 being the whole match - confirm against the compiler.
ArrayView<const Iterator> captures() const
|
||||
{
|
||||
if (m_captures)
|
||||
return { m_captures->pos, m_program.save_count };
|
||||
return {};
|
||||
}
|
||||
|
||||
private:
|
||||
struct Saves
|
||||
{
|
||||
int refcount;
|
||||
|
@ -325,44 +373,6 @@ struct ThreadedRegexVM
|
|||
++start;
|
||||
}
|
||||
|
||||
// NOTE(review): the enclosing hunk header (-325,44 +373,6) suggests these lines
// are the removed-side copy of exec() - confirm before relying on them.
//
// Runs the compiled program against the range [begin, end) under `flags`,
// after storing the three arguments in m_begin, m_end and m_flags.
// Returns true as soon as one exec_from() call succeeds. It returns false for
// an empty range with NotInitialNull, after one failed attempt when Search is
// not set, or once the start position reaches end in Search mode.
bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
|
||||
{
|
||||
m_begin = begin;
|
||||
m_end = end;
|
||||
m_flags = flags;
|
||||
|
||||
if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
|
||||
return false;
|
||||
|
||||
Vector<Thread> current_threads, next_threads;
|
||||
|
||||
// With NoSaves, exec_from() is given nullptr instead of a new saves object.
const bool no_saves = (m_flags & RegexExecFlags::NoSaves);
|
||||
Utf8It start{m_begin, m_begin, m_end};
|
||||
|
||||
// nullptr when the program carries no start_chars table.
const bool* start_chars = m_program.start_chars ? m_program.start_chars->map : nullptr;
|
||||
|
||||
if (flags & RegexExecFlags::Search)
|
||||
to_next_start(start, end, start_chars);
|
||||
|
||||
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
|
||||
current_threads, next_threads))
|
||||
return true;
|
||||
|
||||
// Without Search there is exactly one attempt, made above.
if (not (flags & RegexExecFlags::Search))
|
||||
return false;
|
||||
|
||||
// The start position is advanced before the end-of-range test is made.
do
|
||||
{
|
||||
to_next_start(++start, end, start_chars);
|
||||
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
|
||||
current_threads, next_threads))
|
||||
return true;
|
||||
}
|
||||
while (start != end);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool is_line_start(const Utf8It& pos) const
|
||||
{
|
||||
return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or
|
||||
|
@ -409,7 +419,7 @@ bool regex_match(It begin, It end, Vector<It>& captures, const CompiledRegex& re
|
|||
ThreadedRegexVM<It> vm{re};
|
||||
if (vm.exec(begin, end, flags & ~(RegexExecFlags::Search)))
|
||||
{
|
||||
std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures));
|
||||
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -430,7 +440,7 @@ bool regex_search(It begin, It end, Vector<It>& captures, const CompiledRegex& r
|
|||
ThreadedRegexVM<It> vm{re};
|
||||
if (vm.exec(begin, end, flags | RegexExecFlags::Search))
|
||||
{
|
||||
std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures));
|
||||
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
|
Loading…
Reference in New Issue
Block a user