Regex: Make ThreadedRegexVM a proper class, define a proper interface
This commit is contained in:
parent
3b69dda04e
commit
236751cb84
|
@ -931,7 +931,7 @@ auto test_regex = UnitTest{[]{
|
||||||
{
|
{
|
||||||
TestVM vm{R"(^(foo|qux|baz)+(bar)?baz$)"};
|
TestVM vm{R"(^(foo|qux|baz)+(bar)?baz$)"};
|
||||||
kak_assert(vm.exec("fooquxbarbaz"));
|
kak_assert(vm.exec("fooquxbarbaz"));
|
||||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "qux");
|
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "qux");
|
||||||
kak_assert(not vm.exec("fooquxbarbaze"));
|
kak_assert(not vm.exec("fooquxbarbaze"));
|
||||||
kak_assert(not vm.exec("quxbar"));
|
kak_assert(not vm.exec("quxbar"));
|
||||||
kak_assert(not vm.exec("blahblah"));
|
kak_assert(not vm.exec("blahblah"));
|
||||||
|
@ -942,7 +942,7 @@ auto test_regex = UnitTest{[]{
|
||||||
{
|
{
|
||||||
TestVM vm{R"(.*\b(foo|bar)\b.*)"};
|
TestVM vm{R"(.*\b(foo|bar)\b.*)"};
|
||||||
kak_assert(vm.exec("qux foo baz"));
|
kak_assert(vm.exec("qux foo baz"));
|
||||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "foo");
|
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "foo");
|
||||||
kak_assert(not vm.exec("quxfoobaz"));
|
kak_assert(not vm.exec("quxfoobaz"));
|
||||||
kak_assert(vm.exec("bar"));
|
kak_assert(vm.exec("bar"));
|
||||||
kak_assert(not vm.exec("foobar"));
|
kak_assert(not vm.exec("foobar"));
|
||||||
|
@ -988,11 +988,11 @@ auto test_regex = UnitTest{[]{
|
||||||
{
|
{
|
||||||
TestVM vm{R"(f.*a(.*o))"};
|
TestVM vm{R"(f.*a(.*o))"};
|
||||||
kak_assert(vm.exec("blahfoobarfoobaz", false, true));
|
kak_assert(vm.exec("blahfoobarfoobaz", false, true));
|
||||||
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "foobarfoo");
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foobarfoo");
|
||||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "rfoo");
|
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "rfoo");
|
||||||
kak_assert(vm.exec("mais que fais la police", false, true));
|
kak_assert(vm.exec("mais que fais la police", false, true));
|
||||||
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "fais la po");
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "fais la po");
|
||||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == " po");
|
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == " po");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -1006,13 +1006,13 @@ auto test_regex = UnitTest{[]{
|
||||||
{
|
{
|
||||||
TestVM vm{R"((a{3,5})a+)"};
|
TestVM vm{R"((a{3,5})a+)"};
|
||||||
kak_assert(vm.exec("aaaaaa", true, true));
|
kak_assert(vm.exec("aaaaaa", true, true));
|
||||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaaaa");
|
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "aaaaa");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM vm{R"((a{3,5}?)a+)"};
|
TestVM vm{R"((a{3,5}?)a+)"};
|
||||||
kak_assert(vm.exec("aaaaaa", true, true));
|
kak_assert(vm.exec("aaaaaa", true, true));
|
||||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaa");
|
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "aaa");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -1052,20 +1052,20 @@ auto test_regex = UnitTest{[]{
|
||||||
{
|
{
|
||||||
TestVM vm{R"(foo\Kbar)"};
|
TestVM vm{R"(foo\Kbar)"};
|
||||||
kak_assert(vm.exec("foobar", true, true));
|
kak_assert(vm.exec("foobar", true, true));
|
||||||
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "bar");
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "bar");
|
||||||
kak_assert(not vm.exec("bar", true, true));
|
kak_assert(not vm.exec("bar", true, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM vm{R"((fo+?).*)"};
|
TestVM vm{R"((fo+?).*)"};
|
||||||
kak_assert(vm.exec("foooo", true, true));
|
kak_assert(vm.exec("foooo", true, true));
|
||||||
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "fo");
|
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "fo");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM vm{R"((?=foo).)"};
|
TestVM vm{R"((?=foo).)"};
|
||||||
kak_assert(vm.exec("barfoo", false, true));
|
kak_assert(vm.exec("barfoo", false, true));
|
||||||
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "f");
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "f");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -1111,7 +1111,7 @@ auto test_regex = UnitTest{[]{
|
||||||
{
|
{
|
||||||
TestVM vm{R"($)"};
|
TestVM vm{R"($)"};
|
||||||
kak_assert(vm.exec("foo\n", false, true));
|
kak_assert(vm.exec("foo\n", false, true));
|
||||||
kak_assert(*vm.m_captures->pos[0] == '\n');
|
kak_assert(*vm.captures()[0] == '\n');
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
|
|
||||||
|
|
|
@ -66,12 +66,14 @@ enum class RegexExecFlags
|
||||||
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
|
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
|
||||||
|
|
||||||
template<typename Iterator>
|
template<typename Iterator>
|
||||||
struct ThreadedRegexVM
|
class ThreadedRegexVM
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
ThreadedRegexVM(const CompiledRegex& program)
|
ThreadedRegexVM(const CompiledRegex& program)
|
||||||
: m_program{program} { kak_assert(m_program); }
|
: m_program{program} { kak_assert(m_program); }
|
||||||
|
|
||||||
ThreadedRegexVM(const ThreadedRegexVM&) = delete;
|
ThreadedRegexVM(const ThreadedRegexVM&) = delete;
|
||||||
|
ThreadedRegexVM& operator=(const ThreadedRegexVM&) = delete;
|
||||||
|
|
||||||
~ThreadedRegexVM()
|
~ThreadedRegexVM()
|
||||||
{
|
{
|
||||||
|
@ -83,6 +85,52 @@ struct ThreadedRegexVM
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs the compiled program over [begin, end) and reports whether it matched.
//
// begin/end delimit the subject; flags is a RegexExecFlags bit set. The three
// arguments are stored in m_begin / m_end / m_flags before anything else, so
// the helpers called below can read them from the members.
//
// Flags inspected directly in this function:
//  - NotInitialNull: an empty subject is rejected immediately.
//  - NoSaves: exec_from is handed a null saves pointer instead of a fresh one.
//  - Search: after a failed first attempt, further start positions are tried
//    up to end; without it only a single attempt is made.
//
// Returns true as soon as one exec_from call succeeds, false otherwise.
bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
|
||||||
|
{
|
||||||
|
m_begin = begin;
|
||||||
|
m_end = end;
|
||||||
|
m_flags = flags;
|
||||||
|
|
||||||
|
// Empty subject with NotInitialNull set: fail without running the program.
if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Thread lists are created once here and passed to every exec_from call.
Vector<Thread> current_threads, next_threads;
|
||||||
|
|
||||||
|
const bool no_saves = (m_flags & RegexExecFlags::NoSaves);
|
||||||
|
Utf8It start{m_begin, m_begin, m_end};
|
||||||
|
|
||||||
|
// Optional lookup table taken from the program; null when the program has none.
const bool* start_chars = m_program.start_chars ? m_program.start_chars->map : nullptr;
|
||||||
|
|
||||||
|
// presumably skips ahead to the first position that can begin a match
// (to_next_start is defined elsewhere) -- only done when searching.
if (flags & RegexExecFlags::Search)
|
||||||
|
to_next_start(start, end, start_chars);
|
||||||
|
|
||||||
|
// First attempt, from the initial start position.
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
|
||||||
|
current_threads, next_threads))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// Without Search, a single attempt is all we make.
if (not (flags & RegexExecFlags::Search))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Search mode: step past the failed position and retry until start reaches end.
// NOTE(review): if start already equals end on entry, ++start is applied to
// an end iterator -- confirm Utf8It's increment is safe at its end bound.
do
|
||||||
|
{
|
||||||
|
to_next_start(++start, end, start_chars);
|
||||||
|
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
|
||||||
|
current_threads, next_threads))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
while (start != end);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read-only view of the capture positions held in m_captures.
//
// When m_captures is set, the view covers m_captures->pos with
// m_program.save_count elements; otherwise an empty view is returned.
// The unit tests in this commit read entries [0],[1] as the begin/end of the
// whole match and [2],[3] as the begin/end of the first capture group.
ArrayView<const Iterator> captures() const
|
||||||
|
{
|
||||||
|
if (m_captures)
|
||||||
|
return { m_captures->pos, m_program.save_count };
|
||||||
|
// No captures stored: hand back an empty view.
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
struct Saves
|
struct Saves
|
||||||
{
|
{
|
||||||
int refcount;
|
int refcount;
|
||||||
|
@ -325,44 +373,6 @@ struct ThreadedRegexVM
|
||||||
++start;
|
++start;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Runs the compiled program over [begin, end) and reports whether it matched.
// The arguments are first stored in m_begin / m_end / m_flags. An empty
// subject fails immediately when NotInitialNull is set; NoSaves makes
// exec_from receive a null saves pointer; Search enables retrying from
// later start positions after the first attempt fails.
bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
|
|
||||||
{
|
|
||||||
m_begin = begin;
|
|
||||||
m_end = end;
|
|
||||||
m_flags = flags;
|
|
||||||
|
|
||||||
// Empty subject with NotInitialNull set: fail without running the program.
if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// Thread lists are created once here and passed to every exec_from call.
Vector<Thread> current_threads, next_threads;
|
|
||||||
|
|
||||||
const bool no_saves = (m_flags & RegexExecFlags::NoSaves);
|
|
||||||
Utf8It start{m_begin, m_begin, m_end};
|
|
||||||
|
|
||||||
// Optional lookup table taken from the program; null when the program has none.
const bool* start_chars = m_program.start_chars ? m_program.start_chars->map : nullptr;
|
|
||||||
|
|
||||||
if (flags & RegexExecFlags::Search)
|
|
||||||
to_next_start(start, end, start_chars);
|
|
||||||
|
|
||||||
// First attempt, from the initial start position.
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
|
|
||||||
current_threads, next_threads))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// Without Search, a single attempt is all we make.
if (not (flags & RegexExecFlags::Search))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// Search mode: step past the failed position and retry until start reaches end.
do
|
|
||||||
{
|
|
||||||
to_next_start(++start, end, start_chars);
|
|
||||||
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
|
|
||||||
current_threads, next_threads))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
while (start != end);
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_line_start(const Utf8It& pos) const
|
bool is_line_start(const Utf8It& pos) const
|
||||||
{
|
{
|
||||||
return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or
|
return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or
|
||||||
|
@ -409,7 +419,7 @@ bool regex_match(It begin, It end, Vector<It>& captures, const CompiledRegex& re
|
||||||
ThreadedRegexVM<It> vm{re};
|
ThreadedRegexVM<It> vm{re};
|
||||||
if (vm.exec(begin, end, flags & ~(RegexExecFlags::Search)))
|
if (vm.exec(begin, end, flags & ~(RegexExecFlags::Search)))
|
||||||
{
|
{
|
||||||
std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures));
|
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -430,7 +440,7 @@ bool regex_search(It begin, It end, Vector<It>& captures, const CompiledRegex& r
|
||||||
ThreadedRegexVM<It> vm{re};
|
ThreadedRegexVM<It> vm{re};
|
||||||
if (vm.exec(begin, end, flags | RegexExecFlags::Search))
|
if (vm.exec(begin, end, flags | RegexExecFlags::Search))
|
||||||
{
|
{
|
||||||
std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures));
|
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user