Regex: Use a custom allocated buffer for Saves instead of a Vector
This commit is contained in:
parent
1399563e40
commit
e0fac20f6c
|
@ -852,7 +852,7 @@ auto test_regex = UnitTest{[]{
|
||||||
{
|
{
|
||||||
TestVM vm{R"(^(foo|qux|baz)+(bar)?baz$)"};
|
TestVM vm{R"(^(foo|qux|baz)+(bar)?baz$)"};
|
||||||
kak_assert(vm.exec("fooquxbarbaz"));
|
kak_assert(vm.exec("fooquxbarbaz"));
|
||||||
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "qux");
|
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "qux");
|
||||||
kak_assert(not vm.exec("fooquxbarbaze"));
|
kak_assert(not vm.exec("fooquxbarbaze"));
|
||||||
kak_assert(not vm.exec("quxbar"));
|
kak_assert(not vm.exec("quxbar"));
|
||||||
kak_assert(not vm.exec("blahblah"));
|
kak_assert(not vm.exec("blahblah"));
|
||||||
|
@ -863,7 +863,7 @@ auto test_regex = UnitTest{[]{
|
||||||
{
|
{
|
||||||
TestVM vm{R"(.*\b(foo|bar)\b.*)"};
|
TestVM vm{R"(.*\b(foo|bar)\b.*)"};
|
||||||
kak_assert(vm.exec("qux foo baz"));
|
kak_assert(vm.exec("qux foo baz"));
|
||||||
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "foo");
|
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "foo");
|
||||||
kak_assert(not vm.exec("quxfoobaz"));
|
kak_assert(not vm.exec("quxfoobaz"));
|
||||||
kak_assert(vm.exec("bar"));
|
kak_assert(vm.exec("bar"));
|
||||||
kak_assert(not vm.exec("foobar"));
|
kak_assert(not vm.exec("foobar"));
|
||||||
|
@ -909,11 +909,11 @@ auto test_regex = UnitTest{[]{
|
||||||
{
|
{
|
||||||
TestVM vm{R"(f.*a(.*o))"};
|
TestVM vm{R"(f.*a(.*o))"};
|
||||||
kak_assert(vm.exec("blahfoobarfoobaz", false, true));
|
kak_assert(vm.exec("blahfoobarfoobaz", false, true));
|
||||||
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "foobarfoo");
|
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "foobarfoo");
|
||||||
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "rfoo");
|
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "rfoo");
|
||||||
kak_assert(vm.exec("mais que fais la police", false, true));
|
kak_assert(vm.exec("mais que fais la police", false, true));
|
||||||
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "fais la po");
|
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "fais la po");
|
||||||
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == " po");
|
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == " po");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -927,13 +927,13 @@ auto test_regex = UnitTest{[]{
|
||||||
{
|
{
|
||||||
TestVM vm{R"((a{3,5})a+)"};
|
TestVM vm{R"((a{3,5})a+)"};
|
||||||
kak_assert(vm.exec("aaaaaa", true, true));
|
kak_assert(vm.exec("aaaaaa", true, true));
|
||||||
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "aaaaa");
|
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaaaa");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM vm{R"((a{3,5}?)a+)"};
|
TestVM vm{R"((a{3,5}?)a+)"};
|
||||||
kak_assert(vm.exec("aaaaaa", true, true));
|
kak_assert(vm.exec("aaaaaa", true, true));
|
||||||
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "aaa");
|
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaa");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -973,20 +973,20 @@ auto test_regex = UnitTest{[]{
|
||||||
{
|
{
|
||||||
TestVM vm{R"(foo\Kbar)"};
|
TestVM vm{R"(foo\Kbar)"};
|
||||||
kak_assert(vm.exec("foobar", true, true));
|
kak_assert(vm.exec("foobar", true, true));
|
||||||
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "bar");
|
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "bar");
|
||||||
kak_assert(not vm.exec("bar", true, true));
|
kak_assert(not vm.exec("bar", true, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM vm{R"((fo+?).*)"};
|
TestVM vm{R"((fo+?).*)"};
|
||||||
kak_assert(vm.exec("foooo", true, true));
|
kak_assert(vm.exec("foooo", true, true));
|
||||||
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "fo");
|
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "fo");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM vm{R"((?=foo).)"};
|
TestVM vm{R"((?=foo).)"};
|
||||||
kak_assert(vm.exec("barfoo", false, true));
|
kak_assert(vm.exec("barfoo", false, true));
|
||||||
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "f");
|
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "f");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|
|
@ -69,10 +69,38 @@ struct ThreadedRegexVM
|
||||||
ThreadedRegexVM(const CompiledRegex& program)
|
ThreadedRegexVM(const CompiledRegex& program)
|
||||||
: m_program{program} { kak_assert(m_program); }
|
: m_program{program} { kak_assert(m_program); }
|
||||||
|
|
||||||
|
// Destroys every Saves block referenced by m_saves and returns its storage.
//
// Saves blocks are created by Saves::allocate(), which takes raw storage from
// ::operator new and placement-constructs pos[1..save_count-1] past the
// declared one-element array. Nothing tears those down automatically, so each
// element is destroyed by hand, then the Saves object itself, and finally the
// raw storage is handed back with ::operator delete.
~ThreadedRegexVM()
{
    for (auto* saves : m_saves)
    {
        // Reverse construction order; pos[0] is left to ~Saves() below.
        // Counting down from save_count (rather than save_count-1) keeps the
        // unsigned index from wrapping if save_count is ever 0.
        for (size_t i = m_program.save_count; i > 1; --i)
            saves->pos[i-1].~Iterator();
        saves->~Saves();
        // Without this the storage taken in Saves::allocate() is never freed.
        // NOTE(review): assumes m_free_saves only aliases pointers already
        // held in m_saves, so each block is released exactly once - confirm
        // against release_saves().
        ::operator delete(saves);
    }
}
|
||||||
|
|
||||||
struct Saves
|
struct Saves
|
||||||
{
|
{
|
||||||
int refcount;
|
int refcount;
|
||||||
Vector<Iterator> pos;
|
Iterator pos[1];
|
||||||
|
|
||||||
|
static Saves* allocate(size_t count)
|
||||||
|
{
|
||||||
|
void* ptr = ::operator new (sizeof(Saves) + (count-1) * sizeof(Iterator));
|
||||||
|
Saves* saves = new (ptr) Saves{1, {}};
|
||||||
|
for (int i = 1; i < count; ++i)
|
||||||
|
new (&saves->pos[i]) Iterator{};
|
||||||
|
return saves;
|
||||||
|
}
|
||||||
|
|
||||||
|
static Saves* allocate(size_t count, const Iterator* pos)
|
||||||
|
{
|
||||||
|
void* ptr = ::operator new (sizeof(Saves) + (count-1) * sizeof(Iterator));
|
||||||
|
Saves* saves = new (ptr) Saves{1, pos[0]};
|
||||||
|
for (size_t i = 1; i < count; ++i)
|
||||||
|
new (&saves->pos[i]) Iterator{pos[i]};
|
||||||
|
return saves;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
Saves* clone_saves(Saves* saves)
|
Saves* clone_saves(Saves* saves)
|
||||||
|
@ -82,12 +110,12 @@ struct ThreadedRegexVM
|
||||||
Saves* res = m_free_saves.back();
|
Saves* res = m_free_saves.back();
|
||||||
m_free_saves.pop_back();
|
m_free_saves.pop_back();
|
||||||
res->refcount = 1;
|
res->refcount = 1;
|
||||||
res->pos = saves->pos;
|
std::copy(saves->pos, saves->pos + m_program.save_count, res->pos);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_saves.push_back(std::make_unique<Saves>(Saves{1, saves->pos}));
|
m_saves.push_back(Saves::allocate(m_program.save_count, saves->pos));
|
||||||
return m_saves.back().get();
|
return m_saves.back();
|
||||||
}
|
}
|
||||||
|
|
||||||
void release_saves(Saves* saves)
|
void release_saves(Saves* saves)
|
||||||
|
@ -150,13 +178,12 @@ struct ThreadedRegexVM
|
||||||
{
|
{
|
||||||
if (thread.saves == nullptr)
|
if (thread.saves == nullptr)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
const char index = *thread.inst++;
|
|
||||||
if (thread.saves->refcount > 1)
|
if (thread.saves->refcount > 1)
|
||||||
{
|
{
|
||||||
--thread.saves->refcount;
|
--thread.saves->refcount;
|
||||||
thread.saves = clone_saves(thread.saves);
|
thread.saves = clone_saves(thread.saves);
|
||||||
}
|
}
|
||||||
|
const size_t index = *thread.inst++;
|
||||||
thread.saves->pos[index] = m_pos.base();
|
thread.saves->pos[index] = m_pos.base();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -237,8 +264,8 @@ struct ThreadedRegexVM
|
||||||
Saves* initial_saves = nullptr;
|
Saves* initial_saves = nullptr;
|
||||||
if (not (m_flags & RegexExecFlags::NoSaves))
|
if (not (m_flags & RegexExecFlags::NoSaves))
|
||||||
{
|
{
|
||||||
m_saves.push_back(std::make_unique<Saves>(Saves{1, Vector<Iterator>(m_program.save_count, Iterator{})}));
|
m_saves.push_back(Saves::allocate(m_program.save_count));
|
||||||
initial_saves = m_saves.back().get();
|
initial_saves = m_saves.back();
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool search = (flags & RegexExecFlags::Search);
|
const bool search = (flags & RegexExecFlags::Search);
|
||||||
|
@ -263,7 +290,7 @@ struct ThreadedRegexVM
|
||||||
}
|
}
|
||||||
|
|
||||||
if (thread.saves)
|
if (thread.saves)
|
||||||
m_captures = std::move(thread.saves->pos);
|
m_captures = thread.saves;
|
||||||
|
|
||||||
if (flags & RegexExecFlags::AnyMatch)
|
if (flags & RegexExecFlags::AnyMatch)
|
||||||
return true;
|
return true;
|
||||||
|
@ -299,7 +326,7 @@ struct ThreadedRegexVM
|
||||||
if (step(thread, current_threads) == StepResult::Matched)
|
if (step(thread, current_threads) == StepResult::Matched)
|
||||||
{
|
{
|
||||||
if (thread.saves)
|
if (thread.saves)
|
||||||
m_captures = std::move(thread.saves->pos);
|
m_captures = thread.saves;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -334,10 +361,10 @@ struct ThreadedRegexVM
|
||||||
Utf8It m_pos;
|
Utf8It m_pos;
|
||||||
RegexExecFlags m_flags;
|
RegexExecFlags m_flags;
|
||||||
|
|
||||||
Vector<std::unique_ptr<Saves>> m_saves;
|
Vector<Saves*> m_saves;
|
||||||
Vector<Saves*> m_free_saves;
|
Vector<Saves*> m_free_saves;
|
||||||
|
|
||||||
Vector<Iterator> m_captures;
|
Saves* m_captures = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename It>
|
template<typename It>
|
||||||
|
@ -355,7 +382,7 @@ bool regex_match(It begin, It end, Vector<It>& captures, const CompiledRegex& re
|
||||||
ThreadedRegexVM<It> vm{re};
|
ThreadedRegexVM<It> vm{re};
|
||||||
if (vm.exec(begin, end, flags & ~(RegexExecFlags::Search)))
|
if (vm.exec(begin, end, flags & ~(RegexExecFlags::Search)))
|
||||||
{
|
{
|
||||||
captures = std::move(vm.m_captures);
|
std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -376,7 +403,7 @@ bool regex_search(It begin, It end, Vector<It>& captures, const CompiledRegex& r
|
||||||
ThreadedRegexVM<It> vm{re};
|
ThreadedRegexVM<It> vm{re};
|
||||||
if (vm.exec(begin, end, flags | RegexExecFlags::Search))
|
if (vm.exec(begin, end, flags | RegexExecFlags::Search))
|
||||||
{
|
{
|
||||||
captures = std::move(vm.m_captures);
|
std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user