Regex: Use a custom allocated buffer for Saves instead of a Vector

This commit is contained in:
Maxime Coste 2017-10-04 11:14:24 +08:00
parent 1399563e40
commit e0fac20f6c
2 changed files with 52 additions and 25 deletions

View File

@ -852,7 +852,7 @@ auto test_regex = UnitTest{[]{
{ {
TestVM vm{R"(^(foo|qux|baz)+(bar)?baz$)"}; TestVM vm{R"(^(foo|qux|baz)+(bar)?baz$)"};
kak_assert(vm.exec("fooquxbarbaz")); kak_assert(vm.exec("fooquxbarbaz"));
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "qux"); kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "qux");
kak_assert(not vm.exec("fooquxbarbaze")); kak_assert(not vm.exec("fooquxbarbaze"));
kak_assert(not vm.exec("quxbar")); kak_assert(not vm.exec("quxbar"));
kak_assert(not vm.exec("blahblah")); kak_assert(not vm.exec("blahblah"));
@ -863,7 +863,7 @@ auto test_regex = UnitTest{[]{
{ {
TestVM vm{R"(.*\b(foo|bar)\b.*)"}; TestVM vm{R"(.*\b(foo|bar)\b.*)"};
kak_assert(vm.exec("qux foo baz")); kak_assert(vm.exec("qux foo baz"));
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "foo"); kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "foo");
kak_assert(not vm.exec("quxfoobaz")); kak_assert(not vm.exec("quxfoobaz"));
kak_assert(vm.exec("bar")); kak_assert(vm.exec("bar"));
kak_assert(not vm.exec("foobar")); kak_assert(not vm.exec("foobar"));
@ -909,11 +909,11 @@ auto test_regex = UnitTest{[]{
{ {
TestVM vm{R"(f.*a(.*o))"}; TestVM vm{R"(f.*a(.*o))"};
kak_assert(vm.exec("blahfoobarfoobaz", false, true)); kak_assert(vm.exec("blahfoobarfoobaz", false, true));
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "foobarfoo"); kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "foobarfoo");
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "rfoo"); kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "rfoo");
kak_assert(vm.exec("mais que fais la police", false, true)); kak_assert(vm.exec("mais que fais la police", false, true));
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "fais la po"); kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "fais la po");
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == " po"); kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == " po");
} }
{ {
@ -927,13 +927,13 @@ auto test_regex = UnitTest{[]{
{ {
TestVM vm{R"((a{3,5})a+)"}; TestVM vm{R"((a{3,5})a+)"};
kak_assert(vm.exec("aaaaaa", true, true)); kak_assert(vm.exec("aaaaaa", true, true));
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "aaaaa"); kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaaaa");
} }
{ {
TestVM vm{R"((a{3,5}?)a+)"}; TestVM vm{R"((a{3,5}?)a+)"};
kak_assert(vm.exec("aaaaaa", true, true)); kak_assert(vm.exec("aaaaaa", true, true));
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "aaa"); kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaa");
} }
{ {
@ -973,20 +973,20 @@ auto test_regex = UnitTest{[]{
{ {
TestVM vm{R"(foo\Kbar)"}; TestVM vm{R"(foo\Kbar)"};
kak_assert(vm.exec("foobar", true, true)); kak_assert(vm.exec("foobar", true, true));
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "bar"); kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "bar");
kak_assert(not vm.exec("bar", true, true)); kak_assert(not vm.exec("bar", true, true));
} }
{ {
TestVM vm{R"((fo+?).*)"}; TestVM vm{R"((fo+?).*)"};
kak_assert(vm.exec("foooo", true, true)); kak_assert(vm.exec("foooo", true, true));
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "fo"); kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "fo");
} }
{ {
TestVM vm{R"((?=foo).)"}; TestVM vm{R"((?=foo).)"};
kak_assert(vm.exec("barfoo", false, true)); kak_assert(vm.exec("barfoo", false, true));
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "f"); kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "f");
} }
{ {

View File

@ -69,10 +69,38 @@ struct ThreadedRegexVM
ThreadedRegexVM(const CompiledRegex& program) ThreadedRegexVM(const CompiledRegex& program)
: m_program{program} { kak_assert(m_program); } : m_program{program} { kak_assert(m_program); }
// Destroys every Saves block owned by this VM and releases its storage.
//
// The blocks stored in m_saves are created by Saves::allocate, which takes
// raw storage from ::operator new and placement-constructs the Iterator
// slots that follow the struct, so teardown has to mirror that by hand.
~ThreadedRegexVM()
{
    for (auto* saves : m_saves)
    {
        // pos[0] is a regular member and is destroyed by ~Saves() below;
        // only the slots placement-constructed past it need an explicit
        // destructor call, issued in reverse order of construction.
        // Counting down from save_count (rather than save_count-1) keeps
        // the loop from wrapping around if save_count is ever 0.
        for (size_t i = m_program.save_count; i > 1; --i)
            saves->pos[i-1].~Iterator();
        saves->~Saves();
        // Running the destructor does not free memory obtained through
        // ::operator new; without this call every block would leak.
        ::operator delete(saves);
    }
}
struct Saves struct Saves
{ {
int refcount; int refcount;
Vector<Iterator> pos; Iterator pos[1];
// Allocates one Saves block with room for `count` value-initialized
// Iterator slots and a reference count of 1.
//
// Saves itself only declares pos[1]; the other count-1 slots live in extra
// storage requested directly behind the struct, which is why the block is
// built with ::operator new plus placement new instead of a plain new.
static Saves* allocate(size_t count)
{
    // Guard the subtraction: with count == 0 the unsigned `count-1` would
    // wrap and the request would end up smaller than sizeof(Saves).
    const size_t extra_slots = count > 1 ? count - 1 : 0;
    void* ptr = ::operator new (sizeof(Saves) + extra_slots * sizeof(Iterator));
    Saves* saves = new (ptr) Saves{1, {}};
    // Slot 0 was constructed as part of Saves; construct the rest in place.
    // The index is size_t so it matches `count` (no signed/unsigned
    // comparison, no truncation) and the sibling overload taking `pos`.
    for (size_t i = 1; i < count; ++i)
        new (&saves->pos[i]) Iterator{};
    return saves;
}
// Allocates one Saves block holding `count` Iterator slots copied from
// pos[0..count), with a reference count of 1.
static Saves* allocate(size_t count, const Iterator* pos)
{
    // Saves already embeds one slot; ask for room for the remaining
    // count-1 slots directly behind it.
    const size_t bytes = sizeof(Saves) + (count-1) * sizeof(Iterator);
    void* storage = ::operator new (bytes);
    Saves* block = new (storage) Saves{1, pos[0]};
    // Slot 0 was copied by the initializer above; copy-construct the
    // others in place, in increasing index order.
    size_t slot = 1;
    while (slot < count)
    {
        new (&block->pos[slot]) Iterator{pos[slot]};
        ++slot;
    }
    return block;
}
}; };
Saves* clone_saves(Saves* saves) Saves* clone_saves(Saves* saves)
@ -82,12 +110,12 @@ struct ThreadedRegexVM
Saves* res = m_free_saves.back(); Saves* res = m_free_saves.back();
m_free_saves.pop_back(); m_free_saves.pop_back();
res->refcount = 1; res->refcount = 1;
res->pos = saves->pos; std::copy(saves->pos, saves->pos + m_program.save_count, res->pos);
return res; return res;
} }
m_saves.push_back(std::make_unique<Saves>(Saves{1, saves->pos})); m_saves.push_back(Saves::allocate(m_program.save_count, saves->pos));
return m_saves.back().get(); return m_saves.back();
} }
void release_saves(Saves* saves) void release_saves(Saves* saves)
@ -150,13 +178,12 @@ struct ThreadedRegexVM
{ {
if (thread.saves == nullptr) if (thread.saves == nullptr)
break; break;
const char index = *thread.inst++;
if (thread.saves->refcount > 1) if (thread.saves->refcount > 1)
{ {
--thread.saves->refcount; --thread.saves->refcount;
thread.saves = clone_saves(thread.saves); thread.saves = clone_saves(thread.saves);
} }
const size_t index = *thread.inst++;
thread.saves->pos[index] = m_pos.base(); thread.saves->pos[index] = m_pos.base();
break; break;
} }
@ -237,8 +264,8 @@ struct ThreadedRegexVM
Saves* initial_saves = nullptr; Saves* initial_saves = nullptr;
if (not (m_flags & RegexExecFlags::NoSaves)) if (not (m_flags & RegexExecFlags::NoSaves))
{ {
m_saves.push_back(std::make_unique<Saves>(Saves{1, Vector<Iterator>(m_program.save_count, Iterator{})})); m_saves.push_back(Saves::allocate(m_program.save_count));
initial_saves = m_saves.back().get(); initial_saves = m_saves.back();
} }
const bool search = (flags & RegexExecFlags::Search); const bool search = (flags & RegexExecFlags::Search);
@ -263,7 +290,7 @@ struct ThreadedRegexVM
} }
if (thread.saves) if (thread.saves)
m_captures = std::move(thread.saves->pos); m_captures = thread.saves;
if (flags & RegexExecFlags::AnyMatch) if (flags & RegexExecFlags::AnyMatch)
return true; return true;
@ -299,7 +326,7 @@ struct ThreadedRegexVM
if (step(thread, current_threads) == StepResult::Matched) if (step(thread, current_threads) == StepResult::Matched)
{ {
if (thread.saves) if (thread.saves)
m_captures = std::move(thread.saves->pos); m_captures = thread.saves;
return true; return true;
} }
} }
@ -334,10 +361,10 @@ struct ThreadedRegexVM
Utf8It m_pos; Utf8It m_pos;
RegexExecFlags m_flags; RegexExecFlags m_flags;
Vector<std::unique_ptr<Saves>> m_saves; Vector<Saves*> m_saves;
Vector<Saves*> m_free_saves; Vector<Saves*> m_free_saves;
Vector<Iterator> m_captures; Saves* m_captures = nullptr;
}; };
template<typename It> template<typename It>
@ -355,7 +382,7 @@ bool regex_match(It begin, It end, Vector<It>& captures, const CompiledRegex& re
ThreadedRegexVM<It> vm{re}; ThreadedRegexVM<It> vm{re};
if (vm.exec(begin, end, flags & ~(RegexExecFlags::Search))) if (vm.exec(begin, end, flags & ~(RegexExecFlags::Search)))
{ {
captures = std::move(vm.m_captures); std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures));
return true; return true;
} }
return false; return false;
@ -376,7 +403,7 @@ bool regex_search(It begin, It end, Vector<It>& captures, const CompiledRegex& r
ThreadedRegexVM<It> vm{re}; ThreadedRegexVM<It> vm{re};
if (vm.exec(begin, end, flags | RegexExecFlags::Search)) if (vm.exec(begin, end, flags | RegexExecFlags::Search))
{ {
captures = std::move(vm.m_captures); std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures));
return true; return true;
} }
return false; return false;