Regex: Use a custom allocated buffer for Saves instead of a Vector

This commit is contained in:
Maxime Coste 2017-10-04 11:14:24 +08:00
parent 1399563e40
commit e0fac20f6c
2 changed files with 52 additions and 25 deletions

View File

@ -852,7 +852,7 @@ auto test_regex = UnitTest{[]{
{
TestVM vm{R"(^(foo|qux|baz)+(bar)?baz$)"};
kak_assert(vm.exec("fooquxbarbaz"));
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "qux");
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "qux");
kak_assert(not vm.exec("fooquxbarbaze"));
kak_assert(not vm.exec("quxbar"));
kak_assert(not vm.exec("blahblah"));
@ -863,7 +863,7 @@ auto test_regex = UnitTest{[]{
{
TestVM vm{R"(.*\b(foo|bar)\b.*)"};
kak_assert(vm.exec("qux foo baz"));
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "foo");
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "foo");
kak_assert(not vm.exec("quxfoobaz"));
kak_assert(vm.exec("bar"));
kak_assert(not vm.exec("foobar"));
@ -909,11 +909,11 @@ auto test_regex = UnitTest{[]{
{
TestVM vm{R"(f.*a(.*o))"};
kak_assert(vm.exec("blahfoobarfoobaz", false, true));
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "foobarfoo");
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "rfoo");
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "foobarfoo");
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "rfoo");
kak_assert(vm.exec("mais que fais la police", false, true));
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "fais la po");
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == " po");
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "fais la po");
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == " po");
}
{
@ -927,13 +927,13 @@ auto test_regex = UnitTest{[]{
{
TestVM vm{R"((a{3,5})a+)"};
kak_assert(vm.exec("aaaaaa", true, true));
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "aaaaa");
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaaaa");
}
{
TestVM vm{R"((a{3,5}?)a+)"};
kak_assert(vm.exec("aaaaaa", true, true));
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "aaa");
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "aaa");
}
{
@ -973,20 +973,20 @@ auto test_regex = UnitTest{[]{
{
TestVM vm{R"(foo\Kbar)"};
kak_assert(vm.exec("foobar", true, true));
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "bar");
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "bar");
kak_assert(not vm.exec("bar", true, true));
}
{
TestVM vm{R"((fo+?).*)"};
kak_assert(vm.exec("foooo", true, true));
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "fo");
kak_assert(StringView{vm.m_captures->pos[2], vm.m_captures->pos[3]} == "fo");
}
{
TestVM vm{R"((?=foo).)"};
kak_assert(vm.exec("barfoo", false, true));
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "f");
kak_assert(StringView{vm.m_captures->pos[0], vm.m_captures->pos[1]} == "f");
}
{

View File

@ -69,10 +69,38 @@ struct ThreadedRegexVM
ThreadedRegexVM(const CompiledRegex& program)
: m_program{program} { kak_assert(m_program); }
// Tears down every Saves buffer recorded in m_saves.
//
// Saves objects are created by Saves::allocate, which obtains raw storage
// with ::operator new and placement-constructs the iterators in it, so both
// destruction and deallocation have to be done by hand here.
~ThreadedRegexVM()
{
    for (auto* saves : m_saves)
    {
        // Slots 1..save_count-1 live past the declared pos[1] member and
        // were placement-constructed individually: destroy them explicitly.
        for (size_t i = m_program.save_count-1; i > 0; --i)
            saves->pos[i].~Iterator();
        // pos[0] is a regular member, ~Saves() destroys it.
        saves->~Saves();
        // Give back the storage obtained through ::operator new; running the
        // destructors alone would leak the whole buffer.
        ::operator delete(saves);
    }
}
struct Saves
{
int refcount;
Vector<Iterator> pos;
Iterator pos[1];
// Allocates a Saves with room for `count` iterators, all of them
// value-initialized, and a refcount of 1.
//
// The storage is a single ::operator new block: sizeof(Saves) already covers
// pos[0], the remaining count-1 iterators are placed right after it.
// `count` is expected to be at least 1 (count-1 is computed on a size_t).
//
// Returns a pointer to the newly constructed Saves.
static Saves* allocate(size_t count)
{
    void* ptr = ::operator new (sizeof(Saves) + (count-1) * sizeof(Iterator));
    // Constructs refcount and pos[0] in place.
    Saves* saves = new (ptr) Saves{1, {}};
    // size_t index: matches the type of `count` and the sibling overload,
    // avoiding a signed/unsigned comparison.
    for (size_t i = 1; i < count; ++i)
        new (&saves->pos[i]) Iterator{};
    return saves;
}
// Allocates a Saves whose `count` iterators are copies of pos[0..count-1],
// with a refcount of 1.
//
// One raw ::operator new block holds the Saves header (which embeds pos[0])
// followed by the count-1 trailing iterators.
static Saves* allocate(size_t count, const Iterator* pos)
{
    const size_t trailing_bytes = (count-1) * sizeof(Iterator);
    void* storage = ::operator new (sizeof(Saves) + trailing_bytes);

    // The first iterator is copied as part of the aggregate initialization.
    Saves* result = new (storage) Saves{1, pos[0]};

    // The remaining ones are copy-constructed in place, one slot at a time.
    size_t slot = 1;
    while (slot < count)
    {
        new (&result->pos[slot]) Iterator{pos[slot]};
        ++slot;
    }
    return result;
}
};
Saves* clone_saves(Saves* saves)
@ -82,12 +110,12 @@ struct ThreadedRegexVM
Saves* res = m_free_saves.back();
m_free_saves.pop_back();
res->refcount = 1;
res->pos = saves->pos;
std::copy(saves->pos, saves->pos + m_program.save_count, res->pos);
return res;
}
m_saves.push_back(std::make_unique<Saves>(Saves{1, saves->pos}));
return m_saves.back().get();
m_saves.push_back(Saves::allocate(m_program.save_count, saves->pos));
return m_saves.back();
}
void release_saves(Saves* saves)
@ -150,13 +178,12 @@ struct ThreadedRegexVM
{
if (thread.saves == nullptr)
break;
const char index = *thread.inst++;
if (thread.saves->refcount > 1)
{
--thread.saves->refcount;
thread.saves = clone_saves(thread.saves);
}
const size_t index = *thread.inst++;
thread.saves->pos[index] = m_pos.base();
break;
}
@ -237,8 +264,8 @@ struct ThreadedRegexVM
Saves* initial_saves = nullptr;
if (not (m_flags & RegexExecFlags::NoSaves))
{
m_saves.push_back(std::make_unique<Saves>(Saves{1, Vector<Iterator>(m_program.save_count, Iterator{})}));
initial_saves = m_saves.back().get();
m_saves.push_back(Saves::allocate(m_program.save_count));
initial_saves = m_saves.back();
}
const bool search = (flags & RegexExecFlags::Search);
@ -263,7 +290,7 @@ struct ThreadedRegexVM
}
if (thread.saves)
m_captures = std::move(thread.saves->pos);
m_captures = thread.saves;
if (flags & RegexExecFlags::AnyMatch)
return true;
@ -299,7 +326,7 @@ struct ThreadedRegexVM
if (step(thread, current_threads) == StepResult::Matched)
{
if (thread.saves)
m_captures = std::move(thread.saves->pos);
m_captures = thread.saves;
return true;
}
}
@ -334,10 +361,10 @@ struct ThreadedRegexVM
Utf8It m_pos;
RegexExecFlags m_flags;
Vector<std::unique_ptr<Saves>> m_saves;
Vector<Saves*> m_saves;
Vector<Saves*> m_free_saves;
Vector<Iterator> m_captures;
Saves* m_captures = nullptr;
};
template<typename It>
@ -355,7 +382,7 @@ bool regex_match(It begin, It end, Vector<It>& captures, const CompiledRegex& re
ThreadedRegexVM<It> vm{re};
if (vm.exec(begin, end, flags & ~(RegexExecFlags::Search)))
{
captures = std::move(vm.m_captures);
std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures));
return true;
}
return false;
@ -376,7 +403,7 @@ bool regex_search(It begin, It end, Vector<It>& captures, const CompiledRegex& r
ThreadedRegexVM<It> vm{re};
if (vm.exec(begin, end, flags | RegexExecFlags::Search))
{
captures = std::move(vm.m_captures);
std::copy(vm.m_captures->pos, vm.m_captures->pos + re.save_count, std::back_inserter(captures));
return true;
}
return false;