Avoid clearing iterator buffer on saves allocation

When creating a new save, we had to clear all iterators so that they
held valid values. This operation is relatively costly because it gets
optimized to a memset, whose call overhead is high relative to the work
done (we usually have fewer than 32 bytes to clear). Bypass this by
storing a bitmap of valid iterators and resetting the invalid ones
lazily, only when the captures are actually read.
This commit is contained in:
Maxime Coste 2024-03-13 23:45:51 +11:00
parent c4df0fac52
commit 83f12fc8e9

View File

@ -287,7 +287,15 @@ public:
ArrayView<const Iterator> captures() const ArrayView<const Iterator> captures() const
{ {
if (m_captures >= 0) if (m_captures >= 0)
return { m_saves[m_captures].pos, m_program.save_count }; {
auto& saves = m_saves[m_captures];
for (int i = 0; i < m_program.save_count; ++i)
{
if ((saves.valid_mask & (1 << i)) == 0)
saves.pos[i] = Iterator{};
}
return { saves.pos, m_program.save_count };
}
return {}; return {};
} }
@ -295,12 +303,15 @@ private:
struct Saves struct Saves
{ {
int32_t refcount; int32_t refcount;
int32_t next_free; union {
int32_t next_free;
uint32_t valid_mask;
};
Iterator* pos; Iterator* pos;
}; };
template<bool copy> template<bool copy>
int16_t new_saves(Iterator* pos) int16_t new_saves(Iterator* pos, uint32_t valid_mask)
{ {
kak_assert(not copy or pos != nullptr); kak_assert(not copy or pos != nullptr);
const auto count = m_program.save_count; const auto count = m_program.save_count;
@ -310,18 +321,16 @@ private:
Saves& saves = m_saves[res]; Saves& saves = m_saves[res];
m_first_free = saves.next_free; m_first_free = saves.next_free;
kak_assert(saves.refcount == 1); kak_assert(saves.refcount == 1);
if (copy) if constexpr (copy)
std::copy_n(pos, count, saves.pos); std::copy_n(pos, std::bit_width(valid_mask), saves.pos);
else saves.valid_mask = valid_mask;
std::fill_n(saves.pos, count, Iterator{});
return res; return res;
} }
auto* new_pos = reinterpret_cast<Iterator*>(operator new (count * sizeof(Iterator))); auto* new_pos = reinterpret_cast<Iterator*>(operator new (count * sizeof(Iterator)));
for (size_t i = 0; i < count; ++i) for (size_t i = 0; i < count; ++i)
new (new_pos+i) Iterator{copy ? pos[i] : Iterator{}}; new (new_pos+i) Iterator{copy ? pos[i] : Iterator{}};
m_saves.push_back({1, 0, new_pos}); m_saves.push_back({1, {.valid_mask=valid_mask}, new_pos});
return static_cast<int16_t>(m_saves.size() - 1); return static_cast<int16_t>(m_saves.size() - 1);
} }
@ -418,16 +427,17 @@ private:
} }
break; break;
case CompiledRegex::Save: case CompiledRegex::Save:
if (mode & RegexMode::NoSaves) if constexpr (mode & RegexMode::NoSaves)
break; break;
if (thread.saves < 0) if (thread.saves < 0)
thread.saves = new_saves<false>(nullptr); thread.saves = new_saves<false>(nullptr, 0);
else if (m_saves[thread.saves].refcount > 1) else if (auto& saves = m_saves[thread.saves]; saves.refcount > 1)
{ {
--m_saves[thread.saves].refcount; --saves.refcount;
thread.saves = new_saves<true>(m_saves[thread.saves].pos); thread.saves = new_saves<true>(saves.pos, saves.valid_mask);
} }
m_saves[thread.saves].pos[inst.param.save_index] = pos; m_saves[thread.saves].pos[inst.param.save_index] = pos;
m_saves[thread.saves].valid_mask |= (1 << inst.param.save_index);
break; break;
case CompiledRegex::CharClass: case CompiledRegex::CharClass:
if (pos == config.end) if (pos == config.end)