Refactor DualThreadStack as a RingBuffer

Instead of two stacks growing from the two ends of a buffer, use a
single ring buffer in which both thread lists grow from the same
midpoint.

This avoids the costly memory copy that previously happened on every
step, when the next threads are promoted to become the current ones.
Maxime Coste 2023-02-13 22:51:53 +11:00
parent 762064dc68
commit d708b77186
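
To illustrate the layout the commit message describes, here is a minimal
standalone sketch (not code from this commit; RingDualQueue, the simplified
Thread and the fixed capacity are assumptions made for the example). Both
thread lists share one ring buffer: the current list grows backwards and the
next list grows forwards from the same point, so promoting next to current
only moves two indices.

    // Current threads live in [current, next_begin), next threads in
    // [next_begin, next_end), both ranges taken modulo the buffer size.
    #include <cstdint>
    #include <vector>

    struct Thread { int16_t inst; int16_t saves; };

    struct RingDualQueue
    {
        explicit RingDualQueue(int32_t capacity) : data(capacity) {}

        bool current_is_empty() const { return current == next_begin; }
        bool next_is_empty() const { return next_begin == next_end; }

        // Current threads grow backwards from next_begin...
        void push_current(Thread t) { data[decrement(current)] = t; }
        Thread pop_current() { Thread t = data[current]; increment(current); return t; }

        // ...while next threads grow forwards from next_begin.
        void push_next(Thread t) { data[next_end] = t; increment(next_end); }

        // The point of the refactor: no element is copied, only indices move.
        void swap_next() { current = next_begin; next_begin = next_end; }

    private:
        int32_t decrement(int32_t& i) { if (i == 0) i = int32_t(data.size()); return --i; }
        void increment(int32_t& i) { if (++i == int32_t(data.size())) i = 0; }

        std::vector<Thread> data;
        int32_t current = 0, next_begin = 0, next_end = 0;
    };

In the old layout (current growing up from index 0, next growing down from
m_capacity), swap_next had to std::copy_n every pending thread back to the
front of the buffer on each step; with the ring buffer it is a constant-time
index update, and grow_ifn only copies when the ring actually fills up,
linearising the wrapped contents into the doubled allocation.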

@@ -197,6 +197,7 @@ template<typename It>
 struct SentinelType<It, void_t<typename It::Sentinel>> { using Type = typename It::Sentinel; };
 
 template<typename Iterator, RegexMode mode>
+    requires (has_direction(mode))
 class ThreadedRegexVM
 {
 public:
@@ -450,7 +451,7 @@ private:
         kak_assert(m_threads.current_is_empty() and m_threads.next_is_empty());
         release_saves(m_captures);
         m_captures = -1;
-        m_threads.grow_ifn();
+        m_threads.ensure_initial_capacity();
 
         const int16_t first_inst = forward ? 0 : m_program.first_backward_inst;
         m_threads.push_current({first_inst, -1});
@@ -478,9 +479,8 @@ private:
                 (m_threads.next_is_empty() and (not search or m_found_match)) or
                 (m_found_match and any_match))
             {
-                for (auto& t : m_threads.next_threads())
-                    release_saves(t.saves);
-                m_threads.clear_next();
+                while (not m_threads.next_is_empty())
+                    release_saves(m_threads.pop_next().saves);
                 return m_found_match;
             }
 
@@ -491,7 +491,6 @@ private:
             {
                 if (start_desc and m_threads.next_is_empty())
                     to_next_start(pos, config, *start_desc);
-                m_threads.grow_ifn();
                 m_threads.push_next({first_inst, -1});
             }
             m_threads.swap_next();
@@ -607,62 +606,68 @@ private:
     struct DualThreadStack
     {
-        DualThreadStack() = default;
-        DualThreadStack(const DualThreadStack&) = delete;
-        DualThreadStack(DualThreadStack&& other)
-            : m_data{other.m_data}, m_capacity{other.m_capacity}, m_current{other.m_current}, m_next{other.m_next}
-        {
-            other.m_data = nullptr;
-        }
-        ~DualThreadStack() { delete[] m_data; }
-
-        bool current_is_empty() const { return m_current == 0; }
-        bool next_is_empty() const { return m_next == m_capacity; }
+        bool current_is_empty() const { return m_current == m_next_begin; }
+        bool next_is_empty() const { return m_next_end == m_next_begin; }
 
-        void push_current(Thread thread) { kak_assert(m_current < m_next); m_data[m_current++] = thread; grow_ifn(); }
-        Thread pop_current() { kak_assert(m_current > 0); return m_data[--m_current]; }
+        void push_current(Thread thread) { m_data[decrement(m_current)] = thread; grow_ifn(); }
+        Thread pop_current() { auto res = m_data[m_current]; increment(m_current); return res; }
 
-        void push_next(Thread thread) { kak_assert(m_current < m_next); m_data[--m_next] = thread; }
-        void clear_next() { m_next = m_capacity; }
-
-        ConstArrayView<Thread> next_threads() const { return { m_data + m_next, m_data + m_capacity }; }
+        void push_next(Thread thread) { m_data[m_next_end] = thread; increment(m_next_end); }
+        Thread pop_next() { return m_data[decrement(m_next_end)]; }
 
         void swap_next()
         {
-            kak_assert(m_next < m_capacity);
-            const int32_t count = m_capacity - m_next;
-            std::copy_n(m_data + m_next, count, m_data);
-            m_current = count;
-            m_next = m_capacity;
+            m_current = m_next_begin;
+            m_next_begin = m_next_end;
+        }
+
+        void ensure_initial_capacity() {
+            if (m_capacity == 0)
+                grow_ifn();
         }
 
         void grow_ifn()
         {
-            if (m_current != m_next)
+            if (m_current != m_next_end)
                 return;
             constexpr int32_t initial_capacity = 64 / sizeof(Thread);
             static_assert(initial_capacity >= 4);
 
             const auto new_capacity = m_capacity ? m_capacity * 2 : initial_capacity;
-            const auto next_count = m_capacity - m_next;
-            const auto new_next = new_capacity - next_count;
-
             Thread* new_data = new Thread[new_capacity];
-            std::copy_n(m_data, m_current, new_data);
-            std::copy_n(m_data + m_next, next_count, new_data + new_next);
-            delete[] m_data;
-            m_data = new_data;
+            if (m_current < m_next_end)
+                m_next_end = std::copy(m_data.get() + m_current, m_data.get() + m_next_end, new_data) - new_data;
+            else
+                m_next_end = std::copy(m_data.get(), m_data.get() + m_next_end, std::copy(m_data.get() + m_current, m_data.get() + m_capacity, new_data)) - new_data;
+            m_next_begin = m_next_begin >= m_current ? m_next_begin - m_current : m_capacity - (m_current - m_next_begin);
+            m_current = 0;
+            m_data.reset(new_data);
             m_capacity = new_capacity;
-            m_next = new_next;
         }
 
     private:
-        Thread* m_data = nullptr;
+        int32_t decrement(int32_t& index) {
+            if (index == 0)
+                index = m_capacity;
+            return --index;
+        }
+
+        int32_t increment(int32_t& index) {
+            if (++index == m_capacity)
+                index = 0;
+            return index;
+        }
+
+        std::unique_ptr<Thread[]> m_data;
         int32_t m_capacity = 0; // Maximum capacity should be 2*instruction count, so 65536
         int32_t m_current = 0;
-        int32_t m_next = 0;
+        int32_t m_next_begin = 0;
+        int32_t m_next_end = 0;
     };
 
-    static_assert(has_direction(mode));
     static constexpr bool forward = mode & RegexMode::Forward;
 
     DualThreadStack m_threads;