Refactor DualThreadStack as a RingBuffer

Instead of two stacks growing from the two ends of a buffer, use a
single ring buffer in which both thread lists grow from the same
midpoint.

This avoids the costly memory copy that previously happened on every
step, when the next threads are promoted to become the current ones.
Maxime Coste 2023-02-13 22:51:53 +11:00
parent 762064dc68
commit d708b77186
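
To illustrate the layout the commit message describes, here is a minimal
standalone sketch (not code from this commit; RingDualQueue, the simplified
Thread and the fixed capacity are assumptions made for the example). Both
thread lists share one ring buffer: the current list grows backwards and the
next list grows forwards from the same point, so promoting next to current
only moves two indices.

    // Current threads live in [current, next_begin), next threads in
    // [next_begin, next_end), both ranges taken modulo the buffer size.
    #include <cstdint>
    #include <vector>

    struct Thread { int16_t inst; int16_t saves; };

    struct RingDualQueue
    {
        explicit RingDualQueue(int32_t capacity) : data(capacity) {}

        bool current_is_empty() const { return current == next_begin; }
        bool next_is_empty() const { return next_begin == next_end; }

        // Current threads grow backwards from next_begin...
        void push_current(Thread t) { data[decrement(current)] = t; }
        Thread pop_current() { Thread t = data[current]; increment(current); return t; }

        // ...while next threads grow forwards from next_begin.
        void push_next(Thread t) { data[next_end] = t; increment(next_end); }

        // The point of the refactor: no element is copied, only indices move.
        void swap_next() { current = next_begin; next_begin = next_end; }

    private:
        int32_t decrement(int32_t& i) { if (i == 0) i = int32_t(data.size()); return --i; }
        void increment(int32_t& i) { if (++i == int32_t(data.size())) i = 0; }

        std::vector<Thread> data;
        int32_t current = 0, next_begin = 0, next_end = 0;
    };

In the old layout (current growing up from index 0, next growing down from
m_capacity), swap_next had to std::copy_n every pending thread back to the
front of the buffer on each step; with the ring buffer it is a constant-time
index update, and grow_ifn only copies when the ring actually fills up,
linearising the wrapped contents into the doubled allocation.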

@@ -197,6 +197,7 @@ template<typename It>
 struct SentinelType<It, void_t<typename It::Sentinel>> { using Type = typename It::Sentinel; };
 
 template<typename Iterator, RegexMode mode>
+    requires (has_direction(mode))
 class ThreadedRegexVM
 {
 public:
@@ -450,7 +451,7 @@ private:
         kak_assert(m_threads.current_is_empty() and m_threads.next_is_empty());
         release_saves(m_captures);
         m_captures = -1;
-        m_threads.grow_ifn();
+        m_threads.ensure_initial_capacity();
 
         const int16_t first_inst = forward ? 0 : m_program.first_backward_inst;
         m_threads.push_current({first_inst, -1});
@@ -478,9 +479,8 @@ private:
                 (m_threads.next_is_empty() and (not search or m_found_match)) or
                 (m_found_match and any_match))
             {
-                for (auto& t : m_threads.next_threads())
-                    release_saves(t.saves);
-                m_threads.clear_next();
+                while (not m_threads.next_is_empty())
+                    release_saves(m_threads.pop_next().saves);
                 return m_found_match;
             }
 
@@ -491,7 +491,6 @@ private:
             {
                 if (start_desc and m_threads.next_is_empty())
                     to_next_start(pos, config, *start_desc);
-                m_threads.grow_ifn();
                 m_threads.push_next({first_inst, -1});
             }
             m_threads.swap_next();
@@ -607,62 +606,68 @@ private:
     struct DualThreadStack
     {
-        DualThreadStack() = default;
-        DualThreadStack(const DualThreadStack&) = delete;
-        DualThreadStack(DualThreadStack&& other)
-            : m_data{other.m_data}, m_capacity{other.m_capacity}, m_current{other.m_current}, m_next{other.m_next}
-        {
-            other.m_data = nullptr;
-        }
-        ~DualThreadStack() { delete[] m_data; }
-
-        bool current_is_empty() const { return m_current == 0; }
-        bool next_is_empty() const { return m_next == m_capacity; }
+        bool current_is_empty() const { return m_current == m_next_begin; }
+        bool next_is_empty() const { return m_next_end == m_next_begin; }
 
-        void push_current(Thread thread) { kak_assert(m_current < m_next); m_data[m_current++] = thread; grow_ifn(); }
-        Thread pop_current() { kak_assert(m_current > 0); return m_data[--m_current]; }
+        void push_current(Thread thread) { m_data[decrement(m_current)] = thread; grow_ifn(); }
+        Thread pop_current() { auto res = m_data[m_current]; increment(m_current); return res; }
 
-        void push_next(Thread thread) { kak_assert(m_current < m_next); m_data[--m_next] = thread; }
-        void clear_next() { m_next = m_capacity; }
-
-        ConstArrayView<Thread> next_threads() const { return { m_data + m_next, m_data + m_capacity }; }
+        void push_next(Thread thread) { m_data[m_next_end] = thread; increment(m_next_end); }
+        Thread pop_next() { return m_data[decrement(m_next_end)]; }
 
         void swap_next()
         {
-            kak_assert(m_next < m_capacity);
-            const int32_t count = m_capacity - m_next;
-            std::copy_n(m_data + m_next, count, m_data);
-            m_current = count;
-            m_next = m_capacity;
+            m_current = m_next_begin;
+            m_next_begin = m_next_end;
+        }
+
+        void ensure_initial_capacity() {
+            if (m_capacity == 0)
+                grow_ifn();
         }
 
         void grow_ifn()
         {
-            if (m_current != m_next)
+            if (m_current != m_next_end)
                 return;
             constexpr int32_t initial_capacity = 64 / sizeof(Thread);
             static_assert(initial_capacity >= 4);
 
             const auto new_capacity = m_capacity ? m_capacity * 2 : initial_capacity;
-            const auto next_count = m_capacity - m_next;
-            const auto new_next = new_capacity - next_count;
-
             Thread* new_data = new Thread[new_capacity];
-            std::copy_n(m_data, m_current, new_data);
-            std::copy_n(m_data + m_next, next_count, new_data + new_next);
-            delete[] m_data;
-            m_data = new_data;
+            if (m_current < m_next_end)
+                m_next_end = std::copy(m_data.get() + m_current, m_data.get() + m_next_end, new_data) - new_data;
+            else
+                m_next_end = std::copy(m_data.get(), m_data.get() + m_next_end, std::copy(m_data.get() + m_current, m_data.get() + m_capacity, new_data)) - new_data;
+            m_next_begin = m_next_begin >= m_current ? m_next_begin - m_current : m_capacity - (m_current - m_next_begin);
+            m_current = 0;
+            m_data.reset(new_data);
             m_capacity = new_capacity;
-            m_next = new_next;
         }
 
     private:
-        Thread* m_data = nullptr;
+        int32_t decrement(int32_t& index) {
+            if (index == 0)
+                index = m_capacity;
+            return --index;
+        }
+
+        int32_t increment(int32_t& index) {
+            if (++index == m_capacity)
+                index = 0;
+            return index;
+        }
+
+        std::unique_ptr<Thread[]> m_data;
         int32_t m_capacity = 0; // Maximum capacity should be 2*instruction count, so 65536
         int32_t m_current = 0;
-        int32_t m_next = 0;
+        int32_t m_next_begin = 0;
+        int32_t m_next_end = 0;
     };
 
-    static_assert(has_direction(mode));
     static constexpr bool forward = mode & RegexMode::Forward;
 
     DualThreadStack m_threads;