From 4fdbf21ff8b042d48c9fb8a506bdc63018631453 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Sat, 30 Nov 2019 10:46:42 +1100 Subject: [PATCH] Refactor diff to make allocating a diff vector optional The diff interface now goes through a for_each_diff function that uses a callback for each found diff. --- src/buffer.cc | 17 ++++++---- src/diff.hh | 84 ++++++++++++++++++++++++++--------------------- src/normal.cc | 31 ++++++++--------- src/unit_tests.cc | 50 ++++++++++------------------ 4 files changed, 88 insertions(+), 94 deletions(-) diff --git a/src/buffer.cc b/src/buffer.cc index 90a4e9f2..fbf1c29d 100644 --- a/src/buffer.cc +++ b/src/buffer.cc @@ -262,17 +262,20 @@ void Buffer::reload(StringView data, timespec fs_timestamp) } else { - auto diff = find_diff(m_lines.begin(), m_lines.size(), - parsed_lines.lines.begin(), parsed_lines.lines.size(), - [](const StringDataPtr& lhs, const StringDataPtr& rhs) - { return lhs->strview() == rhs->strview(); }); + Vector diff; + for_each_diff(m_lines.begin(), m_lines.size(), + parsed_lines.lines.begin(), parsed_lines.lines.size(), + [&diff](DiffOp op, int len, int posB) + { diff.push_back({op, len, posB}); }, + [](const StringDataPtr& lhs, const StringDataPtr& rhs) + { return lhs->strview() == rhs->strview(); }); auto it = m_lines.begin(); for (auto& d : diff) { - if (d.mode == Diff::Keep) + if (d.op == DiffOp::Keep) it += d.len; - else if (d.mode == Diff::Add) + else if (d.op == DiffOp::Add) { const LineCount cur_line = (int)(it - m_lines.begin()); @@ -285,7 +288,7 @@ void Buffer::reload(StringView data, timespec fs_timestamp) m_lines.insert(it, parsed_lines.lines.begin() + d.posB, parsed_lines.lines.begin() + d.posB + d.len); it = m_lines.begin() + (int)(cur_line + d.len); } - else if (d.mode == Diff::Remove) + else if (d.op == DiffOp::Remove) { const LineCount cur_line = (int)(it - m_lines.begin()); diff --git a/src/diff.hh b/src/diff.hh index cc6b52d4..0205ef73 100644 --- a/src/diff.hh +++ b/src/diff.hh @@ -6,10 +6,10 @@ // (http://xmailserver.org/diff2.pdf) #include "array_view.hh" -#include "vector.hh" #include #include +#include namespace Kakoune { @@ -101,32 +101,24 @@ Snake find_middle_snake(Iterator a, int N, Iterator b, int M, return best; } -struct Diff +enum class DiffOp { - enum { Keep, Add, Remove } mode; - int len; - int posB; + Keep, + Add, + Remove }; -inline void append_diff(Vector& diffs, Diff diff) -{ - if (diff.len == 0) - return; - - if (not diffs.empty() and diffs.back().mode == diff.mode - and (diff.mode != Diff::Add or - diffs.back().posB + diffs.back().len == diff.posB)) - diffs.back().len += diff.len; - else - diffs.push_back(diff); -} - -template +template void find_diff_rec(Iterator a, int begA, int endA, Iterator b, int begB, int endB, int* V1, int* V2, int cost_limit, - Equal eq, Vector& diffs) + Equal eq, OnDiff&& on_diff) { + auto on_diff_ifn = [&](DiffOp op, int len, int posB) { + if (len != 0) + on_diff(op, len, posB); + }; + int prefix_len = 0; while (begA != endA and begB != endB and eq(a[begA], b[begB])) ++begA, ++begB, ++prefix_len; @@ -135,14 +127,14 @@ void find_diff_rec(Iterator a, int begA, int endA, while (begA != endA and begB != endB and eq(a[endA-1], b[endB-1])) --endA, --endB, ++suffix_len; - append_diff(diffs, {Diff::Keep, prefix_len, 0}); + on_diff_ifn(DiffOp::Keep, prefix_len, 0); const auto lenA = endA - begA, lenB = endB - begB; if (lenA == 0) - append_diff(diffs, {Diff::Add, lenB, begB}); + on_diff_ifn(DiffOp::Add, lenB, begB); else if (lenB == 0) - append_diff(diffs, {Diff::Remove, lenA, 0}); + on_diff_ifn(DiffOp::Remove, lenA, 0); else { auto snake = find_middle_snake(a + begA, lenA, b + begB, lenB, V1, V2, cost_limit, eq); @@ -150,38 +142,56 @@ void find_diff_rec(Iterator a, int begA, int endA, find_diff_rec(a, begA, begA + snake.x - (int)(snake.op == Snake::Del), b, begB, begB + snake.y - (int)(snake.op == Snake::Add), - V1, V2, cost_limit, eq, diffs); + V1, V2, cost_limit, eq, on_diff); if (snake.op == Snake::Add) - append_diff(diffs, {Diff::Add, 1, begB + snake.y - 1}); + on_diff_ifn(DiffOp::Add, 1, begB + snake.y - 1); if (snake.op == Snake::Del) - append_diff(diffs, {Diff::Remove, 1, 0}); + on_diff_ifn(DiffOp::Remove, 1, 0); - append_diff(diffs, {Diff::Keep, snake.u - snake.x, 0}); + on_diff_ifn(DiffOp::Keep, snake.u - snake.x, 0); if (snake.op == Snake::RevAdd) - append_diff(diffs, {Diff::Add, 1, begB + snake.v}); + on_diff_ifn(DiffOp::Add, 1, begB + snake.v); if (snake.op == Snake::RevDel) - append_diff(diffs, {Diff::Remove, 1, 0}); + on_diff_ifn(DiffOp::Remove, 1, 0); find_diff_rec(a, begA + snake.u + (int)(snake.op == Snake::RevDel), endA, b, begB + snake.v + (int)(snake.op == Snake::RevAdd), endB, - V1, V2, cost_limit, eq, diffs); + V1, V2, cost_limit, eq, on_diff); } - append_diff(diffs, {Diff::Keep, suffix_len, 0}); + on_diff_ifn(DiffOp::Keep, suffix_len, 0); } -template> -Vector find_diff(Iterator a, int N, Iterator b, int M, Equal eq = Equal{}) +struct Diff +{ + DiffOp op; + int len; + int posB; +}; + +template> +void for_each_diff(Iterator a, int N, Iterator b, int M, OnDiff&& on_diff, Equal eq = Equal{}) { const int max = 2 * (N + M) + 1; - Vector data(2*max); - Vector diffs; + std::unique_ptr data(new int[2*max]); constexpr int cost_limit = 1000; - find_diff_rec(a, 0, N, b, 0, M, &data[N+M], &data[max + N+M], cost_limit, eq, diffs); - return diffs; + Diff last{}; + find_diff_rec(a, 0, N, b, 0, M, &data[N+M], &data[max + N+M], cost_limit, eq, + [&last, &on_diff](DiffOp op, int len, int posB) { + if (last.op == op and (op != DiffOp::Add or last.posB + last.len == posB)) + last.len += len; + else + { + if (last.op != DiffOp{} or last.len != 0 or last.posB != 0) + on_diff(last.op, last.len, last.posB); + last = Diff{op, len, posB}; + } + }); + if (last.op != DiffOp{} or last.len != 0 or last.posB != 0) + on_diff(last.op, last.len, last.posB); } } diff --git a/src/normal.cc b/src/normal.cc index fd46dab9..54857258 100644 --- a/src/normal.cc +++ b/src/normal.cc @@ -508,33 +508,30 @@ BufferCoord apply_diff(Buffer& buffer, BufferCoord pos, StringView before, Strin const auto lines_before = before | split_after('\n') | gather>(); const auto lines_after = after | split_after('\n') | gather>(); - auto diffs = find_diff(lines_before.begin(), (int)lines_before.size(), - lines_after.begin(), (int)lines_after.size()); - auto byte_count = [](auto&& lines, int first, int count) { return std::accumulate(lines.begin() + first, lines.begin() + first + count, 0_byte, [](ByteCount l, StringView s) { return l + s.length(); }); }; - int posA = 0; - for (auto& diff : diffs) - { - switch (diff.mode) + for_each_diff(lines_before.begin(), (int)lines_before.size(), + lines_after.begin(), (int)lines_after.size(), + [&, posA = 0](DiffOp op, int len, int posB) mutable { + switch (op) { - case Diff::Keep: - pos = buffer.advance(pos, byte_count(lines_before, posA, diff.len)); - posA += diff.len; + case DiffOp::Keep: + pos = buffer.advance(pos, byte_count(lines_before, posA, len)); + posA += len; break; - case Diff::Add: - pos = buffer.insert(pos, {lines_after[diff.posB].begin(), - lines_after[diff.posB + diff.len - 1].end()}); + case DiffOp::Add: + pos = buffer.insert(pos, {lines_after[posB].begin(), + lines_after[posB + len - 1].end()}); break; - case Diff::Remove: - pos = buffer.erase(pos, buffer.advance(pos, byte_count(lines_before, posA, diff.len))); - posA += diff.len; + case DiffOp::Remove: + pos = buffer.erase(pos, buffer.advance(pos, byte_count(lines_before, posA, len))); + posA += len; break; } - } + }); return pos; } diff --git a/src/unit_tests.cc b/src/unit_tests.cc index 54269074..694ad041 100644 --- a/src/unit_tests.cc +++ b/src/unit_tests.cc @@ -17,41 +17,25 @@ UnitTest test_utf8{[]() UnitTest test_diff{[]() { - auto eq = [](const Diff& lhs, const Diff& rhs) { - return lhs.mode == rhs.mode and lhs.len == rhs.len and lhs.posB == rhs.posB; + struct Diff{DiffOp op; int len; int posB;}; + auto check_diff = [](StringView a, StringView b, std::initializer_list diffs) { + size_t count = 0; + for_each_diff(a.begin(), (int)a.length(), b.begin(), (int)b.length(), + [&](DiffOp op, int len, int posB) { + kak_assert(count < diffs.size()); + auto& d = diffs.begin()[count++]; + kak_assert(d.op == op and d.len == len and d.posB == posB); + }); + kak_assert(count == diffs.size()); }; + check_diff("a?", "!", {{DiffOp::Remove, 1, 0}, {DiffOp::Add, 1, 0}, {DiffOp::Remove, 1, 0}}); + check_diff("abcde", "cd", {{DiffOp::Remove, 2, 0}, {DiffOp::Keep, 2, 0}, {DiffOp::Remove, 1, 0}}); + check_diff("abcd", "cdef", {{DiffOp::Remove, 2, 0}, {DiffOp::Keep, 2, 0}, {DiffOp::Add, 2, 2}}); - { - auto diff = find_diff("a?", 2, "!", 1); - kak_assert(diff.size() == 3 and - eq(diff[0], {Diff::Remove, 1, 0}) and - eq(diff[1], {Diff::Add, 1, 0}) and - eq(diff[2], {Diff::Remove, 1, 0})); - } - - { - auto diff = find_diff("abcde", 5, "cd", 2); - kak_assert(diff.size() == 3 and - eq(diff[0], {Diff::Remove, 2, 0}) and - eq(diff[1], {Diff::Keep, 2, 0}) and - eq(diff[2], {Diff::Remove, 1, 0})); - } - - { - auto diff = find_diff("abcd", 4, "cdef", 4); - kak_assert(diff.size() == 3 and - eq(diff[0], {Diff::Remove, 2, 0}) and - eq(diff[1], {Diff::Keep, 2, 0}) and - eq(diff[2], {Diff::Add, 2, 2})); - } - - { - StringView s1 = "mais que fais la police"; - StringView s2 = "mais ou va la police"; - - auto diff = find_diff(s1.begin(), (int)s1.length(), s2.begin(), (int)s2.length()); - kak_assert(diff.size() == 11); - } + check_diff("mais que fais la police", "mais ou va la police", + {{DiffOp::Keep, 5, 0}, {DiffOp::Remove, 1, 0}, {DiffOp::Add, 1, 5}, {DiffOp::Keep, 1, 0}, + {DiffOp::Remove, 1, 0}, {DiffOp::Keep, 1, 0}, {DiffOp::Add, 1, 8}, {DiffOp::Remove, 1, 0}, + {DiffOp::Keep, 1, 0}, {DiffOp::Remove, 2, 0}, {DiffOp::Keep, 10, 0}} ); }}; UnitTest* UnitTest::list = nullptr;