From 49def73e4e6717e325275f936681c55e93cba200 Mon Sep 17 00:00:00 2001
From: Maxime Coste
Date: Tue, 12 May 2015 23:41:35 +0100
Subject: [PATCH] Add initial diff implementation based on Eugene W. Myers' algorithm

---
 src/diff.hh       | 211 ++++++++++++++++++++++++++++++++++++++++++++++
 src/unit_tests.cc |  10 +++
 2 files changed, 221 insertions(+)
 create mode 100644 src/diff.hh

diff --git a/src/diff.hh b/src/diff.hh
new file mode 100644
index 00000000..aa482acf
--- /dev/null
+++ b/src/diff.hh
@@ -0,0 +1,211 @@
+#ifndef diff_hh_INCLUDED
+#define diff_hh_INCLUDED
+
+// Diff computation based on the linear space variant of the algorithm
+// described by Eugene W. Myers in "An O(ND) Difference Algorithm and Its
+// Variations".
+
+#include "array_view.hh"
+#include "vector.hh"
+
+#include <iterator>
+
+namespace Kakoune
+{
+
+// View on an array indexed from -size to size: index n refers to the
+// element n + size of the underlying data.
+template<typename T>
+struct MirroredArray : public ArrayView<T>
+{
+    MirroredArray(ArrayView<T> data, int size)
+        : ArrayView<T>(data), size(size)
+    {
+        kak_assert(2 * size + 1 <= data.size());
+    }
+
+    T& operator[](int n) { return ArrayView<T>::operator[](n + size); }
+    const T& operator[](int n) const { return ArrayView<T>::operator[](n + size); }
+private:
+    int size;
+};
+
+// Run of matching elements going from (x, y) to (u, v), x and u being
+// positions along a, y and v positions along b. On a forward path, add
+// tells if (x, y) was reached through an insertion (vertical edge) rather
+// than through a removal (horizontal edge).
+struct Snake{ int x, y, u, v; bool add; };
+
+// Compute the snake ending the furthest reaching D-path on diagonal k,
+// V holding the furthest x reached by the (D-1)-paths on each diagonal.
+template<typename Iterator>
+Snake find_end_snake_of_further_reaching_dpath(Iterator a, int N, Iterator b, int M,
+                                               const MirroredArray<int>& V,
+                                               const int D, const int k)
+{
+    const bool add = k == -D or (k != D and V[k-1] < V[k+1]);
+
+    // if diagonal on the right goes further along x than diagonal on the left,
+    // then we take a vertical edge from it to this diagonal, hence x = V[k+1]
+    // else, we take a horizontal edge from our left diagonal, x = V[k-1]+1
+    const int x = add ? V[k+1] : V[k-1]+1; // our position along a
+
+    // we are by construction on diagonal k, so our position along b (y)
+    // is x - k.
+    const int y = x - k;
+
+    int u = x, v = y;
+    // follow end snake along diagonal k
+    while (u < N and v < M and a[u] == b[v])
+        ++u, ++v;
+
+    return { x, y, u, v, add };
+}
+
+// Snake along with the length d of the edit script it is the middle of.
+struct SnakeLen : Snake
+{
+    SnakeLen(Snake s, int d) : Snake(s), d(d) {}
+    int d;
+};
+
+// Find the middle snake of an optimal edit script from a (of length N) to
+// b (of length M), by searching forward from the beginning and backward
+// from the end until both searches overlap. data1 and data2 are working
+// buffers, each must hold at least 2 * (N + M) + 1 elements.
+template<typename Iterator>
+SnakeLen find_middle_snake(Iterator a, int N, Iterator b, int M,
+                           ArrayView<int> data1, ArrayView<int> data2)
+{
+    const int delta = N - M;
+    MirroredArray<int> V1{data1, N + M};
+    MirroredArray<int> V2{data2, N + M};
+
+    // The buffers are shared between recursive calls, reset the only entry
+    // that gets read before having been written (when D == 0 and k == 0).
+    V1[1] = 0;
+    V2[1] = 0;
+
+    std::reverse_iterator<Iterator> ra{a + N}, rb{b + M};
+
+    for (int D = 0; D <= (M + N + 1) / 2; ++D)
+    {
+        for (int k1 = -D; k1 <= D; k1 += 2)
+        {
+            auto p = find_end_snake_of_further_reaching_dpath(a, N, b, M, V1, D, k1);
+            V1[k1] = p.u;
+
+            const int k2 = -(k1 - delta);
+            if ((delta % 2 != 0) and -(D-1) <= k2 and k2 <= (D-1))
+            {
+                if (V1[k1] + V2[k2] >= N)
+                    return { p, 2 * D - 1 };// return last snake on forward path
+            }
+        }
+
+        for (int k2 = -D; k2 <= D; k2 += 2)
+        {
+            auto p = find_end_snake_of_further_reaching_dpath(ra, N, rb, M, V2, D, k2);
+            V2[k2] = p.u;
+
+            const int k1 = -(k2 - delta);
+            if ((delta % 2 == 0) and -D <= k1 and k1 <= D)
+            {
+                if (V1[k1] + V2[k2] >= N)
+                    return { { N - p.u, M - p.v, N - p.x , M - p.y, p.add } , 2 * D };// return last snake on reverse path
+            }
+        }
+    }
+
+    // The search above is expected to always return, this is only here so
+    // that we do not fall off the end when assertions are disabled.
+    kak_assert(false);
+    return { Snake{}, 0 };
+}
+
+// Single edit: when add is true, [begin, end) is a range of b to insert,
+// else it is a range of a to remove.
+template<typename Iterator>
+struct Diff
+{
+    bool add;
+    Iterator begin;
+    Iterator end;
+};
+
+// Append to diffs the edits turning a (of length N) into b (of length M).
+template<typename Iterator>
+void find_diff_rec(Iterator a, size_t N, Iterator b, size_t M,
+                   ArrayView<int> data1, ArrayView<int> data2,
+                   Vector<Diff<Iterator>>& diffs)
+{
+    if (N > 0 and M > 0)
+    {
+        auto middle_snake = find_middle_snake(a, N, b, M, data1, data2);
+        if (middle_snake.d > 1)
+        {
+            find_diff_rec(a, middle_snake.x, b, middle_snake.y,
+                          data1, data2, diffs);
+
+            find_diff_rec(a + middle_snake.u, N - middle_snake.u,
+                          b + middle_snake.v, M - middle_snake.v,
+                          data1, data2, diffs);
+        }
+        else if (middle_snake.d == 1)
+        {
+            // The single edit is the one leading to (x, y): a vertical edge
+            // inserts b[y-1], a horizontal edge removes a[x-1].
+            if (middle_snake.add)
+                diffs.push_back({true, b + middle_snake.y-1, b + middle_snake.y});
+            else
+                diffs.push_back({false, a + middle_snake.x-1, a + middle_snake.x});
+        }
+    }
+    else if (M > 0)
+        diffs.push_back({true, b, b + M});
+    else if (N > 0)
+        diffs.push_back({false, a, a + N});
+}
+
+// Merge consecutive diffs of the same kind covering contiguous ranges.
+template<typename Iterator>
+void compact_diffs(Vector<Diff<Iterator>>& diffs)
+{
+    if (diffs.size() < 2)
+        return;
+
+    auto out_it = diffs.begin();
+    for (auto it = out_it + 1; it != diffs.end(); ++it)
+    {
+        if (it->add == out_it->add and it->begin == out_it->end)
+            out_it->end = it->end;
+        else if (++out_it != it)
+            *out_it = *it;
+    }
+    // drop the entries that got merged into previous ones
+    diffs.erase(out_it + 1, diffs.end());
+}
+
+// Compute the edits turning the N elements starting at a into the M
+// elements starting at b. The returned diffs refer to the input ranges
+// through iterators, those ranges must hence outlive them.
+template<typename Iterator>
+Vector<Diff<Iterator>> find_diff(Iterator a, size_t N, Iterator b, size_t M)
+{
+    // one buffer for the forward search, one for the reverse search
+    const size_t max_D_size = 2 * (N + M) + 1;
+    Vector<int> data(2 * max_D_size);
+    Vector<Diff<Iterator>> diffs;
+    find_diff_rec(a, N, b, M,
+                  {data.data(), max_D_size},
+                  {data.data() + max_D_size, max_D_size},
+                  diffs);
+
+    // compact_diffs(diffs);
+
+    return diffs;
+}
+
+}
+
+#endif // diff_hh_INCLUDED
diff --git a/src/unit_tests.cc b/src/unit_tests.cc
index 58f34a98..e054ee41 100644
--- a/src/unit_tests.cc
+++ b/src/unit_tests.cc
@@ -1,5 +1,6 @@
 #include "assert.hh"
 #include "buffer.hh"
+#include "diff.hh"
 #include "keys.hh"
 #include "selectors.hh"
 #include "word_db.hh"
@@ -239,6 +240,14 @@ void test_line_modifications()
     }
 }
 
+void test_diff()
+{
+    StringView s1 = "mais que fais la police";
+    StringView s2 = "mais ou va la police";
+
+    auto diff = find_diff(s1.begin(), (int)s1.length(), s2.begin(), (int)s2.length());
+}
+
 void run_unit_tests()
 {
     test_utf8();
@@ -248,4 +257,5 @@ void run_unit_tests()
     test_undo_group_optimizer();
     test_word_db();
     test_line_modifications();
+    test_diff();
 }