From d1c005dd8c98fb809a2ec0bb5dad503a03a263f8 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Sun, 23 Jul 2017 21:33:12 +0200 Subject: [PATCH] Limit diff algorithm complexity Return a non-optimal, but valid, diff when we detect too many iterations Fixes #1509 --- src/diff.hh | 56 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/src/diff.hh b/src/diff.hh index e4dd531e..cb28921c 100644 --- a/src/diff.hh +++ b/src/diff.hh @@ -43,15 +43,15 @@ Snake find_end_snake_of_further_reaching_dpath(Iterator a, int N, Iterator b, in template Snake find_middle_snake(Iterator a, int N, Iterator b, int M, - int* V1, int* V2, Equal eq) + int* V1, int* V2, int cost_limit, Equal eq) { const int delta = N - M; V1[1] = 0; V2[1] = 0; std::reverse_iterator ra{a + N}, rb{b + M}; - - for (int D = 0; D <= (M + N + 1) / 2; ++D) + const int max_D = std::min((M + N + 1) / 2 + 1, cost_limit); + for (int D = 0; D < max_D; ++D) { for (int k1 = -D; k1 <= D; k1 += 2) { @@ -59,11 +59,8 @@ Snake find_middle_snake(Iterator a, int N, Iterator b, int M, V1[k1] = p.u; const int k2 = -(k1 - delta); - if ((delta % 2 != 0) and -(D-1) <= k2 and k2 <= (D-1)) - { - if (V1[k1] + V2[k2] >= N) - return p;// return last snake on forward path, len = (2 * D - 1) - } + if ((delta % 2 != 0) and -(D-1) <= k2 and k2 <= (D-1) and V1[k1] + V2[k2] >= N) + return p;// return last snake on forward path, len = (2 * D - 1) } for (int k2 = -D; k2 <= D; k2 += 2) @@ -72,17 +69,32 @@ Snake find_middle_snake(Iterator a, int N, Iterator b, int M, V2[k2] = p.u; const int k1 = -(k2 - delta); - if ((delta % 2 == 0) and -D <= k1 and k1 <= D) - { - if (V1[k1] + V2[k2] >= N) - return { N - p.u, M - p.v, N - p.x , M - p.y, - (Snake::Op)(p.op + Snake::RevAdd) };// return last snake on reverse path, len = 2 * D - } + if ((delta % 2 == 0) and -D <= k1 and k1 <= D and V1[k1] + V2[k2] >= N) + return { N - p.u, M - p.v, N - p.x , M - p.y, + (Snake::Op)(p.op + Snake::RevAdd) };// return last snake on reverse path, len = 2 * D } } - kak_assert(false); - return {}; + // We did not find a minimal path in less than max_D iterations, iterate one more time finding the best + Snake best{}; + for (int k1 = -max_D; k1 <= max_D; k1 += 2) + { + auto p = find_end_snake_of_further_reaching_dpath(a, N, b, M, V1, max_D, k1, eq); + V1[k1] = p.u; + if ((delta % 2 != 0) and p.u + p.v >= best.u + best.v and p.u <= N and p.v <= M) + best = p; + } + for (int k2 = -max_D; k2 <= max_D; k2 += 2) + { + auto p = find_end_snake_of_further_reaching_dpath(ra, N, rb, M, V2, max_D, k2, eq); + V2[k2] = p.u; + if ((delta % 2 == 0) and p.u + p.v >= best.u + best.v and p.u <= N and p.v <= M) + best = {p.x, p.y, p.u, p.v, (Snake::Op)(p.op + Snake::RevAdd)}; + } + + if (best.op >= Snake::RevAdd) + best = { N - best.u, M - best.v, N - best.x , M - best.y, best.op }; + return best; } struct Diff @@ -108,7 +120,8 @@ inline void append_diff(Vector& diffs, Diff diff) template void find_diff_rec(Iterator a, int begA, int endA, Iterator b, int begB, int endB, - int* V1, int* V2, Equal eq, Vector& diffs) + int* V1, int* V2, int cost_limit, + Equal eq, Vector& diffs) { int prefix_len = 0; while (begA != endA and begB != endB and eq(a[begA], b[begB])) @@ -128,12 +141,12 @@ void find_diff_rec(Iterator a, int begA, int endA, append_diff(diffs, {Diff::Remove, lenA, 0}); else { - auto snake = find_middle_snake(a + begA, lenA, b + begB, lenB, V1, V2, eq); + auto snake = find_middle_snake(a + begA, lenA, b + begB, lenB, V1, V2, cost_limit, eq); kak_assert(snake.u <= lenA and snake.v <= lenB); find_diff_rec(a, begA, begA + snake.x - (int)(snake.op == Snake::Del), b, begB, begB + snake.y - (int)(snake.op == Snake::Add), - V1, V2, eq, diffs); + V1, V2, cost_limit, eq, diffs); if (snake.op == Snake::Add) append_diff(diffs, {Diff::Add, 1, begB + snake.y - 1}); @@ -149,7 +162,7 @@ void find_diff_rec(Iterator a, int begA, int endA, find_diff_rec(a, begA + snake.u + (int)(snake.op == Snake::RevDel), endA, b, begB + snake.v + (int)(snake.op == Snake::RevAdd), endB, - V1, V2, eq, diffs); + V1, V2, cost_limit, eq, diffs); } append_diff(diffs, {Diff::Keep, suffix_len, 0}); @@ -161,7 +174,8 @@ Vector find_diff(Iterator a, int N, Iterator b, int M, Equal eq = Equal{}) const int max = 2 * (N + M) + 1; Vector data(2*max); Vector diffs; - find_diff_rec(a, 0, N, b, 0, M, &data[N+M], &data[max + N+M], eq, diffs); + constexpr int cost_limit = 1000; + find_diff_rec(a, 0, N, b, 0, M, &data[N+M], &data[max + N+M], cost_limit, eq, diffs); return diffs; }