Refactor diff to make allocating a diff vector optional

The diff interface now goes through a for_each_diff function that
uses a callback for each found diff.
This commit is contained in:
Maxime Coste 2019-11-30 10:46:42 +11:00
parent b765fb4971
commit 4fdbf21ff8
4 changed files with 88 additions and 94 deletions

View File

@ -262,17 +262,20 @@ void Buffer::reload(StringView data, timespec fs_timestamp)
} }
else else
{ {
auto diff = find_diff(m_lines.begin(), m_lines.size(), Vector<Diff> diff;
parsed_lines.lines.begin(), parsed_lines.lines.size(), for_each_diff(m_lines.begin(), m_lines.size(),
[](const StringDataPtr& lhs, const StringDataPtr& rhs) parsed_lines.lines.begin(), parsed_lines.lines.size(),
{ return lhs->strview() == rhs->strview(); }); [&diff](DiffOp op, int len, int posB)
{ diff.push_back({op, len, posB}); },
[](const StringDataPtr& lhs, const StringDataPtr& rhs)
{ return lhs->strview() == rhs->strview(); });
auto it = m_lines.begin(); auto it = m_lines.begin();
for (auto& d : diff) for (auto& d : diff)
{ {
if (d.mode == Diff::Keep) if (d.op == DiffOp::Keep)
it += d.len; it += d.len;
else if (d.mode == Diff::Add) else if (d.op == DiffOp::Add)
{ {
const LineCount cur_line = (int)(it - m_lines.begin()); const LineCount cur_line = (int)(it - m_lines.begin());
@ -285,7 +288,7 @@ void Buffer::reload(StringView data, timespec fs_timestamp)
m_lines.insert(it, parsed_lines.lines.begin() + d.posB, parsed_lines.lines.begin() + d.posB + d.len); m_lines.insert(it, parsed_lines.lines.begin() + d.posB, parsed_lines.lines.begin() + d.posB + d.len);
it = m_lines.begin() + (int)(cur_line + d.len); it = m_lines.begin() + (int)(cur_line + d.len);
} }
else if (d.mode == Diff::Remove) else if (d.op == DiffOp::Remove)
{ {
const LineCount cur_line = (int)(it - m_lines.begin()); const LineCount cur_line = (int)(it - m_lines.begin());

View File

@ -6,10 +6,10 @@
// (http://xmailserver.org/diff2.pdf) // (http://xmailserver.org/diff2.pdf)
#include "array_view.hh" #include "array_view.hh"
#include "vector.hh"
#include <functional> #include <functional>
#include <iterator> #include <iterator>
#include <memory>
namespace Kakoune namespace Kakoune
{ {
@ -101,32 +101,24 @@ Snake find_middle_snake(Iterator a, int N, Iterator b, int M,
return best; return best;
} }
struct Diff enum class DiffOp
{ {
enum { Keep, Add, Remove } mode; Keep,
int len; Add,
int posB; Remove
}; };
inline void append_diff(Vector<Diff>& diffs, Diff diff) template<typename Iterator, typename Equal, typename OnDiff>
{
if (diff.len == 0)
return;
if (not diffs.empty() and diffs.back().mode == diff.mode
and (diff.mode != Diff::Add or
diffs.back().posB + diffs.back().len == diff.posB))
diffs.back().len += diff.len;
else
diffs.push_back(diff);
}
template<typename Iterator, typename Equal>
void find_diff_rec(Iterator a, int begA, int endA, void find_diff_rec(Iterator a, int begA, int endA,
Iterator b, int begB, int endB, Iterator b, int begB, int endB,
int* V1, int* V2, int cost_limit, int* V1, int* V2, int cost_limit,
Equal eq, Vector<Diff>& diffs) Equal eq, OnDiff&& on_diff)
{ {
auto on_diff_ifn = [&](DiffOp op, int len, int posB) {
if (len != 0)
on_diff(op, len, posB);
};
int prefix_len = 0; int prefix_len = 0;
while (begA != endA and begB != endB and eq(a[begA], b[begB])) while (begA != endA and begB != endB and eq(a[begA], b[begB]))
++begA, ++begB, ++prefix_len; ++begA, ++begB, ++prefix_len;
@ -135,14 +127,14 @@ void find_diff_rec(Iterator a, int begA, int endA,
while (begA != endA and begB != endB and eq(a[endA-1], b[endB-1])) while (begA != endA and begB != endB and eq(a[endA-1], b[endB-1]))
--endA, --endB, ++suffix_len; --endA, --endB, ++suffix_len;
append_diff(diffs, {Diff::Keep, prefix_len, 0}); on_diff_ifn(DiffOp::Keep, prefix_len, 0);
const auto lenA = endA - begA, lenB = endB - begB; const auto lenA = endA - begA, lenB = endB - begB;
if (lenA == 0) if (lenA == 0)
append_diff(diffs, {Diff::Add, lenB, begB}); on_diff_ifn(DiffOp::Add, lenB, begB);
else if (lenB == 0) else if (lenB == 0)
append_diff(diffs, {Diff::Remove, lenA, 0}); on_diff_ifn(DiffOp::Remove, lenA, 0);
else else
{ {
auto snake = find_middle_snake(a + begA, lenA, b + begB, lenB, V1, V2, cost_limit, eq); auto snake = find_middle_snake(a + begA, lenA, b + begB, lenB, V1, V2, cost_limit, eq);
@ -150,38 +142,56 @@ void find_diff_rec(Iterator a, int begA, int endA,
find_diff_rec(a, begA, begA + snake.x - (int)(snake.op == Snake::Del), find_diff_rec(a, begA, begA + snake.x - (int)(snake.op == Snake::Del),
b, begB, begB + snake.y - (int)(snake.op == Snake::Add), b, begB, begB + snake.y - (int)(snake.op == Snake::Add),
V1, V2, cost_limit, eq, diffs); V1, V2, cost_limit, eq, on_diff);
if (snake.op == Snake::Add) if (snake.op == Snake::Add)
append_diff(diffs, {Diff::Add, 1, begB + snake.y - 1}); on_diff_ifn(DiffOp::Add, 1, begB + snake.y - 1);
if (snake.op == Snake::Del) if (snake.op == Snake::Del)
append_diff(diffs, {Diff::Remove, 1, 0}); on_diff_ifn(DiffOp::Remove, 1, 0);
append_diff(diffs, {Diff::Keep, snake.u - snake.x, 0}); on_diff_ifn(DiffOp::Keep, snake.u - snake.x, 0);
if (snake.op == Snake::RevAdd) if (snake.op == Snake::RevAdd)
append_diff(diffs, {Diff::Add, 1, begB + snake.v}); on_diff_ifn(DiffOp::Add, 1, begB + snake.v);
if (snake.op == Snake::RevDel) if (snake.op == Snake::RevDel)
append_diff(diffs, {Diff::Remove, 1, 0}); on_diff_ifn(DiffOp::Remove, 1, 0);
find_diff_rec(a, begA + snake.u + (int)(snake.op == Snake::RevDel), endA, find_diff_rec(a, begA + snake.u + (int)(snake.op == Snake::RevDel), endA,
b, begB + snake.v + (int)(snake.op == Snake::RevAdd), endB, b, begB + snake.v + (int)(snake.op == Snake::RevAdd), endB,
V1, V2, cost_limit, eq, diffs); V1, V2, cost_limit, eq, on_diff);
} }
append_diff(diffs, {Diff::Keep, suffix_len, 0}); on_diff_ifn(DiffOp::Keep, suffix_len, 0);
} }
template<typename Iterator, typename Equal = std::equal_to<>> struct Diff
Vector<Diff> find_diff(Iterator a, int N, Iterator b, int M, Equal eq = Equal{}) {
DiffOp op;
int len;
int posB;
};
template<typename Iterator, typename OnDiff, typename Equal = std::equal_to<>>
void for_each_diff(Iterator a, int N, Iterator b, int M, OnDiff&& on_diff, Equal eq = Equal{})
{ {
const int max = 2 * (N + M) + 1; const int max = 2 * (N + M) + 1;
Vector<int> data(2*max); std::unique_ptr<int[]> data(new int[2*max]);
Vector<Diff> diffs;
constexpr int cost_limit = 1000; constexpr int cost_limit = 1000;
find_diff_rec(a, 0, N, b, 0, M, &data[N+M], &data[max + N+M], cost_limit, eq, diffs);
return diffs; Diff last{};
find_diff_rec(a, 0, N, b, 0, M, &data[N+M], &data[max + N+M], cost_limit, eq,
[&last, &on_diff](DiffOp op, int len, int posB) {
if (last.op == op and (op != DiffOp::Add or last.posB + last.len == posB))
last.len += len;
else
{
if (last.op != DiffOp{} or last.len != 0 or last.posB != 0)
on_diff(last.op, last.len, last.posB);
last = Diff{op, len, posB};
}
});
if (last.op != DiffOp{} or last.len != 0 or last.posB != 0)
on_diff(last.op, last.len, last.posB);
} }
} }

View File

@ -508,33 +508,30 @@ BufferCoord apply_diff(Buffer& buffer, BufferCoord pos, StringView before, Strin
const auto lines_before = before | split_after<StringView>('\n') | gather<Vector<StringView>>(); const auto lines_before = before | split_after<StringView>('\n') | gather<Vector<StringView>>();
const auto lines_after = after | split_after<StringView>('\n') | gather<Vector<StringView>>(); const auto lines_after = after | split_after<StringView>('\n') | gather<Vector<StringView>>();
auto diffs = find_diff(lines_before.begin(), (int)lines_before.size(),
lines_after.begin(), (int)lines_after.size());
auto byte_count = [](auto&& lines, int first, int count) { auto byte_count = [](auto&& lines, int first, int count) {
return std::accumulate(lines.begin() + first, lines.begin() + first + count, 0_byte, return std::accumulate(lines.begin() + first, lines.begin() + first + count, 0_byte,
[](ByteCount l, StringView s) { return l + s.length(); }); [](ByteCount l, StringView s) { return l + s.length(); });
}; };
int posA = 0; for_each_diff(lines_before.begin(), (int)lines_before.size(),
for (auto& diff : diffs) lines_after.begin(), (int)lines_after.size(),
{ [&, posA = 0](DiffOp op, int len, int posB) mutable {
switch (diff.mode) switch (op)
{ {
case Diff::Keep: case DiffOp::Keep:
pos = buffer.advance(pos, byte_count(lines_before, posA, diff.len)); pos = buffer.advance(pos, byte_count(lines_before, posA, len));
posA += diff.len; posA += len;
break; break;
case Diff::Add: case DiffOp::Add:
pos = buffer.insert(pos, {lines_after[diff.posB].begin(), pos = buffer.insert(pos, {lines_after[posB].begin(),
lines_after[diff.posB + diff.len - 1].end()}); lines_after[posB + len - 1].end()});
break; break;
case Diff::Remove: case DiffOp::Remove:
pos = buffer.erase(pos, buffer.advance(pos, byte_count(lines_before, posA, diff.len))); pos = buffer.erase(pos, buffer.advance(pos, byte_count(lines_before, posA, len)));
posA += diff.len; posA += len;
break; break;
} }
} });
return pos; return pos;
} }

View File

@ -17,41 +17,25 @@ UnitTest test_utf8{[]()
UnitTest test_diff{[]() UnitTest test_diff{[]()
{ {
auto eq = [](const Diff& lhs, const Diff& rhs) { struct Diff{DiffOp op; int len; int posB;};
return lhs.mode == rhs.mode and lhs.len == rhs.len and lhs.posB == rhs.posB; auto check_diff = [](StringView a, StringView b, std::initializer_list<Diff> diffs) {
size_t count = 0;
for_each_diff(a.begin(), (int)a.length(), b.begin(), (int)b.length(),
[&](DiffOp op, int len, int posB) {
kak_assert(count < diffs.size());
auto& d = diffs.begin()[count++];
kak_assert(d.op == op and d.len == len and d.posB == posB);
});
kak_assert(count == diffs.size());
}; };
check_diff("a?", "!", {{DiffOp::Remove, 1, 0}, {DiffOp::Add, 1, 0}, {DiffOp::Remove, 1, 0}});
check_diff("abcde", "cd", {{DiffOp::Remove, 2, 0}, {DiffOp::Keep, 2, 0}, {DiffOp::Remove, 1, 0}});
check_diff("abcd", "cdef", {{DiffOp::Remove, 2, 0}, {DiffOp::Keep, 2, 0}, {DiffOp::Add, 2, 2}});
{ check_diff("mais que fais la police", "mais ou va la police",
auto diff = find_diff("a?", 2, "!", 1); {{DiffOp::Keep, 5, 0}, {DiffOp::Remove, 1, 0}, {DiffOp::Add, 1, 5}, {DiffOp::Keep, 1, 0},
kak_assert(diff.size() == 3 and {DiffOp::Remove, 1, 0}, {DiffOp::Keep, 1, 0}, {DiffOp::Add, 1, 8}, {DiffOp::Remove, 1, 0},
eq(diff[0], {Diff::Remove, 1, 0}) and {DiffOp::Keep, 1, 0}, {DiffOp::Remove, 2, 0}, {DiffOp::Keep, 10, 0}} );
eq(diff[1], {Diff::Add, 1, 0}) and
eq(diff[2], {Diff::Remove, 1, 0}));
}
{
auto diff = find_diff("abcde", 5, "cd", 2);
kak_assert(diff.size() == 3 and
eq(diff[0], {Diff::Remove, 2, 0}) and
eq(diff[1], {Diff::Keep, 2, 0}) and
eq(diff[2], {Diff::Remove, 1, 0}));
}
{
auto diff = find_diff("abcd", 4, "cdef", 4);
kak_assert(diff.size() == 3 and
eq(diff[0], {Diff::Remove, 2, 0}) and
eq(diff[1], {Diff::Keep, 2, 0}) and
eq(diff[2], {Diff::Add, 2, 2}));
}
{
StringView s1 = "mais que fais la police";
StringView s2 = "mais ou va la police";
auto diff = find_diff(s1.begin(), (int)s1.length(), s2.begin(), (int)s2.length());
kak_assert(diff.size() == 11);
}
}}; }};
UnitTest* UnitTest::list = nullptr; UnitTest* UnitTest::list = nullptr;