Store buffer content in a list of lines

Instead of one big std::string, the buffer now stores its content in a
list of lines. In order to achieve O(log(n)) random access, each line
contains both its content and its offset from the start of
the file, making binary search usable.

BufferIterator now has a LineAndColumn coordinate into the buffer
instead of an offset so that access is still O(1).
This commit is contained in:
Maxime Coste 2012-03-30 11:37:18 +00:00
parent c336bf1365
commit 0ba7c7286d
6 changed files with 262 additions and 148 deletions

View File

@ -51,9 +51,9 @@ class BufferIterator:
def to_string(self): def to_string(self):
if self.val['m_buffer'] != 0: if self.val['m_buffer'] != 0:
return "buffer<%s>@%d" % (self.val['m_buffer'].dereference()['m_name'], self.val['m_position']) return "buffer<%s>@(%d, %d)" % (self.val['m_buffer'].dereference()['m_name'], self.val['m_coord']['line'], self.val['m_coord']['column'])
else: else:
return "buffer<none>@%s" % (self.val['m_position']) return "buffer<none>@(%d, %d)" % (self.val['m_coord']['line'], self.val['m_coord']['column'])
def build_pretty_printer(): def build_pretty_printer():
pp = gdb.printing.RegexpCollectionPrettyPrinter("kakoune") pp = gdb.printing.RegexpCollectionPrettyPrinter("kakoune")

View File

@ -47,37 +47,25 @@ Buffer::~Buffer()
BufferIterator Buffer::iterator_at(const BufferCoord& line_and_column) const BufferIterator Buffer::iterator_at(const BufferCoord& line_and_column) const
{ {
if (m_lines.empty()) return BufferIterator(*this, clamp(line_and_column));
return begin();
BufferCoord clamped = clamp(line_and_column);
return BufferIterator(*this, m_lines[clamped.line] + clamped.column);
} }
BufferCoord Buffer::line_and_column_at(const BufferIterator& iterator) const BufferCoord Buffer::line_and_column_at(const BufferIterator& iterator) const
{ {
BufferCoord result; return iterator.m_coord;
if (not m_lines.empty())
{
result.line = line_at(iterator);
result.column = iterator.m_position - m_lines[result.line];
}
return result;
} }
BufferPos Buffer::line_at(const BufferIterator& iterator) const BufferPos Buffer::line_at(const BufferIterator& iterator) const
{ {
auto it = std::upper_bound(m_lines.begin(), m_lines.end(), return iterator.line();
iterator.m_position);
return it - m_lines.begin() - 1;
} }
BufferSize Buffer::line_length(BufferPos line) const BufferSize Buffer::line_length(BufferPos line) const
{ {
assert(not m_lines.empty()); assert(line < line_count());
BufferPos end = (line < m_lines.size() - 1) ? BufferPos end = (line < m_lines.size() - 1) ?
m_lines[line + 1] : length(); m_lines[line + 1].start : length();
return end - m_lines[line]; return end - m_lines[line].start;
} }
BufferCoord Buffer::clamp(const BufferCoord& line_and_column) const BufferCoord Buffer::clamp(const BufferCoord& line_and_column) const
@ -87,36 +75,39 @@ BufferCoord Buffer::clamp(const BufferCoord& line_and_column) const
BufferCoord result(line_and_column.line, line_and_column.column); BufferCoord result(line_and_column.line, line_and_column.column);
result.line = Kakoune::clamp<int>(0, m_lines.size() - 1, result.line); result.line = Kakoune::clamp<int>(0, m_lines.size() - 1, result.line);
int max_col = std::max(0, line_length(result.line)-2); int max_col = std::max(0, line_length(result.line) - 2);
result.column = Kakoune::clamp<int>(0, max_col, result.column); result.column = Kakoune::clamp<int>(0, max_col, result.column);
return result; return result;
} }
BufferIterator Buffer::iterator_at_line_begin(const BufferIterator& iterator) const BufferIterator Buffer::iterator_at_line_begin(const BufferIterator& iterator) const
{ {
return BufferIterator(*this, m_lines[line_at(iterator)]); return BufferIterator(*this, { iterator.line(), 0 });
} }
BufferIterator Buffer::iterator_at_line_end(const BufferIterator& iterator) const BufferIterator Buffer::iterator_at_line_end(const BufferIterator& iterator) const
{ {
BufferPos line = line_at(iterator) + 1; BufferPos line = iterator.line();
return line < m_lines.size() ? BufferIterator(*this, m_lines[line]) : end(); return ++BufferIterator(*this, { line, std::max(line_length(line) - 1, 0) });
} }
BufferIterator Buffer::begin() const BufferIterator Buffer::begin() const
{ {
return BufferIterator(*this, 0); return BufferIterator(*this, { 0, 0 });
} }
BufferIterator Buffer::end() const BufferIterator Buffer::end() const
{ {
return BufferIterator(*this, length()); if (m_lines.empty())
return BufferIterator(*this, { 0, 0 });
return BufferIterator(*this, { (int)line_count()-1, (int)m_lines.back().content.length() });
} }
BufferSize Buffer::length() const BufferSize Buffer::length() const
{ {
return m_content.size(); if (m_lines.empty())
return 0;
return m_lines.back().start + m_lines.back().content.length();
} }
BufferSize Buffer::line_count() const BufferSize Buffer::line_count() const
@ -126,7 +117,18 @@ BufferSize Buffer::line_count() const
String Buffer::string(const BufferIterator& begin, const BufferIterator& end) const String Buffer::string(const BufferIterator& begin, const BufferIterator& end) const
{ {
return m_content.substr(begin.m_position, end - begin); String res;
for (BufferPos line = begin.line(); line <= end.line(); ++line)
{
size_t start = 0;
if (line == begin.line())
start = begin.column();
size_t count = -1;
if (line == end.line())
count = end.column() - start;
res += m_lines[line].content.substr(start, count);
}
return res;
} }
void Buffer::begin_undo_group() void Buffer::begin_undo_group()
@ -182,79 +184,115 @@ bool Buffer::redo()
++m_history_cursor; ++m_history_cursor;
} }
void Buffer::update_lines(const Modification& modification) void Buffer::check_invariant() const
{ {
size_t length = modification.content.length(); BufferSize start = 0;
const BufferPos pos = modification.position.m_position; for (auto& line : m_lines)
const BufferPos endpos = pos + length;
// find the first line beginning after modification position
auto line_it = std::upper_bound(m_lines.begin(), m_lines.end(), pos);
if (modification.type == Modification::Insert)
{ {
// all following lines advanced by length assert(line.start == start);
for (auto it = line_it; it != m_lines.end(); ++it) start += line.content.length();
*it += length;
std::vector<BufferPos> new_lines;
// if we inserted at the end of the buffer, we may have created a new
// line without inserting a '\n'
if (endpos == m_content.size() and
(pos == 0 or m_content[pos-1] == '\n'))
new_lines.push_back(pos);
// every \n inserted that was not the last buffer character created a
// new line
for (BufferPos i = pos; i < endpos and i + 1 < m_content.size(); ++i)
{
if (m_content[i] == '\n')
new_lines.push_back(i + 1);
}
m_lines.insert(line_it, new_lines.begin(), new_lines.end());
} }
else if (modification.type == Modification::Erase) }
void Buffer::insert(const BufferIterator& pos, const String& content)
{
BufferSize offset = pos.offset();
// all following lines advanced by length
for (size_t i = pos.line()+1; i < line_count(); ++i)
m_lines[i].start += content.length();
// if we inserted at the end of the buffer, we may have created a new
// line without inserting a '\n'
if (pos == end() and (pos == begin() or *(pos-1) == '\n'))
{ {
// find the first line beginning after endpos int start = 0;
auto end = std::upper_bound(line_it, m_lines.end(), endpos); for (int i = 0; i < content.length(); ++i)
// all the lines until the end moved back by length
for (auto it = end; it != m_lines.end(); ++it)
{ {
*it -= length; if (content[i] == '\n')
assert(m_content[(*it)-1] == '\n'); {
m_lines.push_back({ offset + start, content.substr(start, i + 1 - start) });
start = i + 1;
}
} }
if (start != content.length())
// if we erased from the beginning of a line until the end of m_lines.push_back({ offset + start, content.substr(start) });
// the buffer, that line also needs to be erased
if (pos == m_content.size() and *(line_it-1) == pos)
--line_it;
m_lines.erase(line_it, end);
} }
else else
assert(false); {
String prefix = m_lines[pos.line()].content.substr(0, pos.column());
String suffix = m_lines[pos.line()].content.substr(pos.column());
auto line_it = m_lines.begin() + pos.line();
line_it = m_lines.erase(line_it);
int start = 0;
for (int i = 0; i < content.length(); ++i)
{
if (content[i] == '\n')
{
String line_content = content.substr(start, i + 1 - start);
if (start == 0)
{
line_content = prefix + line_content;
line_it = m_lines.insert(line_it, { offset + start - (int)prefix.length(),
std::move(line_content) });
}
else
line_it = m_lines.insert(line_it, { offset + start,
std::move(line_content) });
++line_it;
start = i + 1;
}
}
if (start == 0)
m_lines.insert(line_it, { offset + start - (int)prefix.length(), prefix + content + suffix });
else
m_lines.insert(line_it, { offset + start, content.substr(start) + suffix });
}
check_invariant();
}
void Buffer::erase(const BufferIterator& pos, BufferSize length)
{
BufferIterator end = pos + length;
String prefix = m_lines[pos.line()].content.substr(0, pos.column());
String suffix = m_lines[end.line()].content.substr(end.column());
Line new_line = { m_lines[pos.line()].start, prefix + suffix };
m_lines.erase(m_lines.begin() + pos.line(), m_lines.begin() + end.line() + 1);
m_lines.insert(m_lines.begin() + pos.line(), new_line);
for (size_t i = pos.line()+1; i < line_count(); ++i)
m_lines[i].start -= length;
check_invariant();
} }
void Buffer::apply_modification(const Modification& modification) void Buffer::apply_modification(const Modification& modification)
{ {
const String& content = modification.content;
const BufferIterator& pos = modification.position;
switch (modification.type) switch (modification.type)
{ {
case Modification::Insert: case Modification::Insert:
m_content.insert(modification.position.m_position, insert(modification.position, modification.content);
modification.content);
break; break;
case Modification::Erase: case Modification::Erase:
{ {
size_t size = modification.content.size(); size_t size = modification.content.size();
assert(string(modification.position, modification.position + size) assert(string(modification.position, modification.position + size)
== modification.content); == modification.content);
m_content.erase(modification.position.m_position, size); erase(modification.position, size);
break; break;
} }
default: default:
assert(false); assert(false);
} }
update_lines(modification);
for (auto listener : m_modification_listeners) for (auto listener : m_modification_listeners)
listener->on_modification(modification); listener->on_modification(modification);
} }

View File

@ -12,6 +12,7 @@ namespace Kakoune
{ {
class Buffer; class Buffer;
class Modification;
class Window; class Window;
typedef int BufferPos; typedef int BufferPos;
@ -39,8 +40,8 @@ public:
typedef const value_type& reference; typedef const value_type& reference;
typedef std::bidirectional_iterator_tag iterator_category; typedef std::bidirectional_iterator_tag iterator_category;
BufferIterator() : m_buffer(NULL), m_position(0) {} BufferIterator() : m_buffer(NULL) {}
BufferIterator(const Buffer& buffer, BufferPos position); BufferIterator(const Buffer& buffer, BufferCoord coord);
BufferIterator& operator=(const BufferIterator& iterator); BufferIterator& operator=(const BufferIterator& iterator);
bool operator== (const BufferIterator& iterator) const; bool operator== (const BufferIterator& iterator) const;
@ -66,11 +67,17 @@ public:
bool is_end() const; bool is_end() const;
bool is_valid() const; bool is_valid() const;
void update(const Modification& modification);
const Buffer& buffer() const; const Buffer& buffer() const;
private: private:
BufferSize line() const { return m_coord.line; }
BufferSize column() const { return m_coord.column; }
BufferSize offset() const;
const Buffer* m_buffer; const Buffer* m_buffer;
BufferPos m_position; BufferCoord m_coord;
friend class Buffer; friend class Buffer;
}; };
@ -145,8 +152,6 @@ public:
const std::string& name() const { return m_name; } const std::string& name() const { return m_name; }
const String& content() const { return m_content; }
Window* get_or_create_window(); Window* get_or_create_window();
void delete_window(Window* window); void delete_window(Window* window);
@ -166,17 +171,26 @@ public:
// not on the last one) // not on the last one)
BufferIterator iterator_at_line_end(const BufferIterator& iterator) const; BufferIterator iterator_at_line_end(const BufferIterator& iterator) const;
const String& line_content(size_t l) const { return m_lines[l].content; }
private: private:
friend class BufferIterator; friend class BufferIterator;
std::vector<BufferPos> m_lines; void check_invariant() const;
void update_lines(const Modification& modification);
struct Line
{
BufferPos start;
String content;
};
std::vector<Line> m_lines;
void insert(const BufferIterator& pos, const String& content);
void erase(const BufferIterator& pos, BufferSize length);
BufferPos line_at(const BufferIterator& iterator) const; BufferPos line_at(const BufferIterator& iterator) const;
BufferSize line_length(BufferPos line) const; BufferSize line_length(BufferPos line) const;
String m_content;
std::string m_name; std::string m_name;
const Type m_type; const Type m_type;

View File

@ -6,10 +6,10 @@
namespace Kakoune namespace Kakoune
{ {
inline BufferIterator::BufferIterator(const Buffer& buffer, BufferPos position) inline BufferIterator::BufferIterator(const Buffer& buffer, BufferCoord coord)
: m_buffer(&buffer), : m_buffer(&buffer), m_coord(coord)
m_position(std::max(0, std::min(position, (BufferPos)buffer.length())))
{ {
assert(is_valid());
} }
inline const Buffer& BufferIterator::buffer() const inline const Buffer& BufferIterator::buffer() const
@ -20,90 +20,175 @@ inline const Buffer& BufferIterator::buffer() const
inline bool BufferIterator::is_valid() const inline bool BufferIterator::is_valid() const
{ {
return m_buffer; return m_buffer and
((line() < m_buffer->line_count() and
column() < m_buffer->m_lines[line()].content.length()) or
((line() == m_buffer->line_count() and column() == 0)) or
(line() == m_buffer->line_count() - 1 and
column() == m_buffer->m_lines.back().content.length()));
} }
inline BufferIterator& BufferIterator::operator=(const BufferIterator& iterator) inline BufferIterator& BufferIterator::operator=(const BufferIterator& iterator)
{ {
m_buffer = iterator.m_buffer; m_buffer = iterator.m_buffer;
m_position = iterator.m_position; m_coord = iterator.m_coord;
return *this; return *this;
} }
inline bool BufferIterator::operator==(const BufferIterator& iterator) const inline bool BufferIterator::operator==(const BufferIterator& iterator) const
{ {
assert(m_buffer == iterator.m_buffer); assert(m_buffer == iterator.m_buffer);
return (m_position == iterator.m_position); return (m_coord == iterator.m_coord);
} }
inline bool BufferIterator::operator!=(const BufferIterator& iterator) const inline bool BufferIterator::operator!=(const BufferIterator& iterator) const
{ {
assert(m_buffer == iterator.m_buffer); assert(m_buffer == iterator.m_buffer);
return (m_position != iterator.m_position); return (m_coord != iterator.m_coord);
} }
inline bool BufferIterator::operator<(const BufferIterator& iterator) const inline bool BufferIterator::operator<(const BufferIterator& iterator) const
{ {
assert(m_buffer == iterator.m_buffer); assert(m_buffer == iterator.m_buffer);
return (m_position < iterator.m_position); return (m_coord < iterator.m_coord);
} }
inline bool BufferIterator::operator<=(const BufferIterator& iterator) const inline bool BufferIterator::operator<=(const BufferIterator& iterator) const
{ {
assert(m_buffer == iterator.m_buffer); assert(m_buffer == iterator.m_buffer);
return (m_position <= iterator.m_position); return (m_coord <= iterator.m_coord);
} }
inline bool BufferIterator::operator>(const BufferIterator& iterator) const inline bool BufferIterator::operator>(const BufferIterator& iterator) const
{ {
assert(m_buffer == iterator.m_buffer); assert(m_buffer == iterator.m_buffer);
return (m_position > iterator.m_position); return (m_coord > iterator.m_coord);
} }
inline bool BufferIterator::operator>=(const BufferIterator& iterator) const inline bool BufferIterator::operator>=(const BufferIterator& iterator) const
{ {
assert(m_buffer == iterator.m_buffer); assert(m_buffer == iterator.m_buffer);
return (m_position >= iterator.m_position); return (m_coord >= iterator.m_coord);
}
// Measures the extent of `string` as a (line, column) pair: `line` is
// incremented for every '\n' encountered and `column` counts the
// characters that follow the last '\n' (or all characters when there is
// none).
// NOTE(review): relies on a default-constructed BufferCoord starting at
// line 0, column 0 -- confirm against the BufferCoord definition.
inline BufferCoord measure_string(const String& string)
{
    BufferCoord extent;
    for (auto ch : string)
    {
        if (ch != '\n')
        {
            ++extent.column;
            continue;
        }
        // a line break starts a new line and resets the column count
        ++extent.line;
        extent.column = 0;
    }
    return extent;
}
// Returns the coordinate reached by moving `offset` away from `base`,
// where `offset` is an extent such as the one produced by
// measure_string. When the offset spans at least one line break the
// resulting column is the offset's own column; otherwise the offset's
// column is added to the base column on the same line.
inline BufferCoord advance(const BufferCoord& base, const BufferCoord& offset)
{
    if (offset.line != 0)
        return BufferCoord{base.line + offset.line, offset.column};

    return BufferCoord{base.line, base.column + offset.column};
}
// Adjusts this iterator's coordinate to account for `modification`.
//
// Iterators located strictly before the modification position are left
// untouched. For an erase, iterators inside the erased range (up to and
// including its end coordinate) collapse onto the modification
// position, and iterators after it are shifted back; if the result is
// the buffer end, the iterator is stepped back by one. For an insert,
// iterators at or after the position are shifted forward by the extent
// of the inserted content.
inline void BufferIterator::update(const Modification& modification)
{
    const BufferIterator& pos = modification.position;
    if (*this < pos)
        return;

    BufferCoord measure = measure_string(modification.content);
    if (modification.type == Modification::Erase)
    {
        // coordinate just past the erased text, before the erase happened
        BufferCoord end = advance(pos.m_coord, measure);
        if (m_coord <= end)
            m_coord = pos.m_coord;
        else
        {
            // The line comparison has to be made BEFORE the line is
            // shifted: text that followed the erased range on its last
            // line is now joined to the text preceding `pos`, so its
            // column is rebased on pos.column(). For a single-line erase
            // this reduces to column -= measure.column.
            if (m_coord.line == end.line)
                m_coord.column = pos.column() + (m_coord.column - end.column);
            m_coord.line -= measure.line;
        }

        // step back if the erase left this iterator at the buffer end
        if (is_end())
            operator--();
    }
    else
    {
        assert(modification.type == Modification::Insert);
        // only iterators on the insertion line see their column change:
        // they keep their distance to the end of the inserted text
        if (pos.line() == line())
        {
            BufferCoord end = advance(pos.m_coord, measure);
            m_coord.column = end.column + column() - pos.column();
        }
        m_coord.line += measure.line;
    }
    assert(is_valid());
}
inline BufferChar BufferIterator::operator*() const inline BufferChar BufferIterator::operator*() const
{ {
assert(m_buffer); assert(m_buffer);
return m_buffer->m_content[m_position]; return m_buffer->m_lines[line()].content[column()];
}
// Returns the position of this iterator counted from the start of the
// buffer: the start offset stored for the iterator's line plus its
// column. On line 0 the column is returned directly, without reading
// m_lines.
// NOTE(review): the line-0 shortcut presumably keeps this usable when
// m_lines is empty -- confirm.
inline BufferSize BufferIterator::offset() const
{
    assert(m_buffer);
    if (line() == 0)
        return column();

    return m_buffer->m_lines[line()].start + column();
}
inline BufferSize BufferIterator::operator-(const BufferIterator& iterator) const inline BufferSize BufferIterator::operator-(const BufferIterator& iterator) const
{ {
assert(m_buffer == iterator.m_buffer); assert(m_buffer == iterator.m_buffer);
return m_position - iterator.m_position; return offset() - iterator.offset();
} }
inline BufferIterator BufferIterator::operator+(BufferSize size) const inline BufferIterator BufferIterator::operator+(BufferSize size) const
{ {
assert(m_buffer); assert(m_buffer);
return BufferIterator(*m_buffer, m_position + size); if (size >= 0)
{
BufferSize o = std::min(m_buffer->length(), offset() + size);
for (int i = line() + 1; i < m_buffer->line_count(); ++i)
{
if (m_buffer->m_lines[i].start > o)
return BufferIterator(*m_buffer, { i-1, o - m_buffer->m_lines[i-1].start });
}
int last_line = m_buffer->line_count() - 1;
return BufferIterator(*m_buffer, { last_line, o - m_buffer->m_lines[last_line].start });
}
return operator-(-size);
} }
inline BufferIterator BufferIterator::operator-(BufferSize size) const inline BufferIterator BufferIterator::operator-(BufferSize size) const
{ {
assert(m_buffer); assert(m_buffer);
return BufferIterator(*m_buffer, m_position - size); if (size >= 0)
{
BufferSize o = std::max(0, offset() - size);
for (int i = line(); i >= 0; --i)
{
if (m_buffer->m_lines[i].start <= o)
return BufferIterator(*m_buffer, { i, o - m_buffer->m_lines[i].start });
}
assert(false);
}
return operator+(-size);
} }
inline BufferIterator& BufferIterator::operator+=(BufferSize size) inline BufferIterator& BufferIterator::operator+=(BufferSize size)
{ {
assert(m_buffer); return *this = (*this + size);
m_position = std::max(0, std::min((BufferSize)m_position + size,
m_buffer->length()));
return *this;
} }
inline BufferIterator& BufferIterator::operator-=(BufferSize size) inline BufferIterator& BufferIterator::operator-=(BufferSize size)
{ {
assert(m_buffer); return *this = (*this - size);
m_position = std::max(0, std::min((BufferSize)m_position - size,
m_buffer->length()));
return *this;
} }
inline BufferIterator& BufferIterator::operator++() inline BufferIterator& BufferIterator::operator++()
@ -119,13 +204,13 @@ inline BufferIterator& BufferIterator::operator--()
inline bool BufferIterator::is_begin() const inline bool BufferIterator::is_begin() const
{ {
assert(m_buffer); assert(m_buffer);
return m_position == 0; return m_coord.line == 0 and m_coord.column == 0;
} }
inline bool BufferIterator::is_end() const inline bool BufferIterator::is_end() const
{ {
assert(m_buffer); assert(m_buffer);
return m_position == m_buffer->length(); return offset() == m_buffer->length();
} }
} }

View File

@ -88,18 +88,21 @@ void write_buffer_to_file(const Buffer& buffer, const std::string& filename)
if (fd == -1) if (fd == -1)
throw file_access_error(filename, strerror(errno)); throw file_access_error(filename, strerror(errno));
const String& content = buffer.content(); for (size_t i = 0; i < buffer.line_count(); ++i)
ssize_t count = content.length() * sizeof(BufferChar);
const char* ptr = content.c_str();
while (count)
{ {
ssize_t written = write(fd, ptr, count); const String& content = buffer.line_content(i);
ptr += written; ssize_t count = content.length() * sizeof(BufferChar);
count -= written; const char* ptr = content.c_str();
if (written == -1) while (count)
throw file_access_error(filename, strerror(errno)); {
ssize_t written = write(fd, ptr, count);
ptr += written;
count -= written;
if (written == -1)
throw file_access_error(filename, strerror(errno));
}
} }
close(fd); close(fd);
} }

View File

@ -54,36 +54,10 @@ void Selection::merge_with(const Selection& selection)
m_last = selection.m_last; m_last = selection.m_last;
} }
static void update_iterator(const Modification& modification,
BufferIterator& iterator)
{
if (iterator < modification.position)
return;
size_t length = modification.content.length();
if (modification.type == Modification::Erase)
{
// do not move length on the other side of the inequality,
// as modification.position + length may be after buffer end
if (iterator - length <= modification.position)
iterator = modification.position;
else
iterator -= length;
if (iterator.is_end())
--iterator;
}
else
{
assert(modification.type == Modification::Insert);
iterator += length;
}
}
void Selection::on_modification(const Modification& modification) void Selection::on_modification(const Modification& modification)
{ {
update_iterator(modification, m_first); m_first.update(modification);
update_iterator(modification, m_last); m_last.update(modification);
} }
void Selection::register_with_buffer() void Selection::register_with_buffer()