From 0ba7c7286dd97d7d3d7c03bccf3ea98389f0e273 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Fri, 30 Mar 2012 11:37:18 +0000 Subject: [PATCH] Store buffer content in a list of lines Instead of a big std::string, the buffer now stores its content in a list of lines. In order to achieve O(log(n)) random access, lines contain both their content and their offset from the start of the file, making binary search usable. BufferIterator now has a LineAndColumn coordinate into the buffer instead of an offset so that access is still O(1). --- gdb/kakoune.py | 4 +- src/buffer.cc | 186 ++++++++++++++++++++++--------------- src/buffer.hh | 32 +++++-- src/buffer_iterator.inl.hh | 135 ++++++++++++++++++++++----- src/file.cc | 23 +++-- src/selection.cc | 30 +----- 6 files changed, 262 insertions(+), 148 deletions(-) diff --git a/gdb/kakoune.py b/gdb/kakoune.py index f9ef11f3..fb284864 100644 --- a/gdb/kakoune.py +++ b/gdb/kakoune.py @@ -51,9 +51,9 @@ class BufferIterator: def to_string(self): if self.val['m_buffer'] != 0: - return "buffer<%s>@%d" % (self.val['m_buffer'].dereference()['m_name'], self.val['m_position']) + return "buffer<%s>@(%d, %d)" % (self.val['m_buffer'].dereference()['m_name'], self.val['m_coord']['line'], self.val['m_coord']['column']) else: - return "buffer@%s" % (self.val['m_position']) + return "buffer@(%d, %d)" % (self.val['m_coord']['line'], self.val['m_coord']['column']) def build_pretty_printer(): pp = gdb.printing.RegexpCollectionPrettyPrinter("kakoune") diff --git a/src/buffer.cc b/src/buffer.cc index 824006df..75416712 100644 --- a/src/buffer.cc +++ b/src/buffer.cc @@ -47,37 +47,25 @@ Buffer::~Buffer() BufferIterator Buffer::iterator_at(const BufferCoord& line_and_column) const { - if (m_lines.empty()) - return begin(); - - BufferCoord clamped = clamp(line_and_column); - return BufferIterator(*this, m_lines[clamped.line] + clamped.column); + return BufferIterator(*this, clamp(line_and_column)); } BufferCoord Buffer::line_and_column_at(const 
BufferIterator& iterator) const { - BufferCoord result; - if (not m_lines.empty()) - { - result.line = line_at(iterator); - result.column = iterator.m_position - m_lines[result.line]; - } - return result; + return iterator.m_coord; } BufferPos Buffer::line_at(const BufferIterator& iterator) const { - auto it = std::upper_bound(m_lines.begin(), m_lines.end(), - iterator.m_position); - return it - m_lines.begin() - 1; + return iterator.line(); } BufferSize Buffer::line_length(BufferPos line) const { - assert(not m_lines.empty()); + assert(line < line_count()); BufferPos end = (line < m_lines.size() - 1) ? - m_lines[line + 1] : length(); - return end - m_lines[line]; + m_lines[line + 1].start : length(); + return end - m_lines[line].start; } BufferCoord Buffer::clamp(const BufferCoord& line_and_column) const @@ -87,36 +75,39 @@ BufferCoord Buffer::clamp(const BufferCoord& line_and_column) const BufferCoord result(line_and_column.line, line_and_column.column); result.line = Kakoune::clamp(0, m_lines.size() - 1, result.line); - int max_col = std::max(0, line_length(result.line)-2); + int max_col = std::max(0, line_length(result.line) - 2); result.column = Kakoune::clamp(0, max_col, result.column); return result; } BufferIterator Buffer::iterator_at_line_begin(const BufferIterator& iterator) const { - return BufferIterator(*this, m_lines[line_at(iterator)]); + return BufferIterator(*this, { iterator.line(), 0 }); } BufferIterator Buffer::iterator_at_line_end(const BufferIterator& iterator) const { - BufferPos line = line_at(iterator) + 1; - return line < m_lines.size() ? 
BufferIterator(*this, m_lines[line]) : end(); + BufferPos line = iterator.line(); + return ++BufferIterator(*this, { line, std::max(line_length(line) - 1, 0) }); } - BufferIterator Buffer::begin() const { - return BufferIterator(*this, 0); + return BufferIterator(*this, { 0, 0 }); } BufferIterator Buffer::end() const { - return BufferIterator(*this, length()); + if (m_lines.empty()) + return BufferIterator(*this, { 0, 0 }); + return BufferIterator(*this, { (int)line_count()-1, (int)m_lines.back().content.length() }); } BufferSize Buffer::length() const { - return m_content.size(); + if (m_lines.empty()) + return 0; + return m_lines.back().start + m_lines.back().content.length(); } BufferSize Buffer::line_count() const @@ -126,7 +117,18 @@ BufferSize Buffer::line_count() const String Buffer::string(const BufferIterator& begin, const BufferIterator& end) const { - return m_content.substr(begin.m_position, end - begin); + String res; + for (BufferPos line = begin.line(); line <= end.line(); ++line) + { + size_t start = 0; + if (line == begin.line()) + start = begin.column(); + size_t count = -1; + if (line == end.line()) + count = end.column() - start; + res += m_lines[line].content.substr(start, count); + } + return res; } void Buffer::begin_undo_group() @@ -182,79 +184,115 @@ bool Buffer::redo() ++m_history_cursor; } -void Buffer::update_lines(const Modification& modification) +void Buffer::check_invariant() const { - size_t length = modification.content.length(); - const BufferPos pos = modification.position.m_position; - const BufferPos endpos = pos + length; - - // find the first line beginning after modification position - auto line_it = std::upper_bound(m_lines.begin(), m_lines.end(), pos); - - if (modification.type == Modification::Insert) + BufferSize start = 0; + for (auto& line : m_lines) { - // all following lines advanced by length - for (auto it = line_it; it != m_lines.end(); ++it) - *it += length; - - std::vector new_lines; - // if we inserted at the 
end of the buffer, we may have created a new - // line without inserting a '\n' - if (endpos == m_content.size() and - (pos == 0 or m_content[pos-1] == '\n')) - new_lines.push_back(pos); - - // every \n inserted that was not the last buffer character created a - // new line - for (BufferPos i = pos; i < endpos and i + 1 < m_content.size(); ++i) - { - if (m_content[i] == '\n') - new_lines.push_back(i + 1); - } - m_lines.insert(line_it, new_lines.begin(), new_lines.end()); + assert(line.start == start); + start += line.content.length(); } - else if (modification.type == Modification::Erase) +} + +void Buffer::insert(const BufferIterator& pos, const String& content) +{ + BufferSize offset = pos.offset(); + + // all following lines advanced by length + for (size_t i = pos.line()+1; i < line_count(); ++i) + m_lines[i].start += content.length(); + + // if we inserted at the end of the buffer, we may have created a new + // line without inserting a '\n' + if (pos == end() and (pos == begin() or *(pos-1) == '\n')) { - // find the first line beginning after endpos - auto end = std::upper_bound(line_it, m_lines.end(), endpos); - - // all the lines until the end moved back by length - for (auto it = end; it != m_lines.end(); ++it) + int start = 0; + for (int i = 0; i < content.length(); ++i) { - *it -= length; - assert(m_content[(*it)-1] == '\n'); + if (content[i] == '\n') + { + m_lines.push_back({ offset + start, content.substr(start, i + 1 - start) }); + start = i + 1; + } } - - // if we erased from the beginning of a line until the end of - // the buffer, that line also needs to be erased - if (pos == m_content.size() and *(line_it-1) == pos) - --line_it; - m_lines.erase(line_it, end); + if (start != content.length()) + m_lines.push_back({ offset + start, content.substr(start) }); } else - assert(false); + { + String prefix = m_lines[pos.line()].content.substr(0, pos.column()); + String suffix = m_lines[pos.line()].content.substr(pos.column()); + + auto line_it = 
m_lines.begin() + pos.line(); + line_it = m_lines.erase(line_it); + + int start = 0; + for (int i = 0; i < content.length(); ++i) + { + if (content[i] == '\n') + { + String line_content = content.substr(start, i + 1 - start); + if (start == 0) + { + line_content = prefix + line_content; + line_it = m_lines.insert(line_it, { offset + start - (int)prefix.length(), + std::move(line_content) }); + } + else + line_it = m_lines.insert(line_it, { offset + start, + std::move(line_content) }); + + ++line_it; + start = i + 1; + } + } + if (start == 0) + m_lines.insert(line_it, { offset + start - (int)prefix.length(), prefix + content + suffix }); + else + m_lines.insert(line_it, { offset + start, content.substr(start) + suffix }); + } + + check_invariant(); +} + +void Buffer::erase(const BufferIterator& pos, BufferSize length) +{ + BufferIterator end = pos + length; + String prefix = m_lines[pos.line()].content.substr(0, pos.column()); + String suffix = m_lines[end.line()].content.substr(end.column()); + Line new_line = { m_lines[pos.line()].start, prefix + suffix }; + + m_lines.erase(m_lines.begin() + pos.line(), m_lines.begin() + end.line() + 1); + m_lines.insert(m_lines.begin() + pos.line(), new_line); + + for (size_t i = pos.line()+1; i < line_count(); ++i) + m_lines[i].start -= length; + + check_invariant(); } void Buffer::apply_modification(const Modification& modification) { + const String& content = modification.content; + const BufferIterator& pos = modification.position; + switch (modification.type) { case Modification::Insert: - m_content.insert(modification.position.m_position, - modification.content); + insert(modification.position, modification.content); break; case Modification::Erase: { size_t size = modification.content.size(); assert(string(modification.position, modification.position + size) == modification.content); - m_content.erase(modification.position.m_position, size); + erase(modification.position, size); break; } default: assert(false); } - 
update_lines(modification); + for (auto listener : m_modification_listeners) listener->on_modification(modification); } diff --git a/src/buffer.hh b/src/buffer.hh index 60e6f7e2..526110fd 100644 --- a/src/buffer.hh +++ b/src/buffer.hh @@ -12,6 +12,7 @@ namespace Kakoune { class Buffer; +class Modification; class Window; typedef int BufferPos; @@ -39,8 +40,8 @@ public: typedef const value_type& reference; typedef std::bidirectional_iterator_tag iterator_category; - BufferIterator() : m_buffer(NULL), m_position(0) {} - BufferIterator(const Buffer& buffer, BufferPos position); + BufferIterator() : m_buffer(NULL) {} + BufferIterator(const Buffer& buffer, BufferCoord coord); BufferIterator& operator=(const BufferIterator& iterator); bool operator== (const BufferIterator& iterator) const; @@ -66,11 +67,17 @@ public: bool is_end() const; bool is_valid() const; + void update(const Modification& modification); + const Buffer& buffer() const; private: + BufferSize line() const { return m_coord.line; } + BufferSize column() const { return m_coord.column; } + BufferSize offset() const; + const Buffer* m_buffer; - BufferPos m_position; + BufferCoord m_coord; friend class Buffer; }; @@ -145,8 +152,6 @@ public: const std::string& name() const { return m_name; } - const String& content() const { return m_content; } - Window* get_or_create_window(); void delete_window(Window* window); @@ -166,17 +171,26 @@ public: // not on the last one) BufferIterator iterator_at_line_end(const BufferIterator& iterator) const; + const String& line_content(size_t l) const { return m_lines[l].content; } + private: friend class BufferIterator; - std::vector m_lines; - void update_lines(const Modification& modification); + void check_invariant() const; + + struct Line + { + BufferPos start; + String content; + }; + std::vector m_lines; + + void insert(const BufferIterator& pos, const String& content); + void erase(const BufferIterator& pos, BufferSize length); BufferPos line_at(const BufferIterator& 
iterator) const; BufferSize line_length(BufferPos line) const; - String m_content; - std::string m_name; const Type m_type; diff --git a/src/buffer_iterator.inl.hh b/src/buffer_iterator.inl.hh index d7def812..fa9e8f67 100644 --- a/src/buffer_iterator.inl.hh +++ b/src/buffer_iterator.inl.hh @@ -6,10 +6,10 @@ namespace Kakoune { -inline BufferIterator::BufferIterator(const Buffer& buffer, BufferPos position) - : m_buffer(&buffer), - m_position(std::max(0, std::min(position, (BufferPos)buffer.length()))) +inline BufferIterator::BufferIterator(const Buffer& buffer, BufferCoord coord) + : m_buffer(&buffer), m_coord(coord) { + assert(is_valid()); } inline const Buffer& BufferIterator::buffer() const @@ -20,90 +20,175 @@ inline const Buffer& BufferIterator::buffer() const inline bool BufferIterator::is_valid() const { - return m_buffer; + return m_buffer and + ((line() < m_buffer->line_count() and + column() < m_buffer->m_lines[line()].content.length()) or + ((line() == m_buffer->line_count() and column() == 0)) or + (line() == m_buffer->line_count() - 1 and + column() == m_buffer->m_lines.back().content.length())); } inline BufferIterator& BufferIterator::operator=(const BufferIterator& iterator) { m_buffer = iterator.m_buffer; - m_position = iterator.m_position; + m_coord = iterator.m_coord; return *this; } inline bool BufferIterator::operator==(const BufferIterator& iterator) const { assert(m_buffer == iterator.m_buffer); - return (m_position == iterator.m_position); + return (m_coord == iterator.m_coord); } inline bool BufferIterator::operator!=(const BufferIterator& iterator) const { assert(m_buffer == iterator.m_buffer); - return (m_position != iterator.m_position); + return (m_coord != iterator.m_coord); } inline bool BufferIterator::operator<(const BufferIterator& iterator) const { assert(m_buffer == iterator.m_buffer); - return (m_position < iterator.m_position); + return (m_coord < iterator.m_coord); } inline bool BufferIterator::operator<=(const BufferIterator& 
iterator) const { assert(m_buffer == iterator.m_buffer); - return (m_position <= iterator.m_position); + return (m_coord <= iterator.m_coord); } inline bool BufferIterator::operator>(const BufferIterator& iterator) const { assert(m_buffer == iterator.m_buffer); - return (m_position > iterator.m_position); + return (m_coord > iterator.m_coord); } inline bool BufferIterator::operator>=(const BufferIterator& iterator) const { assert(m_buffer == iterator.m_buffer); - return (m_position >= iterator.m_position); + return (m_coord >= iterator.m_coord); +} + +inline BufferCoord measure_string(const String& string) +{ + BufferCoord res; + for (auto c : string) + { + if (c == '\n') + { + ++res.line; + res.column = 0; + } + else + ++res.column; + } + return res; +} + +inline BufferCoord advance(const BufferCoord& base, const BufferCoord& offset) +{ + if (offset.line == 0) + return BufferCoord{base.line, base.column + offset.column}; + else + return BufferCoord{base.line + offset.line, offset.column}; +} + +inline void BufferIterator::update(const Modification& modification) +{ + const BufferIterator& pos = modification.position; + if (*this < pos) + return; + + BufferCoord measure = measure_string(modification.content); + if (modification.type == Modification::Erase) + { + BufferCoord end = advance(pos.m_coord, measure); + if (m_coord <= end) + m_coord = pos.m_coord; + else + { + m_coord.line -= measure.line; + if (end.line == m_coord.line) + m_coord.column -= measure.column; + } + + if (is_end()) + operator--(); + } + else + { + assert(modification.type == Modification::Insert); + if (pos.line() == line()) + { + BufferCoord end = advance(pos.m_coord, measure); + m_coord.column = end.column + column() - pos.column(); + } + m_coord.line += measure.line; + } + assert(is_valid()); } inline BufferChar BufferIterator::operator*() const { assert(m_buffer); - return m_buffer->m_content[m_position]; + return m_buffer->m_lines[line()].content[column()]; +} + +inline BufferSize 
BufferIterator::offset() const +{ + assert(m_buffer); + return line() == 0 ? column() : m_buffer->m_lines[line()].start + column(); } inline BufferSize BufferIterator::operator-(const BufferIterator& iterator) const { assert(m_buffer == iterator.m_buffer); - return m_position - iterator.m_position; + return offset() - iterator.offset(); } inline BufferIterator BufferIterator::operator+(BufferSize size) const { assert(m_buffer); - return BufferIterator(*m_buffer, m_position + size); + if (size >= 0) + { + BufferSize o = std::min(m_buffer->length(), offset() + size); + for (int i = line() + 1; i < m_buffer->line_count(); ++i) + { + if (m_buffer->m_lines[i].start > o) + return BufferIterator(*m_buffer, { i-1, o - m_buffer->m_lines[i-1].start }); + } + int last_line = m_buffer->line_count() - 1; + return BufferIterator(*m_buffer, { last_line, o - m_buffer->m_lines[last_line].start }); + } + return operator-(-size); } inline BufferIterator BufferIterator::operator-(BufferSize size) const { assert(m_buffer); - return BufferIterator(*m_buffer, m_position - size); + if (size >= 0) + { + BufferSize o = std::max(0, offset() - size); + for (int i = line(); i >= 0; --i) + { + if (m_buffer->m_lines[i].start <= o) + return BufferIterator(*m_buffer, { i, o - m_buffer->m_lines[i].start }); + } + assert(false); + } + return operator+(-size); } inline BufferIterator& BufferIterator::operator+=(BufferSize size) { - assert(m_buffer); - m_position = std::max(0, std::min((BufferSize)m_position + size, - m_buffer->length())); - return *this; + return *this = (*this + size); } inline BufferIterator& BufferIterator::operator-=(BufferSize size) { - assert(m_buffer); - m_position = std::max(0, std::min((BufferSize)m_position - size, - m_buffer->length())); - return *this; + return *this = (*this - size); } inline BufferIterator& BufferIterator::operator++() @@ -119,13 +204,13 @@ inline BufferIterator& BufferIterator::operator--() inline bool BufferIterator::is_begin() const { 
assert(m_buffer); - return m_position == 0; + return m_coord.line == 0 and m_coord.column == 0; } inline bool BufferIterator::is_end() const { assert(m_buffer); - return m_position == m_buffer->length(); + return offset() == m_buffer->length(); } } diff --git a/src/file.cc b/src/file.cc index 08ce8ae6..fdb50927 100644 --- a/src/file.cc +++ b/src/file.cc @@ -88,18 +88,21 @@ void write_buffer_to_file(const Buffer& buffer, const std::string& filename) if (fd == -1) throw file_access_error(filename, strerror(errno)); - const String& content = buffer.content(); - ssize_t count = content.length() * sizeof(BufferChar); - const char* ptr = content.c_str(); - - while (count) + for (size_t i = 0; i < buffer.line_count(); ++i) { - ssize_t written = write(fd, ptr, count); - ptr += written; - count -= written; + const String& content = buffer.line_content(i); + ssize_t count = content.length() * sizeof(BufferChar); + const char* ptr = content.c_str(); - if (written == -1) - throw file_access_error(filename, strerror(errno)); + while (count) + { + ssize_t written = write(fd, ptr, count); + ptr += written; + count -= written; + + if (written == -1) + throw file_access_error(filename, strerror(errno)); + } } close(fd); } diff --git a/src/selection.cc b/src/selection.cc index 1d4e1765..c34c1362 100644 --- a/src/selection.cc +++ b/src/selection.cc @@ -54,36 +54,10 @@ void Selection::merge_with(const Selection& selection) m_last = selection.m_last; } -static void update_iterator(const Modification& modification, - BufferIterator& iterator) -{ - if (iterator < modification.position) - return; - - size_t length = modification.content.length(); - if (modification.type == Modification::Erase) - { - // do not move length on the other side of the inequality, - // as modification.position + length may be after buffer end - if (iterator - length <= modification.position) - iterator = modification.position; - else - iterator -= length; - - if (iterator.is_end()) - --iterator; - } - else - { - 
assert(modification.type == Modification::Insert); - iterator += length; - } -} - void Selection::on_modification(const Modification& modification) { - update_iterator(modification, m_first); - update_iterator(modification, m_last); + m_first.update(modification); + m_last.update(modification); } void Selection::register_with_buffer()