From 76d806e98d5e80f7f0c233d561371b89b8724c23 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Thu, 15 Jan 2015 13:54:38 +0000 Subject: [PATCH] Replace InternedStrings with SharedString, shared_ptr based --- src/buffer.cc | 18 +++--- src/buffer.hh | 10 +-- src/commands.cc | 4 +- src/interned_string.cc | 78 ----------------------- src/interned_string.hh | 137 ----------------------------------------- src/main.cc | 3 +- src/memory.hh | 4 +- src/shared_string.cc | 45 ++++++++++++++ src/shared_string.hh | 69 +++++++++++++++++++++ src/word_db.cc | 2 +- src/word_db.hh | 6 +- 11 files changed, 138 insertions(+), 238 deletions(-) delete mode 100644 src/interned_string.cc delete mode 100644 src/interned_string.hh create mode 100644 src/shared_string.cc create mode 100644 src/shared_string.hh diff --git a/src/buffer.cc b/src/buffer.cc index a87c48de..32a909a0 100644 --- a/src/buffer.cc +++ b/src/buffer.cc @@ -6,7 +6,7 @@ #include "containers.hh" #include "context.hh" #include "file.hh" -#include "interned_string.hh" +#include "shared_string.hh" #include "utils.hh" #include "window.hh" @@ -146,9 +146,9 @@ struct Buffer::Modification Type type; ByteCoord coord; - InternedString content; + SharedString content; - Modification(Type type, ByteCoord coord, InternedString content) + Modification(Type type, ByteCoord coord, SharedString content) : type(type), coord(coord), content(std::move(content)) {} Modification inverse() const @@ -284,7 +284,7 @@ ByteCoord Buffer::do_insert(ByteCoord pos, StringView content) StringView prefix = m_lines[pos.line].substr(0, pos.column); StringView suffix = m_lines[pos.line].substr(pos.column); - Vector new_lines; + Vector new_lines; ByteCount start = 0; for (ByteCount i = 0; i < content.length(); ++i) @@ -332,7 +332,7 @@ ByteCoord Buffer::do_erase(ByteCoord begin, ByteCoord end) if (new_line.length() != 0) { m_lines.erase(m_lines.begin() + (int)begin.line, m_lines.begin() + (int)end.line); - m_lines[begin.line] = InternedString(new_line); + m_lines[begin.line] = SharedString(new_line); next = begin; } else @@ -381,11 +381,11 @@ BufferIterator Buffer::insert(const BufferIterator& pos, StringView content) if (content.empty()) return pos; - InternedString real_content; + SharedString real_content; if (pos == end() and content.back() != '\n') - real_content = InternedString(content + "\n"); + real_content = intern(content + "\n"); else - real_content = content; + real_content = intern(content); // for undo and redo purpose it is better to use one past last line rather // than one past last char coord. @@ -406,7 +406,7 @@ BufferIterator Buffer::erase(BufferIterator begin, BufferIterator end) if (not (m_flags & Flags::NoUndo)) m_current_undo_group.emplace_back(Modification::Erase, begin.coord(), - InternedString(string(begin.coord(), end.coord()))); + intern(string(begin.coord(), end.coord()))); return {*this, do_erase(begin.coord(), end.coord())}; } diff --git a/src/buffer.hh b/src/buffer.hh index f66e364f..1f7fee83 100644 --- a/src/buffer.hh +++ b/src/buffer.hh @@ -5,7 +5,7 @@ #include "flags.hh" #include "safe_ptr.hh" #include "scope.hh" -#include "interned_string.hh" +#include "shared_string.hh" #include "value.hh" #include "vector.hh" @@ -121,7 +121,7 @@ public: BufferIterator end() const; LineCount line_count() const; - const InternedString& operator[](LineCount line) const + const SharedString& operator[](LineCount line) const { return m_lines[line]; } // returns an iterator at given coordinates. clamp line_and_column @@ -166,15 +166,15 @@ private: void on_option_changed(const Option& option) override; - using LineListBase = Vector; + using LineListBase = Vector; struct LineList : LineListBase { [[gnu::always_inline]] - InternedString& operator[](LineCount line) + SharedString& operator[](LineCount line) { return LineListBase::operator[]((int)line); } [[gnu::always_inline]] - const InternedString& operator[](LineCount line) const + const SharedString& operator[](LineCount line) const { return LineListBase::operator[]((int)line); } }; LineList m_lines; diff --git a/src/commands.cc b/src/commands.cc index af6b6167..8373bddd 100644 --- a/src/commands.cc +++ b/src/commands.cc @@ -805,7 +805,7 @@ const CommandDesc debug_cmd = { PerArgumentCommandCompleter({ [](const Context& context, CompletionFlags flags, const String& prefix, ByteCount cursor_pos) -> Completions { - auto c = {"info", "buffers", "options", "memory", "interned-strings"}; + auto c = {"info", "buffers", "options", "memory", "shared-strings"}; return { 0_byte, cursor_pos, complete(prefix, cursor_pos, c) }; } }), [](const ParametersParser& parser, Context& context) @@ -842,7 +842,7 @@ const CommandDesc debug_cmd = { write_debug(" Malloced: " + to_string(mallinfo().uordblks)); #endif } - else if (parser[0] == "interned-strings") + else if (parser[0] == "shared-strings") { StringRegistry::instance().debug_stats(); } diff --git a/src/interned_string.cc b/src/interned_string.cc deleted file mode 100644 index 0755e3d4..00000000 --- a/src/interned_string.cc +++ /dev/null @@ -1,78 +0,0 @@ -#include "interned_string.hh" -#include "debug.hh" - -namespace Kakoune -{ - -void StringRegistry::debug_stats() const -{ - write_debug("Interned Strings stats:"); - write_debug(" slots: " + to_string(m_storage.size()) + " allocated, " + to_string(m_free_slots.size()) + " free"); - size_t total_refcount = 0; - size_t total_size = 0; - size_t count = 0; - for (auto& st : m_storage) - { - if (st.refcount == 0) - continue; - total_refcount += st.refcount; - total_size += st.data.size(); - ++count; - } - write_debug(" data size: " + to_string(total_size) + ", mean: " + to_string((float)total_size/count)); - write_debug(" refcounts: " + to_string(total_refcount) + ", mean: " + to_string((float)total_refcount/count)); -} - -InternedString StringRegistry::acquire(StringView str) -{ - auto it = m_slot_map.find(str); - if (it == m_slot_map.end()) - { - Slot slot; - if (not m_free_slots.empty()) - { - slot = m_free_slots.back(); - m_free_slots.pop_back(); - kak_assert(m_storage[slot].refcount == 0); - m_storage[slot] = DataAndRefCount{{str.begin(), str.end()}, 1}; - } - else - { - slot = m_storage.size(); - m_storage.push_back({{str.begin(), str.end()}, 1}); - } - // Create a new string view that point to the storage data - StringView storage_view{m_storage[slot].data.data(), (int)m_storage[slot].data.size()}; - m_slot_map[storage_view] = slot; - - return InternedString{storage_view, slot}; - } - - Slot slot = it->second; - auto& data = m_storage[slot]; - ++data.refcount; - return {{data.data.data(), (int)data.data.size()}, slot}; -} - -void StringRegistry::acquire(Slot slot) -{ - kak_assert(slot < m_storage.size()); - kak_assert(m_storage[slot].refcount > 0); - ++m_storage[slot].refcount; -} - -void StringRegistry::release(Slot slot) noexcept -{ - kak_assert(m_storage[slot].refcount > 0); - if (--m_storage[slot].refcount == 0) - { - m_free_slots.push_back(slot); - auto& data = m_storage[slot].data; - auto it = m_slot_map.find(StringView{data.data(), (int)data.size()}); - kak_assert(it != m_slot_map.end()); - m_slot_map.erase(it); - data = Vector{}; - } -} - -} diff --git a/src/interned_string.hh b/src/interned_string.hh deleted file mode 100644 index b74653b7..00000000 --- a/src/interned_string.hh +++ /dev/null @@ -1,137 +0,0 @@ -#ifndef interned_string_hh_INCLUDED -#define interned_string_hh_INCLUDED - -#include "string.hh" -#include "utils.hh" -#include "unordered_map.hh" -#include "vector.hh" - -#include - -namespace Kakoune -{ - -class InternedString; - -class StringRegistry : public Singleton -{ -public: - void debug_stats() const; -private: - friend class InternedString; - using Slot = uint32_t; - - InternedString acquire(StringView str); - void acquire(Slot slot); - void release(Slot slot) noexcept; - - UnorderedMap m_slot_map; - Vector m_free_slots; - struct DataAndRefCount - { - Vector data; - int refcount; - }; - Vector m_storage; -}; - -class InternedString : public StringView -{ -public: - InternedString() = default; - - InternedString(const InternedString& str) : InternedString(str, str.m_slot) - { - if (m_slot != -1) - StringRegistry::instance().acquire(m_slot); - } - - InternedString(InternedString&& str) noexcept : StringView(str) - { - m_slot = str.m_slot; - str.m_slot = -1; - } - - InternedString(const char* str) : StringView() { acquire_ifn(str); } - InternedString(StringView str) : StringView() { acquire_ifn(str); } - - InternedString& operator=(const InternedString& str) - { - if (str.data() == data() and str.length() == length()) - return *this; - static_cast(*this) = str; - if (str.m_slot != m_slot) - { - release_ifn(); - m_slot = str.m_slot; - if (str.m_slot != -1) - StringRegistry::instance().acquire(str.m_slot); - } - - return *this; - } - - InternedString& operator=(InternedString&& str) noexcept - { - release_ifn(); - - static_cast(*this) = str; - m_slot = str.m_slot; - str.m_slot = -1; - return *this; - } - - ~InternedString() - { - release_ifn(); - } - - InternedString acquire_substr(ByteCount from, ByteCount length = INT_MAX) const - { - if (m_slot == -1) - return InternedString{}; - StringRegistry::instance().acquire(m_slot); - return InternedString{StringView::substr(from, length), m_slot}; - } - InternedString acquire_substr(CharCount from, CharCount length = INT_MAX) const - { - if (m_slot == -1) - return InternedString{}; - StringRegistry::instance().acquire(m_slot); - return InternedString{StringView::substr(from, length), m_slot}; - } - -private: - friend class StringRegistry; - - InternedString(StringView str, StringRegistry::Slot slot) - : StringView(str), m_slot(slot) {} - - void acquire_ifn(StringView str) - { - if (str.empty()) - { - static_cast(*this) = StringView{}; - m_slot = -1; - } - else - *this = StringRegistry::instance().acquire(str); - } - - void release_ifn() noexcept - { - if (m_slot != -1) - StringRegistry::instance().release(m_slot); - } - - StringRegistry::Slot m_slot = -1; -}; - -inline size_t hash_value(const Kakoune::InternedString& str) -{ - return hash_data(str.data(), (int)str.length()); -} - -} - -#endif // interned_string_hh_INCLUDED diff --git a/src/main.cc b/src/main.cc index 15be8559..0a7a9cc6 100644 --- a/src/main.cc +++ b/src/main.cc @@ -13,7 +13,7 @@ #include "file.hh" #include "highlighters.hh" #include "insert_completer.hh" -#include "interned_string.hh" +#include "shared_string.hh" #include "ncurses_ui.hh" #include "parameters_parser.hh" #include "register_manager.hh" @@ -405,6 +405,7 @@ int run_server(StringView session, StringView init_command, client_manager.handle_pending_inputs(); client_manager.clear_mode_trashes(); buffer_manager.clear_buffer_trash(); + string_registry.purge_unused(); } { diff --git a/src/memory.hh b/src/memory.hh index f6ccb039..26b506a6 100644 --- a/src/memory.hh +++ b/src/memory.hh @@ -15,7 +15,7 @@ enum class MemoryDomain { Undefined, String, - InternedString, + SharedString, BufferContent, BufferMeta, Options, @@ -39,7 +39,7 @@ inline const char* domain_name(MemoryDomain domain) { case MemoryDomain::Undefined: return "Undefined"; case MemoryDomain::String: return "String"; - case MemoryDomain::InternedString: return "InternedString"; + case MemoryDomain::SharedString: return "SharedString"; case MemoryDomain::BufferContent: return "BufferContent"; case MemoryDomain::BufferMeta: return "BufferMeta"; case MemoryDomain::Options: return "Options"; diff --git a/src/shared_string.cc b/src/shared_string.cc new file mode 100644 index 00000000..d98affe9 --- /dev/null +++ b/src/shared_string.cc @@ -0,0 +1,45 @@ +#include "shared_string.hh" +#include "debug.hh" + +namespace Kakoune +{ + +SharedString StringRegistry::intern(StringView str) +{ + auto it = m_strings.find(str); + if (it == m_strings.end()) + { + SharedString shared_str = str; + it = m_strings.emplace(StringView{shared_str}, shared_str.m_storage).first; + } + return {*it->second, it->second}; +} + +void StringRegistry::purge_unused() +{ + for (auto it = m_strings.begin(); it != m_strings.end(); ) + { + if (it->second.unique()) + it = m_strings.erase(it); + else + ++it; + } +} + +void StringRegistry::debug_stats() const +{ + + write_debug("Shared Strings stats:"); + size_t total_refcount = 0; + size_t total_size = 0; + size_t count = m_strings.size(); + for (auto& st : m_strings) + { + total_refcount += st.second.use_count() - 1; + total_size += (int)st.second->length(); + } + write_debug(" data size: " + to_string(total_size) + ", mean: " + to_string((float)total_size/count)); + write_debug(" refcounts: " + to_string(total_refcount) + ", mean: " + to_string((float)total_refcount/count)); +} + +} diff --git a/src/shared_string.hh b/src/shared_string.hh new file mode 100644 index 00000000..50eec956 --- /dev/null +++ b/src/shared_string.hh @@ -0,0 +1,69 @@ +#ifndef shared_string_hh_INCLUDED +#define shared_string_hh_INCLUDED + +#include "string.hh" +#include "utils.hh" +#include "unordered_map.hh" + +#include + +namespace Kakoune +{ + +class SharedString : public StringView +{ +public: + using Storage = std::basic_string, + Allocator>; + SharedString() = default; + SharedString(StringView str) + { + if (not str.empty()) + { + m_storage = std::make_shared(str.begin(), str.end()); + StringView::operator=(*m_storage); + } + } + SharedString(const char* str) : SharedString(StringView{str}) {} + + SharedString acquire_substr(ByteCount from, ByteCount length = INT_MAX) const + { + return SharedString{StringView::substr(from, length), m_storage}; + } + SharedString acquire_substr(CharCount from, CharCount length = INT_MAX) const + { + return SharedString{StringView::substr(from, length), m_storage}; + } + +private: + SharedString(StringView str, std::shared_ptr storage) + : StringView{str}, m_storage(std::move(storage)) {} + + friend class StringRegistry; + std::shared_ptr m_storage; +}; + +inline size_t hash_value(const SharedString& str) +{ + return hash_data(str.data(), (int)str.length()); +} + +class StringRegistry : public Singleton +{ +public: + void debug_stats() const; + SharedString intern(StringView str); + void purge_unused(); + +private: + UnorderedMap, MemoryDomain::SharedString> m_strings; +}; + +inline SharedString intern(StringView str) +{ + return StringRegistry::instance().intern(str); +} + +} + +#endif // shared_string_hh_INCLUDED diff --git a/src/word_db.cc b/src/word_db.cc index a2dce1cf..2043cbcd 100644 --- a/src/word_db.cc +++ b/src/word_db.cc @@ -26,7 +26,7 @@ UsedLetters used_letters(StringView str) return res; } -static WordDB::WordList get_words(const InternedString& content) +static WordDB::WordList get_words(const SharedString& content) { WordDB::WordList res; using Iterator = utf8::iterator; diff --git a/src/word_db.hh b/src/word_db.hh index 9ead39b2..fbb32371 100644 --- a/src/word_db.hh +++ b/src/word_db.hh @@ -2,7 +2,7 @@ #define word_db_hh_INCLUDED #include "buffer.hh" -#include "interned_string.hh" +#include "shared_string.hh" #include "unordered_map.hh" #include "vector.hh" @@ -22,7 +22,7 @@ public: WordDB(const WordDB&) = delete; WordDB(WordDB&&) = default; - using WordList = Vector; + using WordList = Vector; template WordList find_matching(StringView str, MatchFunc match) { @@ -49,7 +49,7 @@ private: UsedLetters letters; int refcount; }; - using WordToInfo = UnorderedMap; + using WordToInfo = UnorderedMap; using LineToWords = Vector; safe_ptr m_buffer;