Replace InternedStrings with SharedString, shared_ptr based

This commit is contained in:
Maxime Coste 2015-01-15 13:54:38 +00:00
parent 2a878d51fd
commit 76d806e98d
11 changed files with 138 additions and 238 deletions

View File

@ -6,7 +6,7 @@
#include "containers.hh" #include "containers.hh"
#include "context.hh" #include "context.hh"
#include "file.hh" #include "file.hh"
#include "interned_string.hh" #include "shared_string.hh"
#include "utils.hh" #include "utils.hh"
#include "window.hh" #include "window.hh"
@ -146,9 +146,9 @@ struct Buffer::Modification
Type type; Type type;
ByteCoord coord; ByteCoord coord;
InternedString content; SharedString content;
Modification(Type type, ByteCoord coord, InternedString content) Modification(Type type, ByteCoord coord, SharedString content)
: type(type), coord(coord), content(std::move(content)) {} : type(type), coord(coord), content(std::move(content)) {}
Modification inverse() const Modification inverse() const
@ -284,7 +284,7 @@ ByteCoord Buffer::do_insert(ByteCoord pos, StringView content)
StringView prefix = m_lines[pos.line].substr(0, pos.column); StringView prefix = m_lines[pos.line].substr(0, pos.column);
StringView suffix = m_lines[pos.line].substr(pos.column); StringView suffix = m_lines[pos.line].substr(pos.column);
Vector<InternedString> new_lines; Vector<SharedString> new_lines;
ByteCount start = 0; ByteCount start = 0;
for (ByteCount i = 0; i < content.length(); ++i) for (ByteCount i = 0; i < content.length(); ++i)
@ -332,7 +332,7 @@ ByteCoord Buffer::do_erase(ByteCoord begin, ByteCoord end)
if (new_line.length() != 0) if (new_line.length() != 0)
{ {
m_lines.erase(m_lines.begin() + (int)begin.line, m_lines.begin() + (int)end.line); m_lines.erase(m_lines.begin() + (int)begin.line, m_lines.begin() + (int)end.line);
m_lines[begin.line] = InternedString(new_line); m_lines[begin.line] = SharedString(new_line);
next = begin; next = begin;
} }
else else
@ -381,11 +381,11 @@ BufferIterator Buffer::insert(const BufferIterator& pos, StringView content)
if (content.empty()) if (content.empty())
return pos; return pos;
InternedString real_content; SharedString real_content;
if (pos == end() and content.back() != '\n') if (pos == end() and content.back() != '\n')
real_content = InternedString(content + "\n"); real_content = intern(content + "\n");
else else
real_content = content; real_content = intern(content);
// for undo and redo purpose it is better to use one past last line rather // for undo and redo purpose it is better to use one past last line rather
// than one past last char coord. // than one past last char coord.
@ -406,7 +406,7 @@ BufferIterator Buffer::erase(BufferIterator begin, BufferIterator end)
if (not (m_flags & Flags::NoUndo)) if (not (m_flags & Flags::NoUndo))
m_current_undo_group.emplace_back(Modification::Erase, begin.coord(), m_current_undo_group.emplace_back(Modification::Erase, begin.coord(),
InternedString(string(begin.coord(), end.coord()))); intern(string(begin.coord(), end.coord())));
return {*this, do_erase(begin.coord(), end.coord())}; return {*this, do_erase(begin.coord(), end.coord())};
} }

View File

@ -5,7 +5,7 @@
#include "flags.hh" #include "flags.hh"
#include "safe_ptr.hh" #include "safe_ptr.hh"
#include "scope.hh" #include "scope.hh"
#include "interned_string.hh" #include "shared_string.hh"
#include "value.hh" #include "value.hh"
#include "vector.hh" #include "vector.hh"
@ -121,7 +121,7 @@ public:
BufferIterator end() const; BufferIterator end() const;
LineCount line_count() const; LineCount line_count() const;
const InternedString& operator[](LineCount line) const const SharedString& operator[](LineCount line) const
{ return m_lines[line]; } { return m_lines[line]; }
// returns an iterator at given coordinates. clamp line_and_column // returns an iterator at given coordinates. clamp line_and_column
@ -166,15 +166,15 @@ private:
void on_option_changed(const Option& option) override; void on_option_changed(const Option& option) override;
using LineListBase = Vector<InternedString, MemoryDomain::BufferContent>; using LineListBase = Vector<SharedString, MemoryDomain::BufferContent>;
struct LineList : LineListBase struct LineList : LineListBase
{ {
[[gnu::always_inline]] [[gnu::always_inline]]
InternedString& operator[](LineCount line) SharedString& operator[](LineCount line)
{ return LineListBase::operator[]((int)line); } { return LineListBase::operator[]((int)line); }
[[gnu::always_inline]] [[gnu::always_inline]]
const InternedString& operator[](LineCount line) const const SharedString& operator[](LineCount line) const
{ return LineListBase::operator[]((int)line); } { return LineListBase::operator[]((int)line); }
}; };
LineList m_lines; LineList m_lines;

View File

@ -805,7 +805,7 @@ const CommandDesc debug_cmd = {
PerArgumentCommandCompleter({ PerArgumentCommandCompleter({
[](const Context& context, CompletionFlags flags, [](const Context& context, CompletionFlags flags,
const String& prefix, ByteCount cursor_pos) -> Completions { const String& prefix, ByteCount cursor_pos) -> Completions {
auto c = {"info", "buffers", "options", "memory", "interned-strings"}; auto c = {"info", "buffers", "options", "memory", "shared-strings"};
return { 0_byte, cursor_pos, complete(prefix, cursor_pos, c) }; return { 0_byte, cursor_pos, complete(prefix, cursor_pos, c) };
} }), } }),
[](const ParametersParser& parser, Context& context) [](const ParametersParser& parser, Context& context)
@ -842,7 +842,7 @@ const CommandDesc debug_cmd = {
write_debug(" Malloced: " + to_string(mallinfo().uordblks)); write_debug(" Malloced: " + to_string(mallinfo().uordblks));
#endif #endif
} }
else if (parser[0] == "interned-strings") else if (parser[0] == "shared-strings")
{ {
StringRegistry::instance().debug_stats(); StringRegistry::instance().debug_stats();
} }

View File

@ -1,78 +0,0 @@
#include "interned_string.hh"
#include "debug.hh"
namespace Kakoune
{
void StringRegistry::debug_stats() const
{
write_debug("Interned Strings stats:");
write_debug(" slots: " + to_string(m_storage.size()) + " allocated, " + to_string(m_free_slots.size()) + " free");
size_t total_refcount = 0;
size_t total_size = 0;
size_t count = 0;
for (auto& st : m_storage)
{
if (st.refcount == 0)
continue;
total_refcount += st.refcount;
total_size += st.data.size();
++count;
}
write_debug(" data size: " + to_string(total_size) + ", mean: " + to_string((float)total_size/count));
write_debug(" refcounts: " + to_string(total_refcount) + ", mean: " + to_string((float)total_refcount/count));
}
InternedString StringRegistry::acquire(StringView str)
{
auto it = m_slot_map.find(str);
if (it == m_slot_map.end())
{
Slot slot;
if (not m_free_slots.empty())
{
slot = m_free_slots.back();
m_free_slots.pop_back();
kak_assert(m_storage[slot].refcount == 0);
m_storage[slot] = DataAndRefCount{{str.begin(), str.end()}, 1};
}
else
{
slot = m_storage.size();
m_storage.push_back({{str.begin(), str.end()}, 1});
}
// Create a new string view that point to the storage data
StringView storage_view{m_storage[slot].data.data(), (int)m_storage[slot].data.size()};
m_slot_map[storage_view] = slot;
return InternedString{storage_view, slot};
}
Slot slot = it->second;
auto& data = m_storage[slot];
++data.refcount;
return {{data.data.data(), (int)data.data.size()}, slot};
}
void StringRegistry::acquire(Slot slot)
{
kak_assert(slot < m_storage.size());
kak_assert(m_storage[slot].refcount > 0);
++m_storage[slot].refcount;
}
void StringRegistry::release(Slot slot) noexcept
{
kak_assert(m_storage[slot].refcount > 0);
if (--m_storage[slot].refcount == 0)
{
m_free_slots.push_back(slot);
auto& data = m_storage[slot].data;
auto it = m_slot_map.find(StringView{data.data(), (int)data.size()});
kak_assert(it != m_slot_map.end());
m_slot_map.erase(it);
data = Vector<char, MemoryDomain::InternedString>{};
}
}
}

View File

@ -1,137 +0,0 @@
#ifndef interned_string_hh_INCLUDED
#define interned_string_hh_INCLUDED
#include "string.hh"
#include "utils.hh"
#include "unordered_map.hh"
#include "vector.hh"
#include <cstddef>
namespace Kakoune
{
class InternedString;
class StringRegistry : public Singleton<StringRegistry>
{
public:
void debug_stats() const;
private:
friend class InternedString;
using Slot = uint32_t;
InternedString acquire(StringView str);
void acquire(Slot slot);
void release(Slot slot) noexcept;
UnorderedMap<StringView, Slot, MemoryDomain::InternedString> m_slot_map;
Vector<Slot, MemoryDomain::InternedString> m_free_slots;
struct DataAndRefCount
{
Vector<char, MemoryDomain::InternedString> data;
int refcount;
};
Vector<DataAndRefCount, MemoryDomain::InternedString> m_storage;
};
class InternedString : public StringView
{
public:
InternedString() = default;
InternedString(const InternedString& str) : InternedString(str, str.m_slot)
{
if (m_slot != -1)
StringRegistry::instance().acquire(m_slot);
}
InternedString(InternedString&& str) noexcept : StringView(str)
{
m_slot = str.m_slot;
str.m_slot = -1;
}
InternedString(const char* str) : StringView() { acquire_ifn(str); }
InternedString(StringView str) : StringView() { acquire_ifn(str); }
InternedString& operator=(const InternedString& str)
{
if (str.data() == data() and str.length() == length())
return *this;
static_cast<StringView&>(*this) = str;
if (str.m_slot != m_slot)
{
release_ifn();
m_slot = str.m_slot;
if (str.m_slot != -1)
StringRegistry::instance().acquire(str.m_slot);
}
return *this;
}
InternedString& operator=(InternedString&& str) noexcept
{
release_ifn();
static_cast<StringView&>(*this) = str;
m_slot = str.m_slot;
str.m_slot = -1;
return *this;
}
~InternedString()
{
release_ifn();
}
InternedString acquire_substr(ByteCount from, ByteCount length = INT_MAX) const
{
if (m_slot == -1)
return InternedString{};
StringRegistry::instance().acquire(m_slot);
return InternedString{StringView::substr(from, length), m_slot};
}
InternedString acquire_substr(CharCount from, CharCount length = INT_MAX) const
{
if (m_slot == -1)
return InternedString{};
StringRegistry::instance().acquire(m_slot);
return InternedString{StringView::substr(from, length), m_slot};
}
private:
friend class StringRegistry;
InternedString(StringView str, StringRegistry::Slot slot)
: StringView(str), m_slot(slot) {}
void acquire_ifn(StringView str)
{
if (str.empty())
{
static_cast<StringView&>(*this) = StringView{};
m_slot = -1;
}
else
*this = StringRegistry::instance().acquire(str);
}
void release_ifn() noexcept
{
if (m_slot != -1)
StringRegistry::instance().release(m_slot);
}
StringRegistry::Slot m_slot = -1;
};
inline size_t hash_value(const Kakoune::InternedString& str)
{
return hash_data(str.data(), (int)str.length());
}
}
#endif // interned_string_hh_INCLUDED

View File

@ -13,7 +13,7 @@
#include "file.hh" #include "file.hh"
#include "highlighters.hh" #include "highlighters.hh"
#include "insert_completer.hh" #include "insert_completer.hh"
#include "interned_string.hh" #include "shared_string.hh"
#include "ncurses_ui.hh" #include "ncurses_ui.hh"
#include "parameters_parser.hh" #include "parameters_parser.hh"
#include "register_manager.hh" #include "register_manager.hh"
@ -405,6 +405,7 @@ int run_server(StringView session, StringView init_command,
client_manager.handle_pending_inputs(); client_manager.handle_pending_inputs();
client_manager.clear_mode_trashes(); client_manager.clear_mode_trashes();
buffer_manager.clear_buffer_trash(); buffer_manager.clear_buffer_trash();
string_registry.purge_unused();
} }
{ {

View File

@ -15,7 +15,7 @@ enum class MemoryDomain
{ {
Undefined, Undefined,
String, String,
InternedString, SharedString,
BufferContent, BufferContent,
BufferMeta, BufferMeta,
Options, Options,
@ -39,7 +39,7 @@ inline const char* domain_name(MemoryDomain domain)
{ {
case MemoryDomain::Undefined: return "Undefined"; case MemoryDomain::Undefined: return "Undefined";
case MemoryDomain::String: return "String"; case MemoryDomain::String: return "String";
case MemoryDomain::InternedString: return "InternedString"; case MemoryDomain::SharedString: return "SharedString";
case MemoryDomain::BufferContent: return "BufferContent"; case MemoryDomain::BufferContent: return "BufferContent";
case MemoryDomain::BufferMeta: return "BufferMeta"; case MemoryDomain::BufferMeta: return "BufferMeta";
case MemoryDomain::Options: return "Options"; case MemoryDomain::Options: return "Options";

45
src/shared_string.cc Normal file
View File

@ -0,0 +1,45 @@
#include "shared_string.hh"
#include "debug.hh"
namespace Kakoune
{
SharedString StringRegistry::intern(StringView str)
{
auto it = m_strings.find(str);
if (it == m_strings.end())
{
SharedString shared_str = str;
it = m_strings.emplace(StringView{shared_str}, shared_str.m_storage).first;
}
return {*it->second, it->second};
}
void StringRegistry::purge_unused()
{
for (auto it = m_strings.begin(); it != m_strings.end(); )
{
if (it->second.unique())
it = m_strings.erase(it);
else
++it;
}
}
void StringRegistry::debug_stats() const
{
write_debug("Shared Strings stats:");
size_t total_refcount = 0;
size_t total_size = 0;
size_t count = m_strings.size();
for (auto& st : m_strings)
{
total_refcount += st.second.use_count() - 1;
total_size += (int)st.second->length();
}
write_debug(" data size: " + to_string(total_size) + ", mean: " + to_string((float)total_size/count));
write_debug(" refcounts: " + to_string(total_refcount) + ", mean: " + to_string((float)total_refcount/count));
}
}

69
src/shared_string.hh Normal file
View File

@ -0,0 +1,69 @@
#ifndef shared_string_hh_INCLUDED
#define shared_string_hh_INCLUDED
#include "string.hh"
#include "utils.hh"
#include "unordered_map.hh"
#include <memory>
namespace Kakoune
{
class SharedString : public StringView
{
public:
using Storage = std::basic_string<char, std::char_traits<char>,
Allocator<char, MemoryDomain::SharedString>>;
SharedString() = default;
SharedString(StringView str)
{
if (not str.empty())
{
m_storage = std::make_shared<Storage>(str.begin(), str.end());
StringView::operator=(*m_storage);
}
}
SharedString(const char* str) : SharedString(StringView{str}) {}
SharedString acquire_substr(ByteCount from, ByteCount length = INT_MAX) const
{
return SharedString{StringView::substr(from, length), m_storage};
}
SharedString acquire_substr(CharCount from, CharCount length = INT_MAX) const
{
return SharedString{StringView::substr(from, length), m_storage};
}
private:
SharedString(StringView str, std::shared_ptr<Storage> storage)
: StringView{str}, m_storage(std::move(storage)) {}
friend class StringRegistry;
std::shared_ptr<Storage> m_storage;
};
inline size_t hash_value(const SharedString& str)
{
return hash_data(str.data(), (int)str.length());
}
class StringRegistry : public Singleton<StringRegistry>
{
public:
void debug_stats() const;
SharedString intern(StringView str);
void purge_unused();
private:
UnorderedMap<StringView, std::shared_ptr<SharedString::Storage>, MemoryDomain::SharedString> m_strings;
};
inline SharedString intern(StringView str)
{
return StringRegistry::instance().intern(str);
}
}
#endif // shared_string_hh_INCLUDED

View File

@ -26,7 +26,7 @@ UsedLetters used_letters(StringView str)
return res; return res;
} }
static WordDB::WordList get_words(const InternedString& content) static WordDB::WordList get_words(const SharedString& content)
{ {
WordDB::WordList res; WordDB::WordList res;
using Iterator = utf8::iterator<const char*, utf8::InvalidPolicy::Pass>; using Iterator = utf8::iterator<const char*, utf8::InvalidPolicy::Pass>;

View File

@ -2,7 +2,7 @@
#define word_db_hh_INCLUDED #define word_db_hh_INCLUDED
#include "buffer.hh" #include "buffer.hh"
#include "interned_string.hh" #include "shared_string.hh"
#include "unordered_map.hh" #include "unordered_map.hh"
#include "vector.hh" #include "vector.hh"
@ -22,7 +22,7 @@ public:
WordDB(const WordDB&) = delete; WordDB(const WordDB&) = delete;
WordDB(WordDB&&) = default; WordDB(WordDB&&) = default;
using WordList = Vector<InternedString, MemoryDomain::WordDB>; using WordList = Vector<SharedString, MemoryDomain::WordDB>;
template<typename MatchFunc> template<typename MatchFunc>
WordList find_matching(StringView str, MatchFunc match) WordList find_matching(StringView str, MatchFunc match)
{ {
@ -49,7 +49,7 @@ private:
UsedLetters letters; UsedLetters letters;
int refcount; int refcount;
}; };
using WordToInfo = UnorderedMap<InternedString, WordInfo, MemoryDomain::WordDB>; using WordToInfo = UnorderedMap<SharedString, WordInfo, MemoryDomain::WordDB>;
using LineToWords = Vector<WordList, MemoryDomain::WordDB>; using LineToWords = Vector<WordList, MemoryDomain::WordDB>;
safe_ptr<const Buffer> m_buffer; safe_ptr<const Buffer> m_buffer;