Improve code-generation for Strings

Make String::Data use a trivial copy of the short/long union to avoid
unnecessary branching there, and inline release() so that the compiler
can elide it on moved-from Strings.
main
Maxime Coste 2021-07-20 19:53:06 +10:00
parent c643cd4467
commit a4dd89f214
3 changed files with 59 additions and 65 deletions

View File

@ -87,12 +87,12 @@ class String:
# Returns the quoted display text for the wrapped String value. The low bit
# of the short-form size byte selects the layout: when it is clear the long
# ptr/size pair is read, otherwise the inline buffer is used and the length
# is the size byte shifted right by one. `len` bytes are then decoded as
# UTF-8 with decoding errors ignored.
def to_string(self):
data = self.val["m_data"]
if (data["s"]["size"] & 1) != 1:
ptr = data["l"]["ptr"]
len = data["l"]["size"]
if (data["u"]["s"]["size"] & 1) != 1:
ptr = data["u"]["l"]["ptr"]
len = data["u"]["l"]["size"]
else:
ptr = data["s"]["string"]
len = data["s"]["size"] >> 1
ptr = data["u"]["s"]["string"]
len = data["u"]["s"]["size"] >> 1
return "\"%s\"" % (ptr.string("utf-8", "ignore", len))

View File

@ -14,29 +14,18 @@ String::Data::Data(const char* data, size_t size, size_t capacity)
++capacity;
kak_assert(capacity < Long::max_capacity);
l.ptr = Alloc{}.allocate(capacity+1);
l.size = size;
l.capacity = capacity;
u.l.ptr = Alloc{}.allocate(capacity+1);
u.l.size = size;
u.l.capacity = capacity;
if (data != nullptr)
memcpy(l.ptr, data, size);
l.ptr[size] = 0;
memcpy(u.l.ptr, data, size);
u.l.ptr[size] = 0;
}
else
set_short(data, size);
}
// Move constructor (out-of-line version). A long `other` has its Long
// fields copied and is then reset with set_empty(), after which it no
// longer reports is_long(). A short `other` simply has its Short fields
// copied and is left as it was.
String::Data::Data(Data&& other) noexcept
{
if (other.is_long())
{
l = other.l;
other.set_empty();
}
else
s = other.s;
}
String::Data& String::Data::operator=(const Data& other)
{
if (&other == this)
@ -59,11 +48,11 @@ String::Data& String::Data::operator=(Data&& other) noexcept
if (other.is_long())
{
l = other.l;
u.l = other.u.l;
other.set_empty();
}
else
s = other.s;
u.s = other.u.s;
return *this;
}
@ -75,7 +64,7 @@ void String::Data::reserve(size_t new_capacity)
return;
if (is_long())
new_capacity = std::max(l.capacity * 2, new_capacity);
new_capacity = std::max(u.l.capacity * 2, new_capacity);
if (new_capacity & 1)
++new_capacity;
@ -85,12 +74,12 @@ void String::Data::reserve(size_t new_capacity)
if (copy)
{
memcpy(new_ptr, data(), size()+1);
l.size = size();
u.l.size = size();
}
release();
l.ptr = new_ptr;
l.capacity = new_capacity;
u.l.ptr = new_ptr;
u.l.capacity = new_capacity;
}
template void String::Data::reserve<true>(size_t);
@ -121,12 +110,6 @@ void String::Data::clear()
set_empty();
}
// Hands the long buffer back to the allocator. Nothing is freed for short
// data, nor when l.capacity is 0: the NoCopy constructor stores a capacity
// of 0 alongside a caller-supplied pointer (presumably memory this object
// does not own -- confirm). The deallocated length is capacity+1, matching
// the allocate(capacity+1) call made when the buffer was created.
void String::Data::release()
{
if (is_long() and l.capacity != 0)
Alloc{}.deallocate(l.ptr, l.capacity+1);
}
void String::resize(ByteCount size, char c)
{
const size_t target_size = (size_t)size;
@ -146,17 +129,17 @@ void String::resize(ByteCount size, char c)
// Records a new size in whichever representation is active. The long form
// stores it as-is; the short form stores (size << 1) | 1, keeping the low
// bit set because that bit is what is_long() tests.
void String::Data::set_size(size_t size)
{
if (is_long())
l.size = size;
u.l.size = size;
else
s.size = (size << 1) | 1;
u.s.size = (size << 1) | 1;
}
// Switches to the short representation: writes the tagged size byte,
// copies `size` bytes from `data` unless it is null, then writes the
// terminating 0 at index `size`.
// NOTE(review): no bounds check happens here; callers are assumed to pass
// size <= Short::capacity -- confirm against the call sites.
void String::Data::set_short(const char* data, size_t size)
{
s.size = (size << 1) | 1;
u.s.size = (size << 1) | 1;
if (data != nullptr)
memcpy(s.string, data, size);
s.string[size] = 0;
memcpy(u.s.string, data, size);
u.s.string[size] = 0;
}
const String String::ms_empty;

View File

@ -156,45 +156,28 @@ public:
// capacity must be even; on little endian systems that means the allocated
// capacity cannot use its most significant byte, so we effectively limit
// capacity to 2^24 on 32bit arch, and 2^60 on 64.
union Data
struct Data
{
using Alloc = Allocator<char, MemoryDomain::String>;
struct Long
{
static constexpr size_t max_capacity =
(size_t)1 << 8 * (sizeof(size_t) - 1);
char* ptr;
size_t size;
size_t capacity;
} l;
struct Short
{
static constexpr size_t capacity = sizeof(Long) - 2;
char string[capacity+1];
unsigned char size;
} s;
Data() { set_empty(); }
Data(NoCopy, const char* data, size_t size) : l{const_cast<char*>(data), size, 0} {}
Data(NoCopy, const char* data, size_t size) : u{Long{const_cast<char*>(data), size, 0}} {}
Data(const char* data, size_t size, size_t capacity);
Data(const char* data, size_t size) : Data(data, size, size) {}
Data(const Data& other) : Data{other.data(), other.size()} {}
~Data() { release(); }
Data(Data&& other) noexcept;
Data(Data&& other) noexcept : u{other.u} { other.set_empty(); }
Data& operator=(const Data& other);
Data& operator=(Data&& other) noexcept;
bool is_long() const { return (s.size & 1) == 0; }
size_t size() const { return is_long() ? l.size : (s.size >> 1); }
size_t capacity() const { return is_long() ? l.capacity : Short::capacity; }
bool is_long() const { return (u.s.size & 1) == 0; }
size_t size() const { return is_long() ? u.l.size : (u.s.size >> 1); }
size_t capacity() const { return is_long() ? u.l.capacity : Short::capacity; }
const char* data() const { return is_long() ? l.ptr : s.string; }
char* data() { return is_long() ? l.ptr : s.string; }
const char* data() const { return is_long() ? u.l.ptr : u.s.string; }
char* data() { return is_long() ? u.l.ptr : u.s.string; }
template<bool copy = true>
void reserve(size_t new_capacity);
@ -204,8 +187,36 @@ public:
void clear();
private:
void release();
void set_empty() { s.size = 1; s.string[0] = 0; }
// Long representation: a pointer to the character buffer together with
// the current size and the capacity recorded for that buffer.
struct Long
{
// Exclusive upper bound for capacity: 1 shifted left by the bit count
// of all but one byte of a size_t.
static constexpr size_t max_capacity =
(size_t)1 << 8 * (sizeof(size_t) - 1);
char* ptr;
size_t size;
size_t capacity;
};
// Short (inline) representation. The character array takes
// sizeof(Long) - 1 bytes (capacity plus one for the terminator) and is
// followed by a single size byte, which set_short()/set_size() fill with
// (length << 1) | 1.
struct Short
{
// Longest string that fits inline, excluding the terminator.
static constexpr size_t capacity = sizeof(Long) - 2;
char string[capacity+1];
unsigned char size;
};
// Storage shared by both representations; is_long() reads u.s.size to
// decide which member is in use.
union
{
Long l;
Short s;
} u;
// Hands the long buffer back to the allocator. Short data is skipped, and
// so is a long value whose capacity is 0, which is what the NoCopy
// constructor stores (presumably marking memory that is not owned --
// confirm). The deallocated length is capacity+1.
void release()
{
if (is_long() and u.l.capacity != 0)
Alloc{}.deallocate(u.l.ptr, u.l.capacity+1);
}
// Resets to an empty short string: size byte 1 (length 0 with the short
// tag bit set) and a terminator in the first character.
void set_empty() { u.s.size = 1; u.s.string[0] = 0; }
void set_short(const char* data, size_t size);
};