2012-04-14 03:17:09 +02:00
|
|
|
#ifndef string_hh_INCLUDED
|
|
|
|
#define string_hh_INCLUDED
|
|
|
|
|
2017-10-09 16:12:42 +02:00
|
|
|
#include "memory.hh"
|
2015-05-01 19:47:22 +02:00
|
|
|
#include "hash.hh"
|
2012-08-23 23:56:35 +02:00
|
|
|
#include "units.hh"
|
2012-10-11 00:41:48 +02:00
|
|
|
#include "utf8.hh"
|
2012-04-14 03:17:09 +02:00
|
|
|
|
2014-10-13 14:12:33 +02:00
|
|
|
#include <climits>
|
2013-04-09 20:05:40 +02:00
|
|
|
|
2012-04-14 03:17:09 +02:00
|
|
|
namespace Kakoune
|
|
|
|
{
|
|
|
|
|
2014-05-11 13:44:51 +02:00
|
|
|
class StringView;
|
|
|
|
|
2015-03-10 20:33:46 +01:00
|
|
|
template<typename Type, typename CharType>
|
|
|
|
class StringOps
|
2015-02-11 00:09:30 +01:00
|
|
|
{
|
2015-03-10 20:33:46 +01:00
|
|
|
public:
|
|
|
|
using value_type = CharType;
|
|
|
|
|
2017-10-17 16:45:17 +02:00
|
|
|
friend constexpr size_t hash_value(const Type& str)
|
2015-03-10 20:33:46 +01:00
|
|
|
{
|
|
|
|
return hash_data(str.data(), (int)str.length());
|
|
|
|
}
|
|
|
|
|
|
|
|
using iterator = CharType*;
|
|
|
|
using const_iterator = const CharType*;
|
|
|
|
using reverse_iterator = std::reverse_iterator<iterator>;
|
|
|
|
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
|
|
|
|
|
|
|
|
[[gnu::always_inline]]
|
|
|
|
iterator begin() { return type().data(); }
|
|
|
|
|
|
|
|
[[gnu::always_inline]]
|
|
|
|
const_iterator begin() const { return type().data(); }
|
|
|
|
|
|
|
|
[[gnu::always_inline]]
|
|
|
|
iterator end() { return type().data() + (int)type().length(); }
|
|
|
|
|
|
|
|
[[gnu::always_inline]]
|
|
|
|
const_iterator end() const { return type().data() + (int)type().length(); }
|
|
|
|
|
|
|
|
reverse_iterator rbegin() { return reverse_iterator{end()}; }
|
|
|
|
const_reverse_iterator rbegin() const { return const_reverse_iterator{end()}; }
|
|
|
|
|
|
|
|
reverse_iterator rend() { return reverse_iterator{begin()}; }
|
|
|
|
const_reverse_iterator rend() const { return const_reverse_iterator{begin()}; }
|
|
|
|
|
|
|
|
CharType& front() { return *type().data(); }
|
|
|
|
const CharType& front() const { return *type().data(); }
|
|
|
|
CharType& back() { return type().data()[(int)type().length() - 1]; }
|
|
|
|
const CharType& back() const { return type().data()[(int)type().length() - 1]; }
|
|
|
|
|
|
|
|
[[gnu::always_inline]]
|
|
|
|
CharType& operator[](ByteCount pos) { return type().data()[(int)pos]; }
|
|
|
|
|
|
|
|
[[gnu::always_inline]]
|
2015-03-11 20:41:44 +01:00
|
|
|
const CharType& operator[](ByteCount pos) const { return type().data()[(int)pos]; }
|
2015-02-11 00:09:30 +01:00
|
|
|
|
2015-03-10 20:33:46 +01:00
|
|
|
Codepoint operator[](CharCount pos) const
|
|
|
|
{ return utf8::codepoint(utf8::advance(begin(), end(), pos), end()); }
|
|
|
|
|
|
|
|
CharCount char_length() const { return utf8::distance(begin(), end()); }
|
2016-09-22 21:36:26 +02:00
|
|
|
ColumnCount column_length() const { return utf8::column_distance(begin(), end()); }
|
2015-03-10 20:33:46 +01:00
|
|
|
|
|
|
|
[[gnu::always_inline]]
|
|
|
|
bool empty() const { return type().length() == 0_byte; }
|
|
|
|
|
|
|
|
ByteCount byte_count_to(CharCount count) const
|
2016-09-22 21:36:26 +02:00
|
|
|
{ return utf8::advance(begin(), end(), count) - begin(); }
|
|
|
|
|
|
|
|
ByteCount byte_count_to(ColumnCount count) const
|
|
|
|
{ return utf8::advance(begin(), end(), count) - begin(); }
|
2015-03-10 20:33:46 +01:00
|
|
|
|
|
|
|
CharCount char_count_to(ByteCount count) const
|
|
|
|
{ return utf8::distance(begin(), begin() + (int)count); }
|
|
|
|
|
2016-09-22 21:36:26 +02:00
|
|
|
ColumnCount column_count_to(ByteCount count) const
|
|
|
|
{ return utf8::column_distance(begin(), begin() + (int)count); }
|
|
|
|
|
2015-03-10 20:33:46 +01:00
|
|
|
StringView substr(ByteCount from, ByteCount length = INT_MAX) const;
|
|
|
|
StringView substr(CharCount from, CharCount length = INT_MAX) const;
|
2016-09-22 21:36:26 +02:00
|
|
|
StringView substr(ColumnCount from, ColumnCount length = INT_MAX) const;
|
2015-03-10 20:33:46 +01:00
|
|
|
|
|
|
|
private:
|
2015-03-11 20:41:44 +01:00
|
|
|
[[gnu::always_inline]]
|
2015-03-10 20:33:46 +01:00
|
|
|
Type& type() { return *static_cast<Type*>(this); }
|
2015-03-11 20:41:44 +01:00
|
|
|
[[gnu::always_inline]]
|
2015-03-10 20:33:46 +01:00
|
|
|
const Type& type() const { return *static_cast<const Type*>(this); }
|
|
|
|
};
|
|
|
|
|
2016-02-05 00:52:06 +01:00
|
|
|
// constexpr-friendly strlen: counts the bytes preceding the terminating
// NUL of s. s must point to a NUL-terminated string.
constexpr ByteCount strlen(const char* s)
{
    int count = 0;
    for (const char* cursor = s; *cursor != 0; ++cursor)
        ++count;
    return {count};
}
|
|
|
|
|
2015-03-10 20:33:46 +01:00
|
|
|
// Owning byte string; storage is handled by the nested Data struct and the
// read/iteration API is inherited from StringOps.
class String : public StringOps<String, char>
{
public:
    String() {}
    // content must be NUL-terminated.
    String(const char* content) : m_data(content, (size_t)strlen(content)) {}
    String(const char* content, ByteCount len) : m_data(content, (size_t)len) {}

    // Builds a string made of count repetitions of cp.
    explicit String(Codepoint cp, CharCount count = 1)
    {
        // A non-positive count yields an empty string. Bail out before
        // reserve(): its size_t cast would turn a negative count into an
        // enormous capacity request.
        if (not (count > 0))
            return;

        reserve(utf8::codepoint_size(cp) * (int)count);
        while (count-- > 0)
            utf8::dump(std::back_inserter(*this), cp);
    }

    // Builds a string of repetitions of cp spanning count columns
    // (count divided by the codepoint width, itself clamped to at least 1).
    explicit String(Codepoint cp, ColumnCount count)
    {
        int cp_count = (int)(count / std::max(codepoint_width(cp), 1_col));
        // Same guard as above: never hand a negative size to reserve().
        if (cp_count <= 0)
            return;

        reserve(utf8::codepoint_size(cp) * cp_count);
        while (cp_count-- > 0)
            utf8::dump(std::back_inserter(*this), cp);
    }

    String(const char* begin, const char* end) : m_data(begin, end-begin) {}

    explicit String(StringView str);

    // Tag selecting the constructors that reference str's bytes instead of
    // copying them: Data stores the pointer with a capacity of 0, which
    // Data::release() treats as "nothing to deallocate".
    struct NoCopy{};
    String(NoCopy, StringView str);

    static String no_copy(StringView str);

    [[gnu::always_inline]]
    char* data() { return m_data.data(); }

    [[gnu::always_inline]]
    const char* data() const { return m_data.data(); }

    [[gnu::always_inline]]
    ByteCount length() const { return m_data.size(); }

    [[gnu::always_inline]]
    const char* c_str() const { return m_data.data(); }

    [[gnu::always_inline]]
    void append(const char* data, ByteCount count) { m_data.append(data, (size_t)count); }

    void clear() { m_data.clear(); }

    void push_back(char c) { m_data.append(&c, 1); }
    void force_size(ByteCount size) { m_data.force_size((size_t)size); }
    void reserve(ByteCount size) { m_data.reserve((size_t)size); }
    void resize(ByteCount size, char c);

    static const String ms_empty;
    static constexpr const char* option_type_name = "str";

    // String data storage using small string optimization.
    //
    // The LSB of the last byte is used to flag whether we are using the
    // small buffer (bit set) or an allocated one (bit clear). On big endian
    // systems that means the allocated capacity must be even; on little
    // endian systems that means the allocated capacity cannot use its most
    // significant byte, so capacity is effectively limited to
    // Long::max_capacity: 2^24 on 32-bit architectures and 2^56 on 64-bit.
    struct Data
    {
        using Alloc = Allocator<char, MemoryDomain::String>;

        Data() { set_empty(); }
        // Non-owning: references data directly, capacity 0 marks it as
        // not allocated by us (see release()).
        Data(NoCopy, const char* data, size_t size) : u{Long{const_cast<char*>(data), size, 0}} {}

        Data(const char* data, size_t size, size_t capacity);
        Data(const char* data, size_t size) : Data(data, size, size) {}
        Data(const Data& other) : Data{other.data(), other.size()} {}

        ~Data() { release(); }
        // Takes over other's representation and leaves other empty.
        Data(Data&& other) noexcept : u{other.u} { other.set_empty(); }
        Data& operator=(const Data& other);
        Data& operator=(Data&& other) noexcept;

        // The short form stores (size << 1) | 1 in its last byte, so a
        // clear low bit there means the long form is active.
        bool is_long() const { return (u.s.size & 1) == 0; }
        size_t size() const { return is_long() ? u.l.size : (u.s.size >> 1); }
        size_t capacity() const { return is_long() ? u.l.capacity : Short::capacity; }

        const char* data() const { return is_long() ? u.l.ptr : u.s.string; }
        char* data() { return is_long() ? u.l.ptr : u.s.string; }

        template<bool copy = true>
        void reserve(size_t new_capacity);
        void set_size(size_t size);
        void force_size(size_t new_size);
        void append(const char* str, size_t len);
        void clear();

    private:
        struct Long
        {
            static constexpr size_t max_capacity =
                (size_t)1 << 8 * (sizeof(size_t) - 1);

            char* ptr;
            size_t size;
            size_t capacity;
        };

        struct Short
        {
            // Same footprint as Long: capacity bytes of text, one byte
            // for the terminating NUL and one for the size/flag byte.
            static constexpr size_t capacity = sizeof(Long) - 2;
            char string[capacity+1];
            unsigned char size;
        };

        union
        {
            Long l;
            Short s;
        } u;

        // Frees the buffer only when it is an allocated one; a long form
        // with capacity 0 is a NoCopy reference and is left alone.
        void release()
        {
            if (is_long() and u.l.capacity != 0)
                Alloc{}.deallocate(u.l.ptr, u.l.capacity+1);
        }

        // Short form of length 0: flag bit set, NUL-terminated.
        void set_empty() { u.s.size = 1; u.s.string[0] = 0; }
        void set_short(const char* data, size_t size);
    };

private:
    Data m_data;
};
|
|
|
|
|
2015-03-10 20:33:46 +01:00
|
|
|
// Non-owning view on a run of bytes: a pointer plus a byte length.
// The read/iteration API is inherited from StringOps. The default
// constructor is defaulted and leaves both members uninitialized.
class StringView : public StringOps<StringView, const char>
{
public:
    StringView() = default;

    constexpr StringView(const char* data, ByteCount length)
        : m_data{data}, m_length{length}
    {}

    // data may be null, in which case the view has length 0; otherwise it
    // must be NUL-terminated.
    constexpr StringView(const char* data)
        : m_data{data}, m_length{data ? strlen(data) : 0}
    {}

    constexpr StringView(const char* begin, const char* end)
        : m_data{begin}, m_length{(int)(end - begin)}
    {}

    StringView(const String& str)
        : m_data{str.data()}, m_length{(int)str.length()}
    {}

    // One-byte view on c; the view refers to c itself, it does not copy it.
    StringView(const char& c) : m_data(&c), m_length(1) {}

    // Integers and codepoints must not be silently taken for a char.
    StringView(int c) = delete;
    StringView(Codepoint c) = delete;

    [[gnu::always_inline]]
    constexpr const char* data() const { return m_data; }

    [[gnu::always_inline]]
    constexpr ByteCount length() const { return m_length; }

    // Owning copy of the viewed bytes.
    String str() const { return String{m_data, m_length}; }

    // Gives access to a NUL-terminated version of [begin, end): the
    // original bytes when they are already followed by a NUL, a private
    // copy otherwise.
    struct ZeroTerminatedString
    {
        ZeroTerminatedString(const char* begin, const char* end)
        {
            // NOTE(review): this reads the byte at end, i.e. one past the
            // viewed range -- it assumes that byte is readable.
            if (*end == '\0')
            {
                m_unowned = begin;
                return;
            }
            m_owned = String::Data(begin, end - begin);
        }

        operator const char*() const
        {
            if (m_unowned)
                return m_unowned;
            return m_owned.data();
        }

    private:
        String::Data m_owned;
        const char* m_unowned = nullptr;
    };

    ZeroTerminatedString zstr() const { return {begin(), end()}; }

private:
    const char* m_data;
    ByteCount m_length;
};
|
|
|
|
|
2017-01-29 14:49:45 +01:00
|
|
|
// StringView has to stay a trivial type; this is why its default constructor
// is defaulted and its members carry no default initializers.
static_assert(std::is_trivial<StringView>::value, "");
|
|
|
|
|
2017-08-18 03:17:02 +02:00
|
|
|
// String and StringView both get hash_value() from StringOps, which hashes
// the data pointer and byte length. Presumably HashCompatible lets hashed
// containers accept one type as a key for the other -- confirm in hash.hh.
template<> struct HashCompatible<String, StringView> : std::true_type {};
|
|
|
|
// Reverse direction of the String/StringView pairing: both types get
// hash_value() from StringOps. NOTE(review): exact HashCompatible semantics
// live in hash.hh -- confirm there.
template<> struct HashCompatible<StringView, String> : std::true_type {};
|
2017-03-06 20:47:26 +01:00
|
|
|
|
|
|
|
// Copying constructor from a view: delegates to the (pointer, byte length)
// constructor.
inline String::String(StringView str)
    : String{str.data(), str.length()}
{}
|
2019-03-19 12:00:57 +01:00
|
|
|
// Non-copying constructor: hands the view's pointer and length to the
// NoCopy form of Data, so the String refers to str's bytes directly.
inline String::String(NoCopy, StringView str)
    : m_data{NoCopy{}, str.data(), (size_t)str.length()}
{}
|
2021-08-21 08:42:08 +02:00
|
|
|
// Named shorthand for the NoCopy constructor.
inline String String::no_copy(StringView str)
{
    return String{NoCopy{}, str};
}
|
2017-03-06 20:47:26 +01:00
|
|
|
|
2015-03-10 20:33:46 +01:00
|
|
|
template<typename Type, typename CharType>
|
|
|
|
inline StringView StringOps<Type, CharType>::substr(ByteCount from, ByteCount length) const
|
2014-04-18 14:45:33 +02:00
|
|
|
{
|
2014-04-20 13:03:57 +02:00
|
|
|
if (length < 0)
|
|
|
|
length = INT_MAX;
|
2016-09-27 00:24:04 +02:00
|
|
|
const auto str_len = type().length();
|
|
|
|
kak_assert(from >= 0 and from <= str_len);
|
|
|
|
return StringView{ type().data() + (int)from, std::min(str_len - from, length) };
|
2014-04-18 14:45:33 +02:00
|
|
|
}
|
|
|
|
|
2015-03-10 20:33:46 +01:00
|
|
|
template<typename Type, typename CharType>
|
|
|
|
inline StringView StringOps<Type, CharType>::substr(CharCount from, CharCount length) const
|
2014-04-18 14:45:33 +02:00
|
|
|
{
|
2014-04-20 13:03:57 +02:00
|
|
|
if (length < 0)
|
|
|
|
length = INT_MAX;
|
2016-09-22 21:36:26 +02:00
|
|
|
auto beg = utf8::advance(begin(), end(), from);
|
|
|
|
return StringView{ beg, utf8::advance(beg, end(), length) };
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename Type, typename CharType>
|
|
|
|
inline StringView StringOps<Type, CharType>::substr(ColumnCount from, ColumnCount length) const
|
|
|
|
{
|
|
|
|
if (length < 0)
|
|
|
|
length = INT_MAX;
|
|
|
|
auto beg = utf8::advance(begin(), end(), from);
|
2014-04-18 14:45:33 +02:00
|
|
|
return StringView{ beg, utf8::advance(beg, end(), length) };
|
|
|
|
}
|
|
|
|
|
|
|
|
// Appends the bytes viewed by rhs to lhs and returns lhs.
inline String& operator+=(String& lhs, StringView rhs)
{
    const char* const bytes = rhs.data();
    const ByteCount count = rhs.length();
    lhs.append(bytes, count);
    return lhs;
}
|
|
|
|
|
2014-12-08 14:59:29 +01:00
|
|
|
// Concatenation: returns a new String holding lhs followed by rhs.
inline String operator+(StringView lhs, StringView rhs)
{
    const ByteCount lhs_len = lhs.length();
    const ByteCount rhs_len = rhs.length();

    String joined;
    // Reserve the final size up front, before the two appends.
    joined.reserve(lhs_len + rhs_len);
    joined.append(lhs.data(), lhs_len);
    joined.append(rhs.data(), rhs_len);
    return joined;
}
|
|
|
|
|
2015-03-30 20:59:08 +02:00
|
|
|
[[gnu::always_inline]]
|
|
|
|
inline bool operator==(const StringView& lhs, const StringView& rhs)
|
|
|
|
{
|
|
|
|
return lhs.length() == rhs.length() and
|
|
|
|
std::equal(lhs.begin(), lhs.end(), rhs.begin());
|
|
|
|
}
|
|
|
|
|
|
|
|
[[gnu::always_inline]]
|
|
|
|
inline bool operator!=(const StringView& lhs, const StringView& rhs)
|
|
|
|
{ return not (lhs == rhs); }
|
|
|
|
|
|
|
|
inline bool operator<(const StringView& lhs, const StringView& rhs)
|
|
|
|
{
|
|
|
|
return std::lexicographical_compare(lhs.begin(), lhs.end(),
|
|
|
|
rhs.begin(), rhs.end());
|
|
|
|
}
|
|
|
|
|
2016-11-29 00:53:50 +01:00
|
|
|
// "..."_str literal: builds an owning String from a string literal.
//
// Uses the length supplied by the compiler rather than re-scanning for a
// NUL terminator, so literals containing embedded NUL bytes keep all their
// bytes and no strlen pass is needed.
inline String operator""_str(const char* str, size_t len)
{
    return String{str, str + len};
}
|
|
|
|
|
2017-03-06 20:47:26 +01:00
|
|
|
// "..."_sv literal: builds a StringView on a string literal.
//
// Uses the length supplied by the compiler rather than re-scanning for a
// NUL terminator, so literals containing embedded NUL bytes are viewed in
// full and no strlen pass is needed.
inline StringView operator""_sv(const char* str, size_t len)
{
    return StringView{str, str + len};
}
|
|
|
|
|
2012-04-14 03:23:20 +02:00
|
|
|
}
|
2012-04-14 03:17:09 +02:00
|
|
|
|
|
|
|
#endif // string_hh_INCLUDED
|