Move all non-core string code to string_utils.{hh,cc}
This commit is contained in:
parent
d1b9c24afc
commit
6ada6e6d77
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include "exception.hh"
|
||||
#include "ranges.hh"
|
||||
#include "string_utils.hh"
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "user_interface.hh"
|
||||
#include "window.hh"
|
||||
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include "exception.hh"
|
||||
#include "ranges.hh"
|
||||
#include "string_utils.hh"
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "unicode.hh"
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstring>
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include "highlighter_group.hh"
|
||||
|
||||
#include "ranges.hh"
|
||||
#include "string_utils.hh"
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "file.hh"
|
||||
#include "keys.hh"
|
||||
#include "ranges.hh"
|
||||
#include "string_utils.hh"
|
||||
#include "unit_tests.hh"
|
||||
#include "value.hh"
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "unit_tests.hh"
|
||||
#include "utf8_iterator.hh"
|
||||
#include "utils.hh"
|
||||
#include "string_utils.hh"
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include "event_manager.hh"
|
||||
#include "keys.hh"
|
||||
#include "ranges.hh"
|
||||
#include "string_utils.hh"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "ranges.hh"
|
||||
#include "utils.hh"
|
||||
#include "vector.hh"
|
||||
#include "string_utils.hh"
|
||||
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include "array_view.hh"
|
||||
#include "optional.hh"
|
||||
#include "string.hh"
|
||||
#include "string_utils.hh"
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "flags.hh"
|
||||
#include "unit_tests.hh"
|
||||
#include "utf8_iterator.hh"
|
||||
#include "optional.hh"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#define regex_hh_INCLUDED
|
||||
|
||||
#include "string.hh"
|
||||
#include "string_utils.hh"
|
||||
#include "exception.hh"
|
||||
#include "utf8_iterator.hh"
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include "assert.hh"
|
||||
#include "hash_map.hh"
|
||||
#include "string_utils.hh"
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include "exception.hh"
|
||||
#include "utils.hh"
|
||||
#include "vector.hh"
|
||||
#include "optional.hh"
|
||||
|
||||
#include <memory>
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
#include "shared_string.hh"
|
||||
#include "buffer_utils.hh"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
||||
|
|
447
src/string.cc
447
src/string.cc
|
@ -1,11 +1,7 @@
|
|||
#include "string.hh"
|
||||
|
||||
#include "exception.hh"
|
||||
#include "ranges.hh"
|
||||
#include "unit_tests.hh"
|
||||
#include "utf8_iterator.hh"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
@ -164,445 +160,4 @@ void String::Data::set_short(const char* data, size_t size)
|
|||
|
||||
const String String::ms_empty;
|
||||
|
||||
Vector<String> split(StringView str, char separator, char escape)
|
||||
{
|
||||
Vector<String> res;
|
||||
auto it = str.begin();
|
||||
auto start = it;
|
||||
while (it != str.end())
|
||||
{
|
||||
res.emplace_back();
|
||||
String& element = res.back();
|
||||
while (it != str.end())
|
||||
{
|
||||
auto c = *it;
|
||||
if (c == escape and it + 1 != str.end() and *(it+1) == separator)
|
||||
{
|
||||
element += StringView{start, it+1};
|
||||
element.back() = separator;
|
||||
it += 2;
|
||||
start = it;
|
||||
}
|
||||
else if (c == separator)
|
||||
{
|
||||
element += StringView{start, it};
|
||||
++it;
|
||||
start = it;
|
||||
break;
|
||||
}
|
||||
else
|
||||
++it;
|
||||
}
|
||||
}
|
||||
if (start != str.end())
|
||||
res.back() += StringView{start, str.end()};
|
||||
return res;
|
||||
}
|
||||
|
||||
Vector<StringView> split(StringView str, char separator)
|
||||
{
|
||||
Vector<StringView> res;
|
||||
if (str.empty())
|
||||
return res;
|
||||
|
||||
auto beg = str.begin();
|
||||
for (auto it = beg; it != str.end(); ++it)
|
||||
{
|
||||
if (*it == separator)
|
||||
{
|
||||
res.emplace_back(beg, it);
|
||||
beg = it + 1;
|
||||
}
|
||||
}
|
||||
res.emplace_back(beg, str.end());
|
||||
return res;
|
||||
}
|
||||
|
||||
StringView trim_whitespaces(StringView str)
|
||||
{
|
||||
auto beg = str.begin(), end = str.end();
|
||||
while (beg != end and is_blank(*beg))
|
||||
++beg;
|
||||
while (beg != end and is_blank(*(end-1)))
|
||||
--end;
|
||||
return {beg, end};
|
||||
}
|
||||
|
||||
|
||||
String escape(StringView str, StringView characters, char escape)
|
||||
{
|
||||
String res;
|
||||
res.reserve(str.length());
|
||||
auto cbeg = characters.begin(), cend = characters.end();
|
||||
for (auto it = str.begin(), end = str.end(); it != end; )
|
||||
{
|
||||
auto next = std::find_first_of(it, end, cbeg, cend);
|
||||
if (next != end)
|
||||
{
|
||||
res += StringView{it, next+1};
|
||||
res.back() = escape;
|
||||
res += *next;
|
||||
it = next+1;
|
||||
}
|
||||
else
|
||||
{
|
||||
res += StringView{it, next};
|
||||
break;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
String unescape(StringView str, StringView characters, char escape)
|
||||
{
|
||||
String res;
|
||||
res.reserve(str.length());
|
||||
for (auto it = str.begin(), end = str.end(); it != end; )
|
||||
{
|
||||
auto next = std::find(it, end, escape);
|
||||
if (next != end and next+1 != end and contains(characters, *(next+1)))
|
||||
{
|
||||
res += StringView{it, next+1};
|
||||
res.back() = *(next+1);
|
||||
it = next + 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
res += StringView{it, next == end ? next : next + 1};
|
||||
it = next == end ? next : next + 1;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
String indent(StringView str, StringView indent)
|
||||
{
|
||||
String res;
|
||||
res.reserve(str.length());
|
||||
bool was_eol = true;
|
||||
for (ByteCount i = 0; i < str.length(); ++i)
|
||||
{
|
||||
if (was_eol)
|
||||
res += indent;
|
||||
res += str[i];
|
||||
was_eol = is_eol(str[i]);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
String replace(StringView str, StringView substr, StringView replacement)
|
||||
{
|
||||
String res;
|
||||
for (auto it = str.begin(); it != str.end(); )
|
||||
{
|
||||
auto match = std::search(it, str.end(), substr.begin(), substr.end());
|
||||
res += StringView{it, match};
|
||||
if (match == str.end())
|
||||
break;
|
||||
|
||||
res += replacement;
|
||||
it = match + (int)substr.length();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
Optional<int> str_to_int_ifp(StringView str)
|
||||
{
|
||||
bool negative = not str.empty() and str[0] == '-';
|
||||
if (negative)
|
||||
str = str.substr(1_byte);
|
||||
|
||||
unsigned int res = 0;
|
||||
for (auto c : str)
|
||||
{
|
||||
if (c < '0' or c > '9')
|
||||
return {};
|
||||
res = res * 10 + c - '0';
|
||||
}
|
||||
return negative ? -res : res;
|
||||
}
|
||||
|
||||
int str_to_int(StringView str)
|
||||
{
|
||||
if (auto val = str_to_int_ifp(str))
|
||||
return *val;
|
||||
throw runtime_error{str + " is not a number"};
|
||||
}
|
||||
|
||||
InplaceString<15> to_string(int val)
|
||||
{
|
||||
InplaceString<15> res;
|
||||
res.m_length = sprintf(res.m_data, "%i", val);
|
||||
return res;
|
||||
}
|
||||
|
||||
InplaceString<23> to_string(long int val)
|
||||
{
|
||||
InplaceString<23> res;
|
||||
res.m_length = sprintf(res.m_data, "%li", val);
|
||||
return res;
|
||||
}
|
||||
|
||||
InplaceString<23> to_string(long long int val)
|
||||
{
|
||||
InplaceString<23> res;
|
||||
res.m_length = sprintf(res.m_data, "%lli", val);
|
||||
return res;
|
||||
}
|
||||
|
||||
InplaceString<23> to_string(size_t val)
|
||||
{
|
||||
InplaceString<23> res;
|
||||
res.m_length = sprintf(res.m_data, "%zu", val);
|
||||
return res;
|
||||
}
|
||||
|
||||
InplaceString<23> to_string(Hex val)
|
||||
{
|
||||
InplaceString<23> res;
|
||||
res.m_length = sprintf(res.m_data, "%zx", val.val);
|
||||
return res;
|
||||
}
|
||||
|
||||
InplaceString<23> to_string(float val)
|
||||
{
|
||||
InplaceString<23> res;
|
||||
res.m_length = sprintf(res.m_data, "%f", val);
|
||||
return res;
|
||||
}
|
||||
|
||||
InplaceString<7> to_string(Codepoint c)
|
||||
{
|
||||
InplaceString<7> res;
|
||||
char* ptr = res.m_data;
|
||||
utf8::dump(ptr, c);
|
||||
res.m_length = (int)(ptr - res.m_data);
|
||||
return res;
|
||||
}
|
||||
|
||||
bool subsequence_match(StringView str, StringView subseq)
|
||||
{
|
||||
auto it = str.begin();
|
||||
for (auto& c : subseq)
|
||||
{
|
||||
if (it == str.end())
|
||||
return false;
|
||||
while (*it != c)
|
||||
{
|
||||
if (++it == str.end())
|
||||
return false;
|
||||
}
|
||||
++it;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
String expand_tabs(StringView line, ColumnCount tabstop, ColumnCount col)
|
||||
{
|
||||
String res;
|
||||
res.reserve(line.length());
|
||||
for (auto it = line.begin(), end = line.end(); it != end; )
|
||||
{
|
||||
if (*it == '\t')
|
||||
{
|
||||
ColumnCount end_col = (col / tabstop + 1) * tabstop;
|
||||
res += String{' ', end_col - col};
|
||||
col = end_col;
|
||||
++it;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto char_beg = it;
|
||||
auto cp = utf8::read_codepoint(it, end);
|
||||
res += {char_beg, it};
|
||||
col += codepoint_width(cp);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
Vector<StringView> wrap_lines(StringView text, ColumnCount max_width)
|
||||
{
|
||||
if (max_width <= 0)
|
||||
throw runtime_error("Invalid max width");
|
||||
|
||||
using Utf8It = utf8::iterator<const char*>;
|
||||
Utf8It it{text.begin(), text};
|
||||
Utf8It end{text.end(), text};
|
||||
Utf8It line_begin = it;
|
||||
Utf8It last_word_end = it;
|
||||
|
||||
Vector<StringView> lines;
|
||||
while (it != end)
|
||||
{
|
||||
const CharCategories cat = categorize(*it, {});
|
||||
if (cat == CharCategories::EndOfLine)
|
||||
{
|
||||
lines.emplace_back(line_begin.base(), it.base());
|
||||
line_begin = it = it+1;
|
||||
continue;
|
||||
}
|
||||
|
||||
Utf8It word_end = it+1;
|
||||
while (word_end != end and categorize(*word_end, {}) == cat)
|
||||
++word_end;
|
||||
|
||||
while (word_end > line_begin and
|
||||
utf8::column_distance(line_begin.base(), word_end.base()) >= max_width)
|
||||
{
|
||||
auto line_end = last_word_end <= line_begin ?
|
||||
Utf8It{utf8::advance(line_begin.base(), text.end(), max_width), text}
|
||||
: last_word_end;
|
||||
|
||||
lines.emplace_back(line_begin.base(), line_end.base());
|
||||
|
||||
while (line_end != end and is_horizontal_blank(*line_end))
|
||||
++line_end;
|
||||
|
||||
if (line_end != end and *line_end == '\n')
|
||||
++line_end;
|
||||
|
||||
it = line_begin = line_end;
|
||||
}
|
||||
if (cat == CharCategories::Word or cat == CharCategories::Punctuation)
|
||||
last_word_end = word_end;
|
||||
|
||||
if (word_end > line_begin)
|
||||
it = word_end;
|
||||
}
|
||||
if (line_begin != end)
|
||||
lines.emplace_back(line_begin.base(), text.end());
|
||||
return lines;
|
||||
}
|
||||
|
||||
template<typename AppendFunc>
|
||||
void format_impl(StringView fmt, ArrayView<const StringView> params, AppendFunc append)
|
||||
{
|
||||
int implicitIndex = 0;
|
||||
for (auto it = fmt.begin(), end = fmt.end(); it != end;)
|
||||
{
|
||||
auto opening = std::find(it, end, '{');
|
||||
if (opening == end)
|
||||
{
|
||||
append(StringView{it, opening});
|
||||
break;
|
||||
}
|
||||
else if (opening != it and *(opening-1) == '\\')
|
||||
{
|
||||
append(StringView{it, opening-1});
|
||||
append('{');
|
||||
it = opening + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
append(StringView{it, opening});
|
||||
auto closing = std::find(opening, end, '}');
|
||||
if (closing == end)
|
||||
throw runtime_error("Format string error, unclosed '{'");
|
||||
|
||||
const int index = (closing == opening + 1) ?
|
||||
implicitIndex : str_to_int({opening+1, closing});
|
||||
|
||||
if (index >= params.size())
|
||||
throw runtime_error("Format string parameter index too big");
|
||||
|
||||
append(params[index]);
|
||||
implicitIndex = index+1;
|
||||
it = closing+1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
StringView format_to(ArrayView<char> buffer, StringView fmt, ArrayView<const StringView> params)
|
||||
{
|
||||
char* ptr = buffer.begin();
|
||||
const char* end = buffer.end();
|
||||
format_impl(fmt, params, [&](StringView s) mutable {
|
||||
for (auto c : s)
|
||||
{
|
||||
if (ptr == end)
|
||||
throw runtime_error("buffer is too small");
|
||||
*ptr++ = c;
|
||||
}
|
||||
});
|
||||
if (ptr == end)
|
||||
throw runtime_error("buffer is too small");
|
||||
*ptr = 0;
|
||||
|
||||
return { buffer.begin(), ptr };
|
||||
}
|
||||
|
||||
String format(StringView fmt, ArrayView<const StringView> params)
|
||||
{
|
||||
ByteCount size = fmt.length();
|
||||
for (auto& s : params) size += s.length();
|
||||
String res;
|
||||
res.reserve(size);
|
||||
|
||||
format_impl(fmt, params, [&](StringView s) { res += s; });
|
||||
return res;
|
||||
}
|
||||
|
||||
UnitTest test_string{[]()
|
||||
{
|
||||
kak_assert(String("youpi ") + "matin" == "youpi matin");
|
||||
|
||||
Vector<String> splited = split("youpi:matin::tchou\\:kanaky:hihi\\:", ':', '\\');
|
||||
kak_assert(splited[0] == "youpi");
|
||||
kak_assert(splited[1] == "matin");
|
||||
kak_assert(splited[2] == "");
|
||||
kak_assert(splited[3] == "tchou:kanaky");
|
||||
kak_assert(splited[4] == "hihi:");
|
||||
|
||||
Vector<StringView> splitedview = split("youpi:matin::tchou\\:kanaky:hihi\\:", ':');
|
||||
kak_assert(splitedview[0] == "youpi");
|
||||
kak_assert(splitedview[1] == "matin");
|
||||
kak_assert(splitedview[2] == "");
|
||||
kak_assert(splitedview[3] == "tchou\\");
|
||||
kak_assert(splitedview[4] == "kanaky");
|
||||
kak_assert(splitedview[5] == "hihi\\");
|
||||
kak_assert(splitedview[6] == "");
|
||||
|
||||
Vector<StringView> wrapped = wrap_lines("wrap this paragraph\n respecting whitespaces and much_too_long_words", 16);
|
||||
kak_assert(wrapped.size() == 6);
|
||||
kak_assert(wrapped[0] == "wrap this");
|
||||
kak_assert(wrapped[1] == "paragraph");
|
||||
kak_assert(wrapped[2] == " respecting");
|
||||
kak_assert(wrapped[3] == "whitespaces and");
|
||||
kak_assert(wrapped[4] == "much_too_long_wo");
|
||||
kak_assert(wrapped[5] == "rds");
|
||||
|
||||
Vector<StringView> wrapped2 = wrap_lines("error: unknown type", 7);
|
||||
kak_assert(wrapped2.size() == 3);
|
||||
kak_assert(wrapped2[0] == "error:");
|
||||
kak_assert(wrapped2[1] == "unknown");
|
||||
kak_assert(wrapped2[2] == "type");
|
||||
|
||||
kak_assert(escape("youpi:matin:tchou:", ':', '\\') == "youpi\\:matin\\:tchou\\:");
|
||||
kak_assert(unescape("youpi\\:matin\\:tchou\\:", ':', '\\') == "youpi:matin:tchou:");
|
||||
|
||||
kak_assert(prefix_match("tchou kanaky", "tchou"));
|
||||
kak_assert(prefix_match("tchou kanaky", "tchou kanaky"));
|
||||
kak_assert(prefix_match("tchou kanaky", "t"));
|
||||
kak_assert(not prefix_match("tchou kanaky", "c"));
|
||||
|
||||
kak_assert(subsequence_match("tchou kanaky", "tknky"));
|
||||
kak_assert(subsequence_match("tchou kanaky", "knk"));
|
||||
kak_assert(subsequence_match("tchou kanaky", "tchou kanaky"));
|
||||
kak_assert(not subsequence_match("tchou kanaky", "tchou kanaky"));
|
||||
|
||||
kak_assert(format("Youhou {1} {} {0} \\{}", 10, "hehe", 5) == "Youhou hehe 5 10 {}");
|
||||
|
||||
char buffer[20];
|
||||
kak_assert(format_to(buffer, "Hey {}", 15) == "Hey 15");
|
||||
|
||||
kak_assert(str_to_int("5") == 5);
|
||||
kak_assert(str_to_int(to_string(INT_MAX)) == INT_MAX);
|
||||
kak_assert(str_to_int(to_string(INT_MIN)) == INT_MIN);
|
||||
kak_assert(str_to_int("00") == 0);
|
||||
kak_assert(str_to_int("-0") == 0);
|
||||
|
||||
kak_assert(replace("tchou/tcha/tchi", "/", "!!") == "tchou!!tcha!!tchi");
|
||||
}};
|
||||
|
||||
}
|
||||
|
|
107
src/string.hh
107
src/string.hh
|
@ -1,14 +1,11 @@
|
|||
#ifndef string_hh_INCLUDED
|
||||
#define string_hh_INCLUDED
|
||||
|
||||
#include "array_view.hh"
|
||||
#include "memory.hh"
|
||||
#include "hash.hh"
|
||||
#include "optional.hh"
|
||||
#include "units.hh"
|
||||
#include "utf8.hh"
|
||||
#include "vector.hh"
|
||||
|
||||
#include <cstring>
|
||||
#include <climits>
|
||||
|
||||
namespace Kakoune
|
||||
|
@ -331,108 +328,6 @@ inline StringView operator"" _sv(const char* str, size_t)
|
|||
return StringView{str};
|
||||
}
|
||||
|
||||
Vector<String> split(StringView str, char separator, char escape);
|
||||
Vector<StringView> split(StringView str, char separator);
|
||||
|
||||
StringView trim_whitespaces(StringView str);
|
||||
|
||||
String escape(StringView str, StringView characters, char escape);
|
||||
String unescape(StringView str, StringView characters, char escape);
|
||||
|
||||
String indent(StringView str, StringView indent = " ");
|
||||
|
||||
String replace(StringView str, StringView substr, StringView replacement);
|
||||
|
||||
template<typename Container>
|
||||
String join(const Container& container, char joiner, bool esc_joiner = true)
|
||||
{
|
||||
const char to_escape[2] = { joiner, '\\' };
|
||||
String res;
|
||||
for (const auto& str : container)
|
||||
{
|
||||
if (not res.empty())
|
||||
res += joiner;
|
||||
res += esc_joiner ? escape(str, {to_escape, 2}, '\\') : str;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
inline bool prefix_match(StringView str, StringView prefix)
|
||||
{
|
||||
return str.substr(0_byte, prefix.length()) == prefix;
|
||||
}
|
||||
|
||||
bool subsequence_match(StringView str, StringView subseq);
|
||||
|
||||
String expand_tabs(StringView line, ColumnCount tabstop, ColumnCount col = 0);
|
||||
|
||||
Vector<StringView> wrap_lines(StringView text, ColumnCount max_width);
|
||||
|
||||
int str_to_int(StringView str); // throws on error
|
||||
Optional<int> str_to_int_ifp(StringView str);
|
||||
|
||||
inline String option_to_string(StringView opt) { return opt.str(); }
|
||||
inline void option_from_string(StringView str, String& opt) { opt = str.str(); }
|
||||
inline bool option_add(String& opt, StringView val) { opt += val; return not val.empty(); }
|
||||
|
||||
template<size_t N>
|
||||
struct InplaceString
|
||||
{
|
||||
static_assert(N < 256, "InplaceString cannot handle sizes >= 256");
|
||||
|
||||
constexpr operator StringView() const { return {m_data, ByteCount{m_length}}; }
|
||||
operator String() const { return {m_data, ByteCount{m_length}}; }
|
||||
|
||||
unsigned char m_length;
|
||||
char m_data[N];
|
||||
};
|
||||
|
||||
struct Hex { size_t val; };
|
||||
constexpr Hex hex(size_t val) { return {val}; }
|
||||
|
||||
InplaceString<15> to_string(int val);
|
||||
InplaceString<23> to_string(long int val);
|
||||
InplaceString<23> to_string(size_t val);
|
||||
InplaceString<23> to_string(long long int val);
|
||||
InplaceString<23> to_string(Hex val);
|
||||
InplaceString<23> to_string(float val);
|
||||
InplaceString<7> to_string(Codepoint c);
|
||||
|
||||
template<typename RealType, typename ValueType>
|
||||
decltype(auto) to_string(const StronglyTypedNumber<RealType, ValueType>& val)
|
||||
{
|
||||
return to_string((ValueType)val);
|
||||
}
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
template<typename T> constexpr bool is_string = std::is_convertible<T, StringView>::value;
|
||||
|
||||
template<typename T, class = std::enable_if_t<not is_string<T>>>
|
||||
decltype(auto) format_param(const T& val) { return to_string(val); }
|
||||
|
||||
template<typename T, class = std::enable_if_t<is_string<T>>>
|
||||
StringView format_param(const T& val) { return val; }
|
||||
|
||||
}
|
||||
|
||||
String format(StringView fmt, ArrayView<const StringView> params);
|
||||
|
||||
template<typename... Types>
|
||||
String format(StringView fmt, Types&&... params)
|
||||
{
|
||||
return format(fmt, ArrayView<const StringView>{detail::format_param(std::forward<Types>(params))...});
|
||||
}
|
||||
|
||||
StringView format_to(ArrayView<char> buffer, StringView fmt, ArrayView<const StringView> params);
|
||||
|
||||
template<typename... Types>
|
||||
StringView format_to(ArrayView<char> buffer, StringView fmt, Types&&... params)
|
||||
{
|
||||
return format_to(buffer, fmt, ArrayView<const StringView>{detail::format_param(std::forward<Types>(params))...});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // string_hh_INCLUDED
|
||||
|
|
451
src/string_utils.cc
Normal file
451
src/string_utils.cc
Normal file
|
@ -0,0 +1,451 @@
|
|||
#include "string_utils.hh"
|
||||
|
||||
#include "exception.hh"
|
||||
#include "utf8_iterator.hh"
|
||||
#include "unit_tests.hh"
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
||||
Vector<String> split(StringView str, char separator, char escape)
|
||||
{
|
||||
Vector<String> res;
|
||||
auto it = str.begin();
|
||||
auto start = it;
|
||||
while (it != str.end())
|
||||
{
|
||||
res.emplace_back();
|
||||
String& element = res.back();
|
||||
while (it != str.end())
|
||||
{
|
||||
auto c = *it;
|
||||
if (c == escape and it + 1 != str.end() and *(it+1) == separator)
|
||||
{
|
||||
element += StringView{start, it+1};
|
||||
element.back() = separator;
|
||||
it += 2;
|
||||
start = it;
|
||||
}
|
||||
else if (c == separator)
|
||||
{
|
||||
element += StringView{start, it};
|
||||
++it;
|
||||
start = it;
|
||||
break;
|
||||
}
|
||||
else
|
||||
++it;
|
||||
}
|
||||
}
|
||||
if (start != str.end())
|
||||
res.back() += StringView{start, str.end()};
|
||||
return res;
|
||||
}
|
||||
|
||||
Vector<StringView> split(StringView str, char separator)
|
||||
{
|
||||
Vector<StringView> res;
|
||||
if (str.empty())
|
||||
return res;
|
||||
|
||||
auto beg = str.begin();
|
||||
for (auto it = beg; it != str.end(); ++it)
|
||||
{
|
||||
if (*it == separator)
|
||||
{
|
||||
res.emplace_back(beg, it);
|
||||
beg = it + 1;
|
||||
}
|
||||
}
|
||||
res.emplace_back(beg, str.end());
|
||||
return res;
|
||||
}
|
||||
|
||||
StringView trim_whitespaces(StringView str)
|
||||
{
|
||||
auto beg = str.begin(), end = str.end();
|
||||
while (beg != end and is_blank(*beg))
|
||||
++beg;
|
||||
while (beg != end and is_blank(*(end-1)))
|
||||
--end;
|
||||
return {beg, end};
|
||||
}
|
||||
|
||||
|
||||
String escape(StringView str, StringView characters, char escape)
|
||||
{
|
||||
String res;
|
||||
res.reserve(str.length());
|
||||
auto cbeg = characters.begin(), cend = characters.end();
|
||||
for (auto it = str.begin(), end = str.end(); it != end; )
|
||||
{
|
||||
auto next = std::find_first_of(it, end, cbeg, cend);
|
||||
if (next != end)
|
||||
{
|
||||
res += StringView{it, next+1};
|
||||
res.back() = escape;
|
||||
res += *next;
|
||||
it = next+1;
|
||||
}
|
||||
else
|
||||
{
|
||||
res += StringView{it, next};
|
||||
break;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
String unescape(StringView str, StringView characters, char escape)
|
||||
{
|
||||
String res;
|
||||
res.reserve(str.length());
|
||||
for (auto it = str.begin(), end = str.end(); it != end; )
|
||||
{
|
||||
auto next = std::find(it, end, escape);
|
||||
if (next != end and next+1 != end and contains(characters, *(next+1)))
|
||||
{
|
||||
res += StringView{it, next+1};
|
||||
res.back() = *(next+1);
|
||||
it = next + 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
res += StringView{it, next == end ? next : next + 1};
|
||||
it = next == end ? next : next + 1;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
String indent(StringView str, StringView indent)
|
||||
{
|
||||
String res;
|
||||
res.reserve(str.length());
|
||||
bool was_eol = true;
|
||||
for (ByteCount i = 0; i < str.length(); ++i)
|
||||
{
|
||||
if (was_eol)
|
||||
res += indent;
|
||||
res += str[i];
|
||||
was_eol = is_eol(str[i]);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
String replace(StringView str, StringView substr, StringView replacement)
|
||||
{
|
||||
String res;
|
||||
for (auto it = str.begin(); it != str.end(); )
|
||||
{
|
||||
auto match = std::search(it, str.end(), substr.begin(), substr.end());
|
||||
res += StringView{it, match};
|
||||
if (match == str.end())
|
||||
break;
|
||||
|
||||
res += replacement;
|
||||
it = match + (int)substr.length();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
Optional<int> str_to_int_ifp(StringView str)
|
||||
{
|
||||
bool negative = not str.empty() and str[0] == '-';
|
||||
if (negative)
|
||||
str = str.substr(1_byte);
|
||||
|
||||
unsigned int res = 0;
|
||||
for (auto c : str)
|
||||
{
|
||||
if (c < '0' or c > '9')
|
||||
return {};
|
||||
res = res * 10 + c - '0';
|
||||
}
|
||||
return negative ? -res : res;
|
||||
}
|
||||
|
||||
int str_to_int(StringView str)
|
||||
{
|
||||
if (auto val = str_to_int_ifp(str))
|
||||
return *val;
|
||||
throw runtime_error{str + " is not a number"};
|
||||
}
|
||||
|
||||
InplaceString<15> to_string(int val)
|
||||
{
|
||||
InplaceString<15> res;
|
||||
res.m_length = sprintf(res.m_data, "%i", val);
|
||||
return res;
|
||||
}
|
||||
|
||||
InplaceString<23> to_string(long int val)
|
||||
{
|
||||
InplaceString<23> res;
|
||||
res.m_length = sprintf(res.m_data, "%li", val);
|
||||
return res;
|
||||
}
|
||||
|
||||
InplaceString<23> to_string(long long int val)
|
||||
{
|
||||
InplaceString<23> res;
|
||||
res.m_length = sprintf(res.m_data, "%lli", val);
|
||||
return res;
|
||||
}
|
||||
|
||||
InplaceString<23> to_string(size_t val)
|
||||
{
|
||||
InplaceString<23> res;
|
||||
res.m_length = sprintf(res.m_data, "%zu", val);
|
||||
return res;
|
||||
}
|
||||
|
||||
InplaceString<23> to_string(Hex val)
|
||||
{
|
||||
InplaceString<23> res;
|
||||
res.m_length = sprintf(res.m_data, "%zx", val.val);
|
||||
return res;
|
||||
}
|
||||
|
||||
InplaceString<23> to_string(float val)
|
||||
{
|
||||
InplaceString<23> res;
|
||||
res.m_length = sprintf(res.m_data, "%f", val);
|
||||
return res;
|
||||
}
|
||||
|
||||
InplaceString<7> to_string(Codepoint c)
|
||||
{
|
||||
InplaceString<7> res;
|
||||
char* ptr = res.m_data;
|
||||
utf8::dump(ptr, c);
|
||||
res.m_length = (int)(ptr - res.m_data);
|
||||
return res;
|
||||
}
|
||||
|
||||
bool subsequence_match(StringView str, StringView subseq)
|
||||
{
|
||||
auto it = str.begin();
|
||||
for (auto& c : subseq)
|
||||
{
|
||||
if (it == str.end())
|
||||
return false;
|
||||
while (*it != c)
|
||||
{
|
||||
if (++it == str.end())
|
||||
return false;
|
||||
}
|
||||
++it;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
String expand_tabs(StringView line, ColumnCount tabstop, ColumnCount col)
|
||||
{
|
||||
String res;
|
||||
res.reserve(line.length());
|
||||
for (auto it = line.begin(), end = line.end(); it != end; )
|
||||
{
|
||||
if (*it == '\t')
|
||||
{
|
||||
ColumnCount end_col = (col / tabstop + 1) * tabstop;
|
||||
res += String{' ', end_col - col};
|
||||
col = end_col;
|
||||
++it;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto char_beg = it;
|
||||
auto cp = utf8::read_codepoint(it, end);
|
||||
res += {char_beg, it};
|
||||
col += codepoint_width(cp);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
Vector<StringView> wrap_lines(StringView text, ColumnCount max_width)
|
||||
{
|
||||
if (max_width <= 0)
|
||||
throw runtime_error("Invalid max width");
|
||||
|
||||
using Utf8It = utf8::iterator<const char*>;
|
||||
Utf8It it{text.begin(), text};
|
||||
Utf8It end{text.end(), text};
|
||||
Utf8It line_begin = it;
|
||||
Utf8It last_word_end = it;
|
||||
|
||||
Vector<StringView> lines;
|
||||
while (it != end)
|
||||
{
|
||||
const CharCategories cat = categorize(*it, {});
|
||||
if (cat == CharCategories::EndOfLine)
|
||||
{
|
||||
lines.emplace_back(line_begin.base(), it.base());
|
||||
line_begin = it = it+1;
|
||||
continue;
|
||||
}
|
||||
|
||||
Utf8It word_end = it+1;
|
||||
while (word_end != end and categorize(*word_end, {}) == cat)
|
||||
++word_end;
|
||||
|
||||
while (word_end > line_begin and
|
||||
utf8::column_distance(line_begin.base(), word_end.base()) >= max_width)
|
||||
{
|
||||
auto line_end = last_word_end <= line_begin ?
|
||||
Utf8It{utf8::advance(line_begin.base(), text.end(), max_width), text}
|
||||
: last_word_end;
|
||||
|
||||
lines.emplace_back(line_begin.base(), line_end.base());
|
||||
|
||||
while (line_end != end and is_horizontal_blank(*line_end))
|
||||
++line_end;
|
||||
|
||||
if (line_end != end and *line_end == '\n')
|
||||
++line_end;
|
||||
|
||||
it = line_begin = line_end;
|
||||
}
|
||||
if (cat == CharCategories::Word or cat == CharCategories::Punctuation)
|
||||
last_word_end = word_end;
|
||||
|
||||
if (word_end > line_begin)
|
||||
it = word_end;
|
||||
}
|
||||
if (line_begin != end)
|
||||
lines.emplace_back(line_begin.base(), text.end());
|
||||
return lines;
|
||||
}
|
||||
|
||||
template<typename AppendFunc>
|
||||
void format_impl(StringView fmt, ArrayView<const StringView> params, AppendFunc append)
|
||||
{
|
||||
int implicitIndex = 0;
|
||||
for (auto it = fmt.begin(), end = fmt.end(); it != end;)
|
||||
{
|
||||
auto opening = std::find(it, end, '{');
|
||||
if (opening == end)
|
||||
{
|
||||
append(StringView{it, opening});
|
||||
break;
|
||||
}
|
||||
else if (opening != it and *(opening-1) == '\\')
|
||||
{
|
||||
append(StringView{it, opening-1});
|
||||
append('{');
|
||||
it = opening + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
append(StringView{it, opening});
|
||||
auto closing = std::find(opening, end, '}');
|
||||
if (closing == end)
|
||||
throw runtime_error("Format string error, unclosed '{'");
|
||||
|
||||
const int index = (closing == opening + 1) ?
|
||||
implicitIndex : str_to_int({opening+1, closing});
|
||||
|
||||
if (index >= params.size())
|
||||
throw runtime_error("Format string parameter index too big");
|
||||
|
||||
append(params[index]);
|
||||
implicitIndex = index+1;
|
||||
it = closing+1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
StringView format_to(ArrayView<char> buffer, StringView fmt, ArrayView<const StringView> params)
|
||||
{
|
||||
char* ptr = buffer.begin();
|
||||
const char* end = buffer.end();
|
||||
format_impl(fmt, params, [&](StringView s) mutable {
|
||||
for (auto c : s)
|
||||
{
|
||||
if (ptr == end)
|
||||
throw runtime_error("buffer is too small");
|
||||
*ptr++ = c;
|
||||
}
|
||||
});
|
||||
if (ptr == end)
|
||||
throw runtime_error("buffer is too small");
|
||||
*ptr = 0;
|
||||
|
||||
return { buffer.begin(), ptr };
|
||||
}
|
||||
|
||||
String format(StringView fmt, ArrayView<const StringView> params)
|
||||
{
|
||||
ByteCount size = fmt.length();
|
||||
for (auto& s : params) size += s.length();
|
||||
String res;
|
||||
res.reserve(size);
|
||||
|
||||
format_impl(fmt, params, [&](StringView s) { res += s; });
|
||||
return res;
|
||||
}
|
||||
|
||||
UnitTest test_string{[]()
|
||||
{
|
||||
kak_assert(String("youpi ") + "matin" == "youpi matin");
|
||||
|
||||
Vector<String> splited = split("youpi:matin::tchou\\:kanaky:hihi\\:", ':', '\\');
|
||||
kak_assert(splited[0] == "youpi");
|
||||
kak_assert(splited[1] == "matin");
|
||||
kak_assert(splited[2] == "");
|
||||
kak_assert(splited[3] == "tchou:kanaky");
|
||||
kak_assert(splited[4] == "hihi:");
|
||||
|
||||
Vector<StringView> splitedview = split("youpi:matin::tchou\\:kanaky:hihi\\:", ':');
|
||||
kak_assert(splitedview[0] == "youpi");
|
||||
kak_assert(splitedview[1] == "matin");
|
||||
kak_assert(splitedview[2] == "");
|
||||
kak_assert(splitedview[3] == "tchou\\");
|
||||
kak_assert(splitedview[4] == "kanaky");
|
||||
kak_assert(splitedview[5] == "hihi\\");
|
||||
kak_assert(splitedview[6] == "");
|
||||
|
||||
Vector<StringView> wrapped = wrap_lines("wrap this paragraph\n respecting whitespaces and much_too_long_words", 16);
|
||||
kak_assert(wrapped.size() == 6);
|
||||
kak_assert(wrapped[0] == "wrap this");
|
||||
kak_assert(wrapped[1] == "paragraph");
|
||||
kak_assert(wrapped[2] == " respecting");
|
||||
kak_assert(wrapped[3] == "whitespaces and");
|
||||
kak_assert(wrapped[4] == "much_too_long_wo");
|
||||
kak_assert(wrapped[5] == "rds");
|
||||
|
||||
Vector<StringView> wrapped2 = wrap_lines("error: unknown type", 7);
|
||||
kak_assert(wrapped2.size() == 3);
|
||||
kak_assert(wrapped2[0] == "error:");
|
||||
kak_assert(wrapped2[1] == "unknown");
|
||||
kak_assert(wrapped2[2] == "type");
|
||||
|
||||
kak_assert(escape("youpi:matin:tchou:", ':', '\\') == "youpi\\:matin\\:tchou\\:");
|
||||
kak_assert(unescape("youpi\\:matin\\:tchou\\:", ':', '\\') == "youpi:matin:tchou:");
|
||||
|
||||
kak_assert(prefix_match("tchou kanaky", "tchou"));
|
||||
kak_assert(prefix_match("tchou kanaky", "tchou kanaky"));
|
||||
kak_assert(prefix_match("tchou kanaky", "t"));
|
||||
kak_assert(not prefix_match("tchou kanaky", "c"));
|
||||
|
||||
kak_assert(subsequence_match("tchou kanaky", "tknky"));
|
||||
kak_assert(subsequence_match("tchou kanaky", "knk"));
|
||||
kak_assert(subsequence_match("tchou kanaky", "tchou kanaky"));
|
||||
kak_assert(not subsequence_match("tchou kanaky", "tchou kanaky"));
|
||||
|
||||
kak_assert(format("Youhou {1} {} {0} \\{}", 10, "hehe", 5) == "Youhou hehe 5 10 {}");
|
||||
|
||||
char buffer[20];
|
||||
kak_assert(format_to(buffer, "Hey {}", 15) == "Hey 15");
|
||||
|
||||
kak_assert(str_to_int("5") == 5);
|
||||
kak_assert(str_to_int(to_string(INT_MAX)) == INT_MAX);
|
||||
kak_assert(str_to_int(to_string(INT_MIN)) == INT_MIN);
|
||||
kak_assert(str_to_int("00") == 0);
|
||||
kak_assert(str_to_int("-0") == 0);
|
||||
|
||||
kak_assert(replace("tchou/tcha/tchi", "/", "!!") == "tchou!!tcha!!tchi");
|
||||
}};
|
||||
|
||||
}
|
115
src/string_utils.hh
Normal file
115
src/string_utils.hh
Normal file
|
@ -0,0 +1,115 @@
|
|||
#ifndef string_utils_hh_INCLUDED
|
||||
#define string_utils_hh_INCLUDED
|
||||
|
||||
#include "string.hh"
|
||||
#include "vector.hh"
|
||||
#include "optional.hh"
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
||||
Vector<String> split(StringView str, char separator, char escape);
|
||||
Vector<StringView> split(StringView str, char separator);
|
||||
|
||||
StringView trim_whitespaces(StringView str);
|
||||
|
||||
String escape(StringView str, StringView characters, char escape);
|
||||
String unescape(StringView str, StringView characters, char escape);
|
||||
|
||||
String indent(StringView str, StringView indent = " ");
|
||||
|
||||
String replace(StringView str, StringView substr, StringView replacement);
|
||||
|
||||
template<typename Container>
|
||||
String join(const Container& container, char joiner, bool esc_joiner = true)
|
||||
{
|
||||
const char to_escape[2] = { joiner, '\\' };
|
||||
String res;
|
||||
for (const auto& str : container)
|
||||
{
|
||||
if (not res.empty())
|
||||
res += joiner;
|
||||
res += esc_joiner ? escape(str, {to_escape, 2}, '\\') : str;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
inline bool prefix_match(StringView str, StringView prefix)
|
||||
{
|
||||
return str.substr(0_byte, prefix.length()) == prefix;
|
||||
}
|
||||
|
||||
bool subsequence_match(StringView str, StringView subseq);
|
||||
|
||||
String expand_tabs(StringView line, ColumnCount tabstop, ColumnCount col = 0);
|
||||
|
||||
Vector<StringView> wrap_lines(StringView text, ColumnCount max_width);
|
||||
|
||||
int str_to_int(StringView str); // throws on error
|
||||
Optional<int> str_to_int_ifp(StringView str);
|
||||
|
||||
inline String option_to_string(StringView opt) { return opt.str(); }
|
||||
inline void option_from_string(StringView str, String& opt) { opt = str.str(); }
|
||||
inline bool option_add(String& opt, StringView val) { opt += val; return not val.empty(); }
|
||||
|
||||
template<size_t N>
|
||||
struct InplaceString
|
||||
{
|
||||
static_assert(N < 256, "InplaceString cannot handle sizes >= 256");
|
||||
|
||||
constexpr operator StringView() const { return {m_data, ByteCount{m_length}}; }
|
||||
operator String() const { return {m_data, ByteCount{m_length}}; }
|
||||
|
||||
unsigned char m_length;
|
||||
char m_data[N];
|
||||
};
|
||||
|
||||
struct Hex { size_t val; };
|
||||
constexpr Hex hex(size_t val) { return {val}; }
|
||||
|
||||
InplaceString<15> to_string(int val);
|
||||
InplaceString<23> to_string(long int val);
|
||||
InplaceString<23> to_string(size_t val);
|
||||
InplaceString<23> to_string(long long int val);
|
||||
InplaceString<23> to_string(Hex val);
|
||||
InplaceString<23> to_string(float val);
|
||||
InplaceString<7> to_string(Codepoint c);
|
||||
|
||||
template<typename RealType, typename ValueType>
|
||||
decltype(auto) to_string(const StronglyTypedNumber<RealType, ValueType>& val)
|
||||
{
|
||||
return to_string((ValueType)val);
|
||||
}
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
template<typename T> constexpr bool is_string = std::is_convertible<T, StringView>::value;
|
||||
|
||||
template<typename T, class = std::enable_if_t<not is_string<T>>>
|
||||
decltype(auto) format_param(const T& val) { return to_string(val); }
|
||||
|
||||
template<typename T, class = std::enable_if_t<is_string<T>>>
|
||||
StringView format_param(const T& val) { return val; }
|
||||
|
||||
}
|
||||
|
||||
String format(StringView fmt, ArrayView<const StringView> params);
|
||||
|
||||
template<typename... Types>
|
||||
String format(StringView fmt, Types&&... params)
|
||||
{
|
||||
return format(fmt, ArrayView<const StringView>{detail::format_param(std::forward<Types>(params))...});
|
||||
}
|
||||
|
||||
StringView format_to(ArrayView<char> buffer, StringView fmt, ArrayView<const StringView> params);
|
||||
|
||||
template<typename... Types>
|
||||
StringView format_to(ArrayView<char> buffer, StringView fmt, Types&&... params)
|
||||
{
|
||||
return format_to(buffer, fmt, ArrayView<const StringView>{detail::format_param(std::forward<Types>(params))...});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // string_utils_hh_INCLUDED
|
Loading…
Reference in New Issue
Block a user