From 6ada6e6d773982282031afa43b150299d1fbec87 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Mon, 9 Oct 2017 22:12:42 +0800 Subject: [PATCH] Move all non-core string code to string_utils.{hh,cc} --- src/color.cc | 1 + src/commands.cc | 1 + src/face_registry.cc | 1 + src/file.cc | 1 + src/highlighter_group.cc | 1 + src/json_ui.cc | 1 + src/keys.cc | 1 + src/ncurses_ui.cc | 1 + src/option_manager.hh | 1 + src/parameters_parser.hh | 1 + src/ranked_match.cc | 1 + src/regex.hh | 1 + src/register_manager.cc | 1 + src/remote.hh | 1 + src/shared_string.cc | 2 + src/string.cc | 447 +------------------------------------- src/string.hh | 107 +--------- src/string_utils.cc | 451 +++++++++++++++++++++++++++++++++++++++ src/string_utils.hh | 115 ++++++++++ 19 files changed, 584 insertions(+), 552 deletions(-) create mode 100644 src/string_utils.cc create mode 100644 src/string_utils.hh diff --git a/src/color.cc b/src/color.cc index 03d80ab3..d2335191 100644 --- a/src/color.cc +++ b/src/color.cc @@ -2,6 +2,7 @@ #include "exception.hh" #include "ranges.hh" +#include "string_utils.hh" #include diff --git a/src/commands.cc b/src/commands.cc index 80b49efe..466a84dd 100644 --- a/src/commands.cc +++ b/src/commands.cc @@ -28,6 +28,7 @@ #include "user_interface.hh" #include "window.hh" +#include #include #include diff --git a/src/face_registry.cc b/src/face_registry.cc index 192f9fda..b78f7f60 100644 --- a/src/face_registry.cc +++ b/src/face_registry.cc @@ -2,6 +2,7 @@ #include "exception.hh" #include "ranges.hh" +#include "string_utils.hh" namespace Kakoune { diff --git a/src/file.cc b/src/file.cc index 105e32b7..b95972b3 100644 --- a/src/file.cc +++ b/src/file.cc @@ -10,6 +10,7 @@ #include "unicode.hh" #include +#include #include #include #include diff --git a/src/highlighter_group.cc b/src/highlighter_group.cc index 4860d59d..33723d66 100644 --- a/src/highlighter_group.cc +++ b/src/highlighter_group.cc @@ -1,6 +1,7 @@ #include "highlighter_group.hh" #include "ranges.hh" +#include "string_utils.hh" namespace Kakoune { diff --git a/src/json_ui.cc b/src/json_ui.cc index ca7d3109..962ef693 100644 --- a/src/json_ui.cc +++ b/src/json_ui.cc @@ -6,6 +6,7 @@ #include "file.hh" #include "keys.hh" #include "ranges.hh" +#include "string_utils.hh" #include "unit_tests.hh" #include "value.hh" diff --git a/src/keys.cc b/src/keys.cc index 8ddea2b4..90dfa59f 100644 --- a/src/keys.cc +++ b/src/keys.cc @@ -6,6 +6,7 @@ #include "unit_tests.hh" #include "utf8_iterator.hh" #include "utils.hh" +#include "string_utils.hh" namespace Kakoune { diff --git a/src/ncurses_ui.cc b/src/ncurses_ui.cc index b9131462..9fed7b00 100644 --- a/src/ncurses_ui.cc +++ b/src/ncurses_ui.cc @@ -4,6 +4,7 @@ #include "event_manager.hh" #include "keys.hh" #include "ranges.hh" +#include "string_utils.hh" #include diff --git a/src/option_manager.hh b/src/option_manager.hh index 3e2568cb..0e1ddaab 100644 --- a/src/option_manager.hh +++ b/src/option_manager.hh @@ -8,6 +8,7 @@ #include "ranges.hh" #include "utils.hh" #include "vector.hh" +#include "string_utils.hh" #include #include diff --git a/src/parameters_parser.hh b/src/parameters_parser.hh index 7a1b81ae..8cbaf0fd 100644 --- a/src/parameters_parser.hh +++ b/src/parameters_parser.hh @@ -7,6 +7,7 @@ #include "array_view.hh" #include "optional.hh" #include "string.hh" +#include "string_utils.hh" namespace Kakoune { diff --git a/src/ranked_match.cc b/src/ranked_match.cc index 2bfb8d8b..15d84195 100644 --- a/src/ranked_match.cc +++ b/src/ranked_match.cc @@ -3,6 +3,7 @@ #include "flags.hh" #include "unit_tests.hh" #include "utf8_iterator.hh" +#include "optional.hh" #include diff --git a/src/regex.hh b/src/regex.hh index a76990b0..5e8b13fe 100644 --- a/src/regex.hh +++ b/src/regex.hh @@ -2,6 +2,7 @@ #define regex_hh_INCLUDED #include "string.hh" +#include "string_utils.hh" #include "exception.hh" #include "utf8_iterator.hh" diff --git a/src/register_manager.cc b/src/register_manager.cc index 815d5d4c..3abbd61b 100644 --- a/src/register_manager.cc +++ b/src/register_manager.cc @@ -2,6 +2,7 @@ #include "assert.hh" #include "hash_map.hh" +#include "string_utils.hh" namespace Kakoune { diff --git a/src/remote.hh b/src/remote.hh index f716d243..b270bc47 100644 --- a/src/remote.hh +++ b/src/remote.hh @@ -5,6 +5,7 @@ #include "exception.hh" #include "utils.hh" #include "vector.hh" +#include "optional.hh" #include diff --git a/src/shared_string.cc b/src/shared_string.cc index 74c6c106..73f2b69d 100644 --- a/src/shared_string.cc +++ b/src/shared_string.cc @@ -1,6 +1,8 @@ #include "shared_string.hh" #include "buffer_utils.hh" +#include + namespace Kakoune { diff --git a/src/string.cc b/src/string.cc index baebf29f..6ff89450 100644 --- a/src/string.cc +++ b/src/string.cc @@ -1,11 +1,7 @@ #include "string.hh" -#include "exception.hh" -#include "ranges.hh" -#include "unit_tests.hh" -#include "utf8_iterator.hh" - #include +#include namespace Kakoune { @@ -164,445 +160,4 @@ void String::Data::set_short(const char* data, size_t size) const String String::ms_empty; -Vector split(StringView str, char separator, char escape) -{ - Vector res; - auto it = str.begin(); - auto start = it; - while (it != str.end()) - { - res.emplace_back(); - String& element = res.back(); - while (it != str.end()) - { - auto c = *it; - if (c == escape and it + 1 != str.end() and *(it+1) == separator) - { - element += StringView{start, it+1}; - element.back() = separator; - it += 2; - start = it; - } - else if (c == separator) - { - element += StringView{start, it}; - ++it; - start = it; - break; - } - else - ++it; - } - } - if (start != str.end()) - res.back() += StringView{start, str.end()}; - return res; -} - -Vector split(StringView str, char separator) -{ - Vector res; - if (str.empty()) - return res; - - auto beg = str.begin(); - for (auto it = beg; it != str.end(); ++it) - { - if (*it == separator) - { - res.emplace_back(beg, it); - beg = it + 1; - } - } - res.emplace_back(beg, str.end()); - return res; -} - -StringView trim_whitespaces(StringView str) -{ - auto beg = str.begin(), end = str.end(); - while (beg != end and is_blank(*beg)) - ++beg; - while (beg != end and is_blank(*(end-1))) - --end; - return {beg, end}; -} - - -String escape(StringView str, StringView characters, char escape) -{ - String res; - res.reserve(str.length()); - auto cbeg = characters.begin(), cend = characters.end(); - for (auto it = str.begin(), end = str.end(); it != end; ) - { - auto next = std::find_first_of(it, end, cbeg, cend); - if (next != end) - { - res += StringView{it, next+1}; - res.back() = escape; - res += *next; - it = next+1; - } - else - { - res += StringView{it, next}; - break; - } - } - return res; -} - -String unescape(StringView str, StringView characters, char escape) -{ - String res; - res.reserve(str.length()); - for (auto it = str.begin(), end = str.end(); it != end; ) - { - auto next = std::find(it, end, escape); - if (next != end and next+1 != end and contains(characters, *(next+1))) - { - res += StringView{it, next+1}; - res.back() = *(next+1); - it = next + 2; - } - else - { - res += StringView{it, next == end ? next : next + 1}; - it = next == end ? next : next + 1; - } - } - return res; -} - -String indent(StringView str, StringView indent) -{ - String res; - res.reserve(str.length()); - bool was_eol = true; - for (ByteCount i = 0; i < str.length(); ++i) - { - if (was_eol) - res += indent; - res += str[i]; - was_eol = is_eol(str[i]); - } - return res; -} - -String replace(StringView str, StringView substr, StringView replacement) -{ - String res; - for (auto it = str.begin(); it != str.end(); ) - { - auto match = std::search(it, str.end(), substr.begin(), substr.end()); - res += StringView{it, match}; - if (match == str.end()) - break; - - res += replacement; - it = match + (int)substr.length(); - } - return res; -} - -Optional str_to_int_ifp(StringView str) -{ - bool negative = not str.empty() and str[0] == '-'; - if (negative) - str = str.substr(1_byte); - - unsigned int res = 0; - for (auto c : str) - { - if (c < '0' or c > '9') - return {}; - res = res * 10 + c - '0'; - } - return negative ? -res : res; -} - -int str_to_int(StringView str) -{ - if (auto val = str_to_int_ifp(str)) - return *val; - throw runtime_error{str + " is not a number"}; -} - -InplaceString<15> to_string(int val) -{ - InplaceString<15> res; - res.m_length = sprintf(res.m_data, "%i", val); - return res; -} - -InplaceString<23> to_string(long int val) -{ - InplaceString<23> res; - res.m_length = sprintf(res.m_data, "%li", val); - return res; -} - -InplaceString<23> to_string(long long int val) -{ - InplaceString<23> res; - res.m_length = sprintf(res.m_data, "%lli", val); - return res; -} - -InplaceString<23> to_string(size_t val) -{ - InplaceString<23> res; - res.m_length = sprintf(res.m_data, "%zu", val); - return res; -} - -InplaceString<23> to_string(Hex val) -{ - InplaceString<23> res; - res.m_length = sprintf(res.m_data, "%zx", val.val); - return res; -} - -InplaceString<23> to_string(float val) -{ - InplaceString<23> res; - res.m_length = sprintf(res.m_data, "%f", val); - return res; -} - -InplaceString<7> to_string(Codepoint c) -{ - InplaceString<7> res; - char* ptr = res.m_data; - utf8::dump(ptr, c); - res.m_length = (int)(ptr - res.m_data); - return res; -} - -bool subsequence_match(StringView str, StringView subseq) -{ - auto it = str.begin(); - for (auto& c : subseq) - { - if (it == str.end()) - return false; - while (*it != c) - { - if (++it == str.end()) - return false; - } - ++it; - } - return true; -} - -String expand_tabs(StringView line, ColumnCount tabstop, ColumnCount col) -{ - String res; - res.reserve(line.length()); - for (auto it = line.begin(), end = line.end(); it != end; ) - { - if (*it == '\t') - { - ColumnCount end_col = (col / tabstop + 1) * tabstop; - res += String{' ', end_col - col}; - col = end_col; - ++it; - } - else - { - auto char_beg = it; - auto cp = utf8::read_codepoint(it, end); - res += {char_beg, it}; - col += codepoint_width(cp); - } - } - return res; -} - -Vector wrap_lines(StringView text, ColumnCount max_width) -{ - if (max_width <= 0) - throw runtime_error("Invalid max width"); - - using Utf8It = utf8::iterator; - Utf8It it{text.begin(), text}; - Utf8It end{text.end(), text}; - Utf8It line_begin = it; - Utf8It last_word_end = it; - - Vector lines; - while (it != end) - { - const CharCategories cat = categorize(*it, {}); - if (cat == CharCategories::EndOfLine) - { - lines.emplace_back(line_begin.base(), it.base()); - line_begin = it = it+1; - continue; - } - - Utf8It word_end = it+1; - while (word_end != end and categorize(*word_end, {}) == cat) - ++word_end; - - while (word_end > line_begin and - utf8::column_distance(line_begin.base(), word_end.base()) >= max_width) - { - auto line_end = last_word_end <= line_begin ? - Utf8It{utf8::advance(line_begin.base(), text.end(), max_width), text} - : last_word_end; - - lines.emplace_back(line_begin.base(), line_end.base()); - - while (line_end != end and is_horizontal_blank(*line_end)) - ++line_end; - - if (line_end != end and *line_end == '\n') - ++line_end; - - it = line_begin = line_end; - } - if (cat == CharCategories::Word or cat == CharCategories::Punctuation) - last_word_end = word_end; - - if (word_end > line_begin) - it = word_end; - } - if (line_begin != end) - lines.emplace_back(line_begin.base(), text.end()); - return lines; -} - -template -void format_impl(StringView fmt, ArrayView params, AppendFunc append) -{ - int implicitIndex = 0; - for (auto it = fmt.begin(), end = fmt.end(); it != end;) - { - auto opening = std::find(it, end, '{'); - if (opening == end) - { - append(StringView{it, opening}); - break; - } - else if (opening != it and *(opening-1) == '\\') - { - append(StringView{it, opening-1}); - append('{'); - it = opening + 1; - } - else - { - append(StringView{it, opening}); - auto closing = std::find(opening, end, '}'); - if (closing == end) - throw runtime_error("Format string error, unclosed '{'"); - - const int index = (closing == opening + 1) ? - implicitIndex : str_to_int({opening+1, closing}); - - if (index >= params.size()) - throw runtime_error("Format string parameter index too big"); - - append(params[index]); - implicitIndex = index+1; - it = closing+1; - } - } -} - -StringView format_to(ArrayView buffer, StringView fmt, ArrayView params) -{ - char* ptr = buffer.begin(); - const char* end = buffer.end(); - format_impl(fmt, params, [&](StringView s) mutable { - for (auto c : s) - { - if (ptr == end) - throw runtime_error("buffer is too small"); - *ptr++ = c; - } - }); - if (ptr == end) - throw runtime_error("buffer is too small"); - *ptr = 0; - - return { buffer.begin(), ptr }; -} - -String format(StringView fmt, ArrayView params) -{ - ByteCount size = fmt.length(); - for (auto& s : params) size += s.length(); - String res; - res.reserve(size); - - format_impl(fmt, params, [&](StringView s) { res += s; }); - return res; -} - -UnitTest test_string{[]() -{ - kak_assert(String("youpi ") + "matin" == "youpi matin"); - - Vector splited = split("youpi:matin::tchou\\:kanaky:hihi\\:", ':', '\\'); - kak_assert(splited[0] == "youpi"); - kak_assert(splited[1] == "matin"); - kak_assert(splited[2] == ""); - kak_assert(splited[3] == "tchou:kanaky"); - kak_assert(splited[4] == "hihi:"); - - Vector splitedview = split("youpi:matin::tchou\\:kanaky:hihi\\:", ':'); - kak_assert(splitedview[0] == "youpi"); - kak_assert(splitedview[1] == "matin"); - kak_assert(splitedview[2] == ""); - kak_assert(splitedview[3] == "tchou\\"); - kak_assert(splitedview[4] == "kanaky"); - kak_assert(splitedview[5] == "hihi\\"); - kak_assert(splitedview[6] == ""); - - Vector wrapped = wrap_lines("wrap this paragraph\n respecting whitespaces and much_too_long_words", 16); - kak_assert(wrapped.size() == 6); - kak_assert(wrapped[0] == "wrap this"); - kak_assert(wrapped[1] == "paragraph"); - kak_assert(wrapped[2] == " respecting"); - kak_assert(wrapped[3] == "whitespaces and"); - kak_assert(wrapped[4] == "much_too_long_wo"); - kak_assert(wrapped[5] == "rds"); - - Vector wrapped2 = wrap_lines("error: unknown type", 7); - kak_assert(wrapped2.size() == 3); - kak_assert(wrapped2[0] == "error:"); - kak_assert(wrapped2[1] == "unknown"); - kak_assert(wrapped2[2] == "type"); - - kak_assert(escape("youpi:matin:tchou:", ':', '\\') == "youpi\\:matin\\:tchou\\:"); - kak_assert(unescape("youpi\\:matin\\:tchou\\:", ':', '\\') == "youpi:matin:tchou:"); - - kak_assert(prefix_match("tchou kanaky", "tchou")); - kak_assert(prefix_match("tchou kanaky", "tchou kanaky")); - kak_assert(prefix_match("tchou kanaky", "t")); - kak_assert(not prefix_match("tchou kanaky", "c")); - - kak_assert(subsequence_match("tchou kanaky", "tknky")); - kak_assert(subsequence_match("tchou kanaky", "knk")); - kak_assert(subsequence_match("tchou kanaky", "tchou kanaky")); - kak_assert(not subsequence_match("tchou kanaky", "tchou kanaky")); - - kak_assert(format("Youhou {1} {} {0} \\{}", 10, "hehe", 5) == "Youhou hehe 5 10 {}"); - - char buffer[20]; - kak_assert(format_to(buffer, "Hey {}", 15) == "Hey 15"); - - kak_assert(str_to_int("5") == 5); - kak_assert(str_to_int(to_string(INT_MAX)) == INT_MAX); - kak_assert(str_to_int(to_string(INT_MIN)) == INT_MIN); - kak_assert(str_to_int("00") == 0); - kak_assert(str_to_int("-0") == 0); - - kak_assert(replace("tchou/tcha/tchi", "/", "!!") == "tchou!!tcha!!tchi"); -}}; - } diff --git a/src/string.hh b/src/string.hh index bb026869..a4d62804 100644 --- a/src/string.hh +++ b/src/string.hh @@ -1,14 +1,11 @@ #ifndef string_hh_INCLUDED #define string_hh_INCLUDED -#include "array_view.hh" +#include "memory.hh" #include "hash.hh" -#include "optional.hh" #include "units.hh" #include "utf8.hh" -#include "vector.hh" -#include #include namespace Kakoune @@ -331,108 +328,6 @@ inline StringView operator"" _sv(const char* str, size_t) return StringView{str}; } -Vector split(StringView str, char separator, char escape); -Vector split(StringView str, char separator); - -StringView trim_whitespaces(StringView str); - -String escape(StringView str, StringView characters, char escape); -String unescape(StringView str, StringView characters, char escape); - -String indent(StringView str, StringView indent = " "); - -String replace(StringView str, StringView substr, StringView replacement); - -template -String join(const Container& container, char joiner, bool esc_joiner = true) -{ - const char to_escape[2] = { joiner, '\\' }; - String res; - for (const auto& str : container) - { - if (not res.empty()) - res += joiner; - res += esc_joiner ? escape(str, {to_escape, 2}, '\\') : str; - } - return res; -} - -inline bool prefix_match(StringView str, StringView prefix) -{ - return str.substr(0_byte, prefix.length()) == prefix; -} - -bool subsequence_match(StringView str, StringView subseq); - -String expand_tabs(StringView line, ColumnCount tabstop, ColumnCount col = 0); - -Vector wrap_lines(StringView text, ColumnCount max_width); - -int str_to_int(StringView str); // throws on error -Optional str_to_int_ifp(StringView str); - -inline String option_to_string(StringView opt) { return opt.str(); } -inline void option_from_string(StringView str, String& opt) { opt = str.str(); } -inline bool option_add(String& opt, StringView val) { opt += val; return not val.empty(); } - -template -struct InplaceString -{ - static_assert(N < 256, "InplaceString cannot handle sizes >= 256"); - - constexpr operator StringView() const { return {m_data, ByteCount{m_length}}; } - operator String() const { return {m_data, ByteCount{m_length}}; } - - unsigned char m_length; - char m_data[N]; -}; - -struct Hex { size_t val; }; -constexpr Hex hex(size_t val) { return {val}; } - -InplaceString<15> to_string(int val); -InplaceString<23> to_string(long int val); -InplaceString<23> to_string(size_t val); -InplaceString<23> to_string(long long int val); -InplaceString<23> to_string(Hex val); -InplaceString<23> to_string(float val); -InplaceString<7> to_string(Codepoint c); - -template -decltype(auto) to_string(const StronglyTypedNumber& val) -{ - return to_string((ValueType)val); -} - -namespace detail -{ - -template constexpr bool is_string = std::is_convertible::value; - -template>> -decltype(auto) format_param(const T& val) { return to_string(val); } - -template>> -StringView format_param(const T& val) { return val; } - -} - -String format(StringView fmt, ArrayView params); - -template -String format(StringView fmt, Types&&... params) -{ - return format(fmt, ArrayView{detail::format_param(std::forward(params))...}); -} - -StringView format_to(ArrayView buffer, StringView fmt, ArrayView params); - -template -StringView format_to(ArrayView buffer, StringView fmt, Types&&... params) -{ - return format_to(buffer, fmt, ArrayView{detail::format_param(std::forward(params))...}); -} - } #endif // string_hh_INCLUDED diff --git a/src/string_utils.cc b/src/string_utils.cc new file mode 100644 index 00000000..06e0eb7c --- /dev/null +++ b/src/string_utils.cc @@ -0,0 +1,451 @@ +#include "string_utils.hh" + +#include "exception.hh" +#include "utf8_iterator.hh" +#include "unit_tests.hh" + +namespace Kakoune +{ + +Vector split(StringView str, char separator, char escape) +{ + Vector res; + auto it = str.begin(); + auto start = it; + while (it != str.end()) + { + res.emplace_back(); + String& element = res.back(); + while (it != str.end()) + { + auto c = *it; + if (c == escape and it + 1 != str.end() and *(it+1) == separator) + { + element += StringView{start, it+1}; + element.back() = separator; + it += 2; + start = it; + } + else if (c == separator) + { + element += StringView{start, it}; + ++it; + start = it; + break; + } + else + ++it; + } + } + if (start != str.end()) + res.back() += StringView{start, str.end()}; + return res; +} + +Vector split(StringView str, char separator) +{ + Vector res; + if (str.empty()) + return res; + + auto beg = str.begin(); + for (auto it = beg; it != str.end(); ++it) + { + if (*it == separator) + { + res.emplace_back(beg, it); + beg = it + 1; + } + } + res.emplace_back(beg, str.end()); + return res; +} + +StringView trim_whitespaces(StringView str) +{ + auto beg = str.begin(), end = str.end(); + while (beg != end and is_blank(*beg)) + ++beg; + while (beg != end and is_blank(*(end-1))) + --end; + return {beg, end}; +} + + +String escape(StringView str, StringView characters, char escape) +{ + String res; + res.reserve(str.length()); + auto cbeg = characters.begin(), cend = characters.end(); + for (auto it = str.begin(), end = str.end(); it != end; ) + { + auto next = std::find_first_of(it, end, cbeg, cend); + if (next != end) + { + res += StringView{it, next+1}; + res.back() = escape; + res += *next; + it = next+1; + } + else + { + res += StringView{it, next}; + break; + } + } + return res; +} + +String unescape(StringView str, StringView characters, char escape) +{ + String res; + res.reserve(str.length()); + for (auto it = str.begin(), end = str.end(); it != end; ) + { + auto next = std::find(it, end, escape); + if (next != end and next+1 != end and contains(characters, *(next+1))) + { + res += StringView{it, next+1}; + res.back() = *(next+1); + it = next + 2; + } + else + { + res += StringView{it, next == end ? next : next + 1}; + it = next == end ? next : next + 1; + } + } + return res; +} + +String indent(StringView str, StringView indent) +{ + String res; + res.reserve(str.length()); + bool was_eol = true; + for (ByteCount i = 0; i < str.length(); ++i) + { + if (was_eol) + res += indent; + res += str[i]; + was_eol = is_eol(str[i]); + } + return res; +} + +String replace(StringView str, StringView substr, StringView replacement) +{ + String res; + for (auto it = str.begin(); it != str.end(); ) + { + auto match = std::search(it, str.end(), substr.begin(), substr.end()); + res += StringView{it, match}; + if (match == str.end()) + break; + + res += replacement; + it = match + (int)substr.length(); + } + return res; +} + +Optional str_to_int_ifp(StringView str) +{ + bool negative = not str.empty() and str[0] == '-'; + if (negative) + str = str.substr(1_byte); + + unsigned int res = 0; + for (auto c : str) + { + if (c < '0' or c > '9') + return {}; + res = res * 10 + c - '0'; + } + return negative ? -res : res; +} + +int str_to_int(StringView str) +{ + if (auto val = str_to_int_ifp(str)) + return *val; + throw runtime_error{str + " is not a number"}; +} + +InplaceString<15> to_string(int val) +{ + InplaceString<15> res; + res.m_length = sprintf(res.m_data, "%i", val); + return res; +} + +InplaceString<23> to_string(long int val) +{ + InplaceString<23> res; + res.m_length = sprintf(res.m_data, "%li", val); + return res; +} + +InplaceString<23> to_string(long long int val) +{ + InplaceString<23> res; + res.m_length = sprintf(res.m_data, "%lli", val); + return res; +} + +InplaceString<23> to_string(size_t val) +{ + InplaceString<23> res; + res.m_length = sprintf(res.m_data, "%zu", val); + return res; +} + +InplaceString<23> to_string(Hex val) +{ + InplaceString<23> res; + res.m_length = sprintf(res.m_data, "%zx", val.val); + return res; +} + +InplaceString<23> to_string(float val) +{ + InplaceString<23> res; + res.m_length = sprintf(res.m_data, "%f", val); + return res; +} + +InplaceString<7> to_string(Codepoint c) +{ + InplaceString<7> res; + char* ptr = res.m_data; + utf8::dump(ptr, c); + res.m_length = (int)(ptr - res.m_data); + return res; +} + +bool subsequence_match(StringView str, StringView subseq) +{ + auto it = str.begin(); + for (auto& c : subseq) + { + if (it == str.end()) + return false; + while (*it != c) + { + if (++it == str.end()) + return false; + } + ++it; + } + return true; +} + +String expand_tabs(StringView line, ColumnCount tabstop, ColumnCount col) +{ + String res; + res.reserve(line.length()); + for (auto it = line.begin(), end = line.end(); it != end; ) + { + if (*it == '\t') + { + ColumnCount end_col = (col / tabstop + 1) * tabstop; + res += String{' ', end_col - col}; + col = end_col; + ++it; + } + else + { + auto char_beg = it; + auto cp = utf8::read_codepoint(it, end); + res += {char_beg, it}; + col += codepoint_width(cp); + } + } + return res; +} + +Vector wrap_lines(StringView text, ColumnCount max_width) +{ + if (max_width <= 0) + throw runtime_error("Invalid max width"); + + using Utf8It = utf8::iterator; + Utf8It it{text.begin(), text}; + Utf8It end{text.end(), text}; + Utf8It line_begin = it; + Utf8It last_word_end = it; + + Vector lines; + while (it != end) + { + const CharCategories cat = categorize(*it, {}); + if (cat == CharCategories::EndOfLine) + { + lines.emplace_back(line_begin.base(), it.base()); + line_begin = it = it+1; + continue; + } + + Utf8It word_end = it+1; + while (word_end != end and categorize(*word_end, {}) == cat) + ++word_end; + + while (word_end > line_begin and + utf8::column_distance(line_begin.base(), word_end.base()) >= max_width) + { + auto line_end = last_word_end <= line_begin ? + Utf8It{utf8::advance(line_begin.base(), text.end(), max_width), text} + : last_word_end; + + lines.emplace_back(line_begin.base(), line_end.base()); + + while (line_end != end and is_horizontal_blank(*line_end)) + ++line_end; + + if (line_end != end and *line_end == '\n') + ++line_end; + + it = line_begin = line_end; + } + if (cat == CharCategories::Word or cat == CharCategories::Punctuation) + last_word_end = word_end; + + if (word_end > line_begin) + it = word_end; + } + if (line_begin != end) + lines.emplace_back(line_begin.base(), text.end()); + return lines; +} + +template +void format_impl(StringView fmt, ArrayView params, AppendFunc append) +{ + int implicitIndex = 0; + for (auto it = fmt.begin(), end = fmt.end(); it != end;) + { + auto opening = std::find(it, end, '{'); + if (opening == end) + { + append(StringView{it, opening}); + break; + } + else if (opening != it and *(opening-1) == '\\') + { + append(StringView{it, opening-1}); + append('{'); + it = opening + 1; + } + else + { + append(StringView{it, opening}); + auto closing = std::find(opening, end, '}'); + if (closing == end) + throw runtime_error("Format string error, unclosed '{'"); + + const int index = (closing == opening + 1) ? + implicitIndex : str_to_int({opening+1, closing}); + + if (index >= params.size()) + throw runtime_error("Format string parameter index too big"); + + append(params[index]); + implicitIndex = index+1; + it = closing+1; + } + } +} + +StringView format_to(ArrayView buffer, StringView fmt, ArrayView params) +{ + char* ptr = buffer.begin(); + const char* end = buffer.end(); + format_impl(fmt, params, [&](StringView s) mutable { + for (auto c : s) + { + if (ptr == end) + throw runtime_error("buffer is too small"); + *ptr++ = c; + } + }); + if (ptr == end) + throw runtime_error("buffer is too small"); + *ptr = 0; + + return { buffer.begin(), ptr }; +} + +String format(StringView fmt, ArrayView params) +{ + ByteCount size = fmt.length(); + for (auto& s : params) size += s.length(); + String res; + res.reserve(size); + + format_impl(fmt, params, [&](StringView s) { res += s; }); + return res; +} + +UnitTest test_string{[]() +{ + kak_assert(String("youpi ") + "matin" == "youpi matin"); + + Vector splited = split("youpi:matin::tchou\\:kanaky:hihi\\:", ':', '\\'); + kak_assert(splited[0] == "youpi"); + kak_assert(splited[1] == "matin"); + kak_assert(splited[2] == ""); + kak_assert(splited[3] == "tchou:kanaky"); + kak_assert(splited[4] == "hihi:"); + + Vector splitedview = split("youpi:matin::tchou\\:kanaky:hihi\\:", ':'); + kak_assert(splitedview[0] == "youpi"); + kak_assert(splitedview[1] == "matin"); + kak_assert(splitedview[2] == ""); + kak_assert(splitedview[3] == "tchou\\"); + kak_assert(splitedview[4] == "kanaky"); + kak_assert(splitedview[5] == "hihi\\"); + kak_assert(splitedview[6] == ""); + + Vector wrapped = wrap_lines("wrap this paragraph\n respecting whitespaces and much_too_long_words", 16); + kak_assert(wrapped.size() == 6); + kak_assert(wrapped[0] == "wrap this"); + kak_assert(wrapped[1] == "paragraph"); + kak_assert(wrapped[2] == " respecting"); + kak_assert(wrapped[3] == "whitespaces and"); + kak_assert(wrapped[4] == "much_too_long_wo"); + kak_assert(wrapped[5] == "rds"); + + Vector wrapped2 = wrap_lines("error: unknown type", 7); + kak_assert(wrapped2.size() == 3); + kak_assert(wrapped2[0] == "error:"); + kak_assert(wrapped2[1] == "unknown"); + kak_assert(wrapped2[2] == "type"); + + kak_assert(escape("youpi:matin:tchou:", ':', '\\') == "youpi\\:matin\\:tchou\\:"); + kak_assert(unescape("youpi\\:matin\\:tchou\\:", ':', '\\') == "youpi:matin:tchou:"); + + kak_assert(prefix_match("tchou kanaky", "tchou")); + kak_assert(prefix_match("tchou kanaky", "tchou kanaky")); + kak_assert(prefix_match("tchou kanaky", "t")); + kak_assert(not prefix_match("tchou kanaky", "c")); + + kak_assert(subsequence_match("tchou kanaky", "tknky")); + kak_assert(subsequence_match("tchou kanaky", "knk")); + kak_assert(subsequence_match("tchou kanaky", "tchou kanaky")); + kak_assert(not subsequence_match("tchou kanaky", "tchou kanaky")); + + kak_assert(format("Youhou {1} {} {0} \\{}", 10, "hehe", 5) == "Youhou hehe 5 10 {}"); + + char buffer[20]; + kak_assert(format_to(buffer, "Hey {}", 15) == "Hey 15"); + + kak_assert(str_to_int("5") == 5); + kak_assert(str_to_int(to_string(INT_MAX)) == INT_MAX); + kak_assert(str_to_int(to_string(INT_MIN)) == INT_MIN); + kak_assert(str_to_int("00") == 0); + kak_assert(str_to_int("-0") == 0); + + kak_assert(replace("tchou/tcha/tchi", "/", "!!") == "tchou!!tcha!!tchi"); +}}; + +} diff --git a/src/string_utils.hh b/src/string_utils.hh new file mode 100644 index 00000000..7263c6f6 --- /dev/null +++ b/src/string_utils.hh @@ -0,0 +1,115 @@ +#ifndef string_utils_hh_INCLUDED +#define string_utils_hh_INCLUDED + +#include "string.hh" +#include "vector.hh" +#include "optional.hh" + +namespace Kakoune +{ + +Vector split(StringView str, char separator, char escape); +Vector split(StringView str, char separator); + +StringView trim_whitespaces(StringView str); + +String escape(StringView str, StringView characters, char escape); +String unescape(StringView str, StringView characters, char escape); + +String indent(StringView str, StringView indent = " "); + +String replace(StringView str, StringView substr, StringView replacement); + +template +String join(const Container& container, char joiner, bool esc_joiner = true) +{ + const char to_escape[2] = { joiner, '\\' }; + String res; + for (const auto& str : container) + { + if (not res.empty()) + res += joiner; + res += esc_joiner ? escape(str, {to_escape, 2}, '\\') : str; + } + return res; +} + +inline bool prefix_match(StringView str, StringView prefix) +{ + return str.substr(0_byte, prefix.length()) == prefix; +} + +bool subsequence_match(StringView str, StringView subseq); + +String expand_tabs(StringView line, ColumnCount tabstop, ColumnCount col = 0); + +Vector wrap_lines(StringView text, ColumnCount max_width); + +int str_to_int(StringView str); // throws on error +Optional str_to_int_ifp(StringView str); + +inline String option_to_string(StringView opt) { return opt.str(); } +inline void option_from_string(StringView str, String& opt) { opt = str.str(); } +inline bool option_add(String& opt, StringView val) { opt += val; return not val.empty(); } + +template +struct InplaceString +{ + static_assert(N < 256, "InplaceString cannot handle sizes >= 256"); + + constexpr operator StringView() const { return {m_data, ByteCount{m_length}}; } + operator String() const { return {m_data, ByteCount{m_length}}; } + + unsigned char m_length; + char m_data[N]; +}; + +struct Hex { size_t val; }; +constexpr Hex hex(size_t val) { return {val}; } + +InplaceString<15> to_string(int val); +InplaceString<23> to_string(long int val); +InplaceString<23> to_string(size_t val); +InplaceString<23> to_string(long long int val); +InplaceString<23> to_string(Hex val); +InplaceString<23> to_string(float val); +InplaceString<7> to_string(Codepoint c); + +template +decltype(auto) to_string(const StronglyTypedNumber& val) +{ + return to_string((ValueType)val); +} + +namespace detail +{ + +template constexpr bool is_string = std::is_convertible::value; + +template>> +decltype(auto) format_param(const T& val) { return to_string(val); } + +template>> +StringView format_param(const T& val) { return val; } + +} + +String format(StringView fmt, ArrayView params); + +template +String format(StringView fmt, Types&&... params) +{ + return format(fmt, ArrayView{detail::format_param(std::forward(params))...}); +} + +StringView format_to(ArrayView buffer, StringView fmt, ArrayView params); + +template +StringView format_to(ArrayView buffer, StringView fmt, Types&&... params) +{ + return format_to(buffer, fmt, ArrayView{detail::format_param(std::forward(params))...}); +} + +} + +#endif // string_utils_hh_INCLUDED