#include "string.hh"

#include "exception.hh"
#include "utils.hh"
#include "utf8_iterator.hh"

namespace Kakoune
{

// Byte-wise ordering of two views: compare the common prefix, and when it
// is equal the shorter view sorts first.
// strncmp stops at the first NUL byte, so bytes located after an embedded
// NUL do not take part in the comparison.
bool operator<(StringView lhs, StringView rhs)
{
    int cmp = strncmp(lhs.data(), rhs.data(),
                      static_cast<int>(std::min(lhs.length(), rhs.length())));
    if (cmp == 0)
        return lhs.length() < rhs.length();
    return cmp < 0;
}

// Split str on separator, copying each piece into its own String.
// The two character sequence <escape><separator> is emitted as a literal
// separator inside the current piece instead of ending it.
// An empty input yields an empty vector, and a trailing separator does not
// produce a trailing empty piece (the outer loop ends as soon as the input
// is exhausted).
std::vector<String> split(StringView str, char separator, char escape)
{
    std::vector<String> res;
    auto it = str.begin();
    while (it != str.end())
    {
        res.emplace_back();
        String& element = res.back();
        while (it != str.end())
        {
            auto c = *it;
            if (c == escape and it + 1 != str.end() and *(it+1) == separator)
            {
                // escaped separator: keep the separator, drop the escape
                element += separator;
                it += 2;
            }
            else if (c == separator)
            {
                // end of this piece, skip the separator itself
                ++it;
                break;
            }
            else
            {
                element += c;
                ++it;
            }
        }
    }
    return res;
}

// Split str on separator without any escape handling. Each piece is built
// from the [begin, end) range delimiting it in str.
// The result always holds at least one piece: an empty input gives a single
// empty piece, and a trailing separator gives a trailing empty piece.
// NOTE(review): the vector element type was missing from this copy of the
// file; StringView is assumed here -- confirm against the declaration in
// string.hh (if it is String, change both occurrences below).
std::vector<StringView> split(StringView str, char separator)
{
    std::vector<StringView> res;
    auto beg = str.begin();
    for (auto it = beg; it != str.end(); ++it)
    {
        if (*it == separator)
        {
            res.emplace_back(beg, it);
            beg = it + 1;
        }
    }
    res.emplace_back(beg, str.end());
    return res;
}

// Return a copy of str in which every character that appears in characters
// is preceded by the escape character.
String escape(StringView str, StringView characters, char escape)
{
    String res;
    for (auto& c : str)
    {
        if (contains(characters, c))
            res += escape;
        res += c;
    }
    return res;
}

// Reverse of escape(): whenever a character listed in characters directly
// follows an escape character in the output built so far, that escape
// character is overwritten by the listed character. Everything else is
// copied unchanged.
String unescape(StringView str, StringView characters, char escape)
{
    String res;
    for (auto& c : str)
    {
        if (contains(characters, c) and not res.empty() and res.back() == escape)
            res.back() = c;
        else
            res += c;
    }
    return res;
}

// Parse an integer from the beginning of str using the sscanf "%i"
// conversion (decimal, 0-prefixed octal and 0x-prefixed hexadecimal).
// Throws runtime_error when no integer can be read. Characters following
// a successfully parsed number are ignored.
int str_to_int(StringView str)
{
    int res = 0;
    if (sscanf(str.zstr(), "%i", &res) != 1)
        // leading space so the message reads "<str> is not a number"
        throw runtime_error(str + " is not a number");
    return res;
}

// Format val in decimal.
String to_string(int val)
{
    char buf[16];
    // bounded write: cannot overrun buf whatever the width of int is
    snprintf(buf, sizeof(buf), "%i", val);
    return buf;
}

// Return true when str starts with prefix (an empty prefix always matches).
bool prefix_match(StringView str, StringView prefix)
{
    auto it = str.begin();
    for (auto& c : prefix)
    {
        if (it ==str.end() or *it++ != c)
            return false;
    }
    return true;
}

// Return true when every character of subseq can be found in str, in the
// same order, not necessarily contiguously (an empty subseq always matches).
bool subsequence_match(StringView str, StringView subseq)
{
    auto it = str.begin();
    for (auto& c : subseq)
    {
        if (it == str.end())
            return false;
        // advance until the wanted character is found, or str is exhausted
        while (*it != c)
        {
            if (++it == str.end())
                return false;
        }
        ++it;
    }
    return true;
}

// Return a copy of line in which each tab is replaced by the number of
// spaces needed to reach the next multiple of tabstop.
// col is the column at which line starts; it is advanced while walking the
// line so that every tab is expanded relative to its real position.
String expand_tabs(StringView line, CharCount tabstop, CharCount col)
{
    String res;
    using Utf8It = utf8::iterator<const char*>;
    for (Utf8It it = line.begin(); it.base() < line.end(); ++it)
    {
        if (*it == '\t')
        {
            CharCount end_col = (col / tabstop + 1) * tabstop;
            res += String{' ', end_col - col};
            col = end_col;
        }
        else
        {
            // NOTE(review): relies on String::operator+= accepting the value
            // yielded by the utf8 iterator -- confirm that non-ASCII
            // characters are appended correctly.
            res += *it;
            col = col + 1;
        }
    }
    return res;
}

// Break text into lines of at most max_width characters.
// The text is scanned as runs of characters sharing the same category (as
// given by categorize). A line is flushed when adding the current run would
// exceed max_width, or when the run is an end-of-line run. line_end only
// moves past Word and Punctuation runs, so blank runs at the end of a
// flushed line are not part of it.
// NOTE(review): the vector element type was missing from this copy of the
// file; StringView is assumed here -- confirm against the declaration in
// string.hh (if it is String, change both occurrences below).
std::vector<StringView> wrap_lines(StringView text, CharCount max_width)
{
    using Utf8It = utf8::iterator<const char*>;
    Utf8It word_begin{text.begin()};
    Utf8It word_end{word_begin};
    Utf8It end{text.end()};
    CharCount col = 0;
    std::vector<StringView> lines;
    const char* line_begin = text.begin();
    const char* line_end = line_begin;

    while (word_begin != end)
    {
        // extend word_end to cover the whole run of same-category characters
        const CharCategories cat = categorize(*word_begin);
        do
        {
            ++word_end;
        } while (word_end != end and categorize(*word_end) == cat);

        col += word_end - word_begin;
        if (col > max_width or cat == CharCategories::EndOfLine)
        {
            lines.emplace_back(line_begin, line_end);
            // a blank or end-of-line run that triggers the break is dropped,
            // any other run becomes the start of the next line
            line_begin = (cat == CharCategories::EndOfLine or
                          cat == CharCategories::Blank) ? word_end.base()
                                                        : word_begin.base();
            col = word_end - Utf8It{line_begin};
        }
        if (cat == CharCategories::Word or cat == CharCategories::Punctuation)
            line_end = word_end.base();

        word_begin = word_end;
    }
    // flush whatever remains after the last break
    if (line_begin != word_begin.base())
        lines.emplace_back(line_begin, word_begin.base());
    return lines;
}

// Rotate the 32 bit value x left by r bits. r is expected to be in 1..31
// (a shift by 32 would be performed for r == 0).
[[gnu::always_inline]]
static inline uint32_t rotl(uint32_t x, int8_t r)
{
    return (x << r) | (x >> (32 - r));
}

// Final mixing step of the hash: xor-shift / multiply rounds on h.
[[gnu::always_inline]]
static inline uint32_t fmix(uint32_t h)
{
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;

    return h;
}

// murmur3 hash, based on https://github.com/PeterScott/murmur3
// Hashes the len bytes starting at input into a 32 bit value, returned as
// a size_t.
size_t hash_data(const char* input, size_t len)
{
    const uint8_t* data = reinterpret_cast<const uint8_t*>(input);
    uint32_t hash = 0x1235678; // initial hash value (seed)
    constexpr uint32_t c1 = 0xcc9e2d51;
    constexpr uint32_t c2 = 0x1b873593;

    // body: consume the input four bytes at a time. blocks points just past
    // the last full block and is indexed with negative offsets.
    // NOTE(review): blocks[i] reads a uint32_t through a pointer derived
    // from a char buffer; confirm callers/platform tolerate unaligned reads.
    const int nblocks = len / 4;
    const uint32_t* blocks = reinterpret_cast<const uint32_t*>(data + nblocks*4);

    for (int i = -nblocks; i; ++i)
    {
        uint32_t key = blocks[i];
        key *= c1;
        key = rotl(key, 15);
        key *= c2;

        hash ^= key;
        hash = rotl(hash, 13);
        hash = hash * 5 + 0xe6546b64;
    }

    // tail: mix in the 1 to 3 bytes left after the last full block
    const uint8_t* tail = data + nblocks * 4;
    uint32_t key = 0;
    switch (len & 3)
    {
        case 3: key ^= tail[2] << 16; // fall through
        case 2: key ^= tail[1] << 8;  // fall through
        case 1: key ^= tail[0];
                key *= c1;
                key = rotl(key,15);
                key *= c2;
                hash ^= key;
    }

    // finalization
    hash ^= len;
    hash = fmix(hash);

    return hash;
}

}