2012-10-09 19:15:05 +02:00
|
|
|
#ifndef unicode_hh_INCLUDED
|
|
|
|
#define unicode_hh_INCLUDED
|
|
|
|
|
2017-01-08 23:30:15 +01:00
|
|
|
#include <cwctype>
|
|
|
|
#include <cwchar>
|
2012-10-09 19:15:05 +02:00
|
|
|
|
2017-06-26 16:28:41 +02:00
|
|
|
#include "array_view.hh"
|
2017-08-29 10:23:03 +02:00
|
|
|
#include "ranges.hh"
|
|
|
|
#include "units.hh"
|
2016-11-29 00:53:50 +01:00
|
|
|
|
2012-10-09 19:15:05 +02:00
|
|
|
namespace Kakoune
|
|
|
|
{
|
|
|
|
|
2015-04-29 14:51:15 +02:00
|
|
|
// A single codepoint, stored as a char32_t. The classification and
// case-mapping helpers in this header take their input as a Codepoint.
using Codepoint = char32_t;
|
2012-10-09 19:15:05 +02:00
|
|
|
|
2017-04-23 13:47:26 +02:00
|
|
|
// Returns true when c is the line feed character, which is the only codepoint
// this function accepts as an end of line.
inline bool is_eol(Codepoint c) noexcept
{
    constexpr Codepoint line_feed = U'\n';
    return line_feed == c;
}
|
|
|
|
|
2017-04-23 13:47:26 +02:00
|
|
|
// Returns true when c is a space or a horizontal tab. Line feed is
// deliberately not part of this set.
inline bool is_horizontal_blank(Codepoint c) noexcept
{
    if (c == ' ')
        return true;
    return c == '\t';
}
|
|
|
|
|
2017-04-23 13:47:26 +02:00
|
|
|
// Returns true when c is a space, a horizontal tab or a line feed.
inline bool is_blank(Codepoint c) noexcept
{
    switch (c)
    {
        case U' ':
        case U'\t':
        case U'\n':
            return true;
        default:
            return false;
    }
}
|
|
|
|
|
2013-12-14 15:49:10 +01:00
|
|
|
// Selects which word definition is_word and categorize apply.
enum WordType
{
    // Alphanumeric codepoints plus the caller supplied extra word characters.
    Word,
    // Any codepoint that is not blank.
    WORD
};
|
|
|
|
|
|
|
|
template<WordType word_type = Word>
|
2018-11-27 08:13:29 +01:00
|
|
|
inline bool is_word(Codepoint c, ConstArrayView<Codepoint> extra_word_chars = {'_'}) noexcept
|
2013-12-14 15:49:10 +01:00
|
|
|
{
|
2018-11-27 08:13:29 +01:00
|
|
|
return iswalnum((wchar_t)c) or contains(extra_word_chars, c);
|
2013-12-14 15:49:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
template<>
|
2017-06-26 16:28:41 +02:00
|
|
|
inline bool is_word<WORD>(Codepoint c, ConstArrayView<Codepoint>) noexcept
|
2013-12-14 15:49:10 +01:00
|
|
|
{
|
2017-02-23 01:56:40 +01:00
|
|
|
return not is_blank(c);
|
2013-12-14 15:49:10 +01:00
|
|
|
}
|
|
|
|
|
2018-11-27 08:13:29 +01:00
|
|
|
// Returns true when c is neither a word character (as decided by is_word with
// extra_word_chars, '_' by default) nor a blank.
inline bool is_punctuation(Codepoint c, ConstArrayView<Codepoint> extra_word_chars = {'_'}) noexcept
{
    // is_blank is only consulted once c has been ruled out as a word character.
    return not is_word(c, extra_word_chars) and not is_blank(c);
}
|
|
|
|
|
2017-04-23 13:47:26 +02:00
|
|
|
// Returns true when c is one of the unaccented letters 'a'..'z' or 'A'..'Z'.
inline bool is_basic_alpha(Codepoint c) noexcept
{
    const bool lowercase = 'a' <= c and c <= 'z';
    const bool uppercase = 'A' <= c and c <= 'Z';
    return lowercase or uppercase;
}
|
|
|
|
|
2018-03-25 07:35:33 +02:00
|
|
|
// Returns true when c is one of the decimal digits '0'..'9'.
inline bool is_basic_digit(Codepoint c) noexcept
{
    if (c < '0')
        return false;
    return c <= '9';
}
|
|
|
|
|
|
|
|
// Returns true when c may appear in an identifier: a basic letter, a basic
// digit, an underscore or a dash.
inline bool is_identifier(Codepoint c) noexcept
{
    if (is_basic_alpha(c) or is_basic_digit(c))
        return true;
    return c == '_' or c == '-';
}
|
|
|
|
|
2017-04-23 13:47:26 +02:00
|
|
|
// Returns the width of c as a ColumnCount.
// A line feed always counts as 1. Every other codepoint is measured with
// wcwidth, and a negative answer from wcwidth is replaced by 1.
inline ColumnCount codepoint_width(Codepoint c) noexcept
{
    const int cells = (c == '\n') ? 1 : wcwidth(static_cast<wchar_t>(c));
    return cells < 0 ? 1 : cells;
}
|
|
|
|
|
2013-12-14 15:49:10 +01:00
|
|
|
// Classes a codepoint can be sorted into by categorize.
enum class CharCategories
{
    Blank,       // space or horizontal tab
    EndOfLine,   // line feed
    Word,        // word character for the selected WordType
    Punctuation, // everything else
};
|
|
|
|
|
|
|
|
template<WordType word_type = Word>
|
2017-06-26 16:28:41 +02:00
|
|
|
inline CharCategories categorize(Codepoint c, ConstArrayView<Codepoint> extra_word_chars) noexcept
|
2013-12-14 15:49:10 +01:00
|
|
|
{
|
|
|
|
if (is_eol(c))
|
|
|
|
return CharCategories::EndOfLine;
|
2015-04-15 01:34:00 +02:00
|
|
|
if (is_horizontal_blank(c))
|
2013-12-14 15:49:10 +01:00
|
|
|
return CharCategories::Blank;
|
2017-06-26 16:28:41 +02:00
|
|
|
if (word_type == WORD or is_word(c, extra_word_chars))
|
2016-04-03 19:25:48 +02:00
|
|
|
return CharCategories::Word;
|
|
|
|
return CharCategories::Punctuation;
|
2013-12-14 15:49:10 +01:00
|
|
|
}
|
|
|
|
|
2017-04-23 13:47:26 +02:00
|
|
|
// Lowercases cp by handing it to towlower as a wchar_t.
inline Codepoint to_lower(Codepoint cp) noexcept
{
    const auto wide = static_cast<wchar_t>(cp);
    return towlower(wide);
}
|
|
|
|
// Uppercases cp by handing it to towupper as a wchar_t.
inline Codepoint to_upper(Codepoint cp) noexcept
{
    const auto wide = static_cast<wchar_t>(cp);
    return towupper(wide);
}
|
2015-11-11 01:21:20 +01:00
|
|
|
|
2017-10-06 13:51:09 +02:00
|
|
|
// Returns true when iswlower reports cp (passed as a wchar_t) as lowercase.
inline bool is_lower(Codepoint cp) noexcept
{
    return iswlower(static_cast<wchar_t>(cp)) != 0;
}
|
|
|
|
// Returns true when iswupper reports cp (passed as a wchar_t) as uppercase.
inline bool is_upper(Codepoint cp) noexcept
{
    return iswupper(static_cast<wchar_t>(cp)) != 0;
}
|
|
|
|
|
2017-04-23 13:47:26 +02:00
|
|
|
// Lowercases c when it is one of 'A'..'Z'; any other char is returned as is.
inline char to_lower(char c) noexcept
{
    if (c < 'A' or c > 'Z')
        return c;
    return static_cast<char>(c - 'A' + 'a');
}
|
|
|
|
// Uppercases c when it is one of 'a'..'z'; any other char is returned as is.
inline char to_upper(char c) noexcept
{
    if (c < 'a' or c > 'z')
        return c;
    return static_cast<char>(c - 'a' + 'A');
}
|
2015-11-11 01:21:20 +01:00
|
|
|
|
2017-10-06 13:51:09 +02:00
|
|
|
// Returns true when c is one of 'a'..'z'.
inline bool is_lower(char c) noexcept
{
    if (c < 'a')
        return false;
    return c <= 'z';
}
|
|
|
|
// Returns true when c is one of 'A'..'Z'.
inline bool is_upper(char c) noexcept
{
    if (c < 'A')
        return false;
    return c <= 'Z';
}
|
|
|
|
|
2012-10-09 19:15:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif // unicode_hh_INCLUDED
|