2012-10-09 19:15:05 +02:00
|
|
|
#ifndef unicode_hh_INCLUDED
|
|
|
|
#define unicode_hh_INCLUDED
|
|
|
|
|
|
|
|
#include <cstdint>
|
2013-07-15 14:49:50 +02:00
|
|
|
#include <ctype.h>
|
2012-10-09 19:15:05 +02:00
|
|
|
|
|
|
|
namespace Kakoune
{

// A single Unicode code point, stored as a 32-bit unsigned value.
using Codepoint = uint32_t;

// Returns true when c is the line feed character.
inline bool is_eol(Codepoint c)
{
    return c == '\n';
}

// Returns true when c is a space or a horizontal tab.
inline bool is_blank(Codepoint c)
{
    return c == ' ' or c == '\t';
}

// Returns true when c is a space or a horizontal tab.
// Currently accepts exactly the same characters as is_blank().
inline bool is_horizontal_blank(Codepoint c)
{
    return c == ' ' or c == '\t';
}

// Selects which definition of "word character" is_word() and categorize()
// apply:
//   Word: underscore and alphanumeric characters only.
//   WORD: every character that is neither blank nor end of line.
enum WordType { Word, WORD };

// Returns true when c is a word character for the Word definition:
// an underscore, or a character isalnum() reports as alphanumeric.
template<WordType word_type = Word>
inline bool is_word(Codepoint c)
{
    // isalnum() is only defined for arguments representable as unsigned char
    // (or EOF). A Codepoint can be far larger, and passing such a value is
    // undefined behaviour, so code points outside that range are rejected
    // before the call and are never classified as alphanumeric here.
    constexpr Codepoint max_ctype_value = 0xFF;
    return c == '_' or
           (c <= max_ctype_value and isalnum(static_cast<int>(c)));
}

// WORD definition: anything that is not a blank and not an end of line.
template<>
inline bool is_word<WORD>(Codepoint c)
{
    return !is_blank(c) and !is_eol(c);
}

// Returns true when c is neither a word character (Word definition),
// nor a blank, nor an end of line.
inline bool is_punctuation(Codepoint c)
{
    return not (is_word(c) or is_blank(c) or is_eol(c));
}

// Classes a code point can be sorted into by categorize().
enum class CharCategories
{
    Blank,
    EndOfLine,
    Word,
    Punctuation,
};

// Classifies c into one of the CharCategories.
//
// Characters accepted by is_word() are always Word, '\n' is EndOfLine and
// space/tab are Blank. Whatever remains is Punctuation under the Word
// definition, and is folded into Word under the WORD definition.
template<WordType word_type = Word>
inline CharCategories categorize(Codepoint c)
{
    if (is_word(c))
        return CharCategories::Word;
    if (is_eol(c))
        return CharCategories::EndOfLine;
    if (is_blank(c))
        return CharCategories::Blank;
    // Only non-alphanumeric, non-blank, non-eol characters reach this point.
    return word_type == WORD ? CharCategories::Word
                             : CharCategories::Punctuation;
}

}
|
|
|
|
|
|
|
|
#endif // unicode_hh_INCLUDED
|