Support different types for iterators and sentinels in utf8 functions
This commit is contained in:
parent
9fec1b3faf
commit
4cfb46ff2e
|
@ -163,7 +163,7 @@ private:
|
||||||
};
|
};
|
||||||
friend constexpr bool with_bit_ops(Meta::Type<Flags>) { return true; }
|
friend constexpr bool with_bit_ops(Meta::Type<Flags>) { return true; }
|
||||||
|
|
||||||
using Iterator = utf8::iterator<const char*, Codepoint, int, InvalidPolicy>;
|
using Iterator = utf8::iterator<const char*, const char*, Codepoint, int, InvalidPolicy>;
|
||||||
using NodeIndex = ParsedRegex::NodeIndex;
|
using NodeIndex = ParsedRegex::NodeIndex;
|
||||||
|
|
||||||
NodeIndex disjunction(unsigned capture = -1)
|
NodeIndex disjunction(unsigned capture = -1)
|
||||||
|
|
48
src/utf8.hh
48
src/utf8.hh
|
@ -43,8 +43,8 @@ struct Pass
|
||||||
// returns the codepoint of the character whose first byte
|
// returns the codepoint of the character whose first byte
|
||||||
// is pointed to by it
|
// is pointed to by it
|
||||||
template<typename InvalidPolicy = utf8::InvalidPolicy::Pass,
|
template<typename InvalidPolicy = utf8::InvalidPolicy::Pass,
|
||||||
typename Iterator>
|
typename Iterator, typename Sentinel>
|
||||||
Codepoint read_codepoint(Iterator& it, const Iterator& end)
|
Codepoint read_codepoint(Iterator& it, const Sentinel& end)
|
||||||
noexcept(noexcept(InvalidPolicy{}(0)))
|
noexcept(noexcept(InvalidPolicy{}(0)))
|
||||||
{
|
{
|
||||||
if (it == end)
|
if (it == end)
|
||||||
|
@ -83,8 +83,8 @@ Codepoint read_codepoint(Iterator& it, const Iterator& end)
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename InvalidPolicy = utf8::InvalidPolicy::Pass,
|
template<typename InvalidPolicy = utf8::InvalidPolicy::Pass,
|
||||||
typename Iterator>
|
typename Iterator, typename Sentinel>
|
||||||
Codepoint codepoint(Iterator it, const Iterator& end)
|
Codepoint codepoint(Iterator it, const Sentinel& end)
|
||||||
noexcept(noexcept(read_codepoint<InvalidPolicy>(it, end)))
|
noexcept(noexcept(read_codepoint<InvalidPolicy>(it, end)))
|
||||||
{
|
{
|
||||||
return read_codepoint<InvalidPolicy>(it, end);
|
return read_codepoint<InvalidPolicy>(it, end);
|
||||||
|
@ -125,8 +125,8 @@ inline ByteCount codepoint_size(Codepoint cp)
|
||||||
throw invalid_codepoint{};
|
throw invalid_codepoint{};
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Iterator>
|
template<typename Iterator, typename Sentinel>
|
||||||
void to_next(Iterator& it, const Iterator& end) noexcept
|
void to_next(Iterator& it, const Sentinel& end) noexcept
|
||||||
{
|
{
|
||||||
if (it != end)
|
if (it != end)
|
||||||
++it;
|
++it;
|
||||||
|
@ -135,8 +135,8 @@ void to_next(Iterator& it, const Iterator& end) noexcept
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns an iterator to the next character's first byte
|
// returns an iterator to the next character's first byte
|
||||||
template<typename Iterator>
|
template<typename Iterator, typename Sentinel>
|
||||||
Iterator next(Iterator it, const Iterator& end) noexcept
|
Iterator next(Iterator it, const Sentinel& end) noexcept
|
||||||
{
|
{
|
||||||
to_next(it, end);
|
to_next(it, end);
|
||||||
return it;
|
return it;
|
||||||
|
@ -144,16 +144,16 @@ Iterator next(Iterator it, const Iterator& end) noexcept
|
||||||
|
|
||||||
// returns its parameter if it points to a character's first byte,
|
// returns its parameter if it points to a character's first byte,
|
||||||
// or else returns the next character's first byte
|
// or else returns the next character's first byte
|
||||||
template<typename Iterator>
|
template<typename Iterator, typename Sentinel>
|
||||||
Iterator finish(Iterator it, const Iterator& end) noexcept
|
Iterator finish(Iterator it, const Sentinel& end) noexcept
|
||||||
{
|
{
|
||||||
while (it != end and (*(it) & 0xC0) == 0x80)
|
while (it != end and (*(it) & 0xC0) == 0x80)
|
||||||
++it;
|
++it;
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Iterator>
|
template<typename Iterator, typename Sentinel>
|
||||||
void to_previous(Iterator& it, const Iterator& begin) noexcept
|
void to_previous(Iterator& it, const Sentinel& begin) noexcept
|
||||||
{
|
{
|
||||||
if (it != begin)
|
if (it != begin)
|
||||||
--it;
|
--it;
|
||||||
|
@ -161,8 +161,8 @@ void to_previous(Iterator& it, const Iterator& begin) noexcept
|
||||||
--it;
|
--it;
|
||||||
}
|
}
|
||||||
// returns an iterator to the previous character's first byte
|
// returns an iterator to the previous character's first byte
|
||||||
template<typename Iterator>
|
template<typename Iterator, typename Sentinel>
|
||||||
Iterator previous(Iterator it, const Iterator& begin) noexcept
|
Iterator previous(Iterator it, const Sentinel& begin) noexcept
|
||||||
{
|
{
|
||||||
to_previous(it, begin);
|
to_previous(it, begin);
|
||||||
return it;
|
return it;
|
||||||
|
@ -171,8 +171,8 @@ Iterator previous(Iterator it, const Iterator& begin) noexcept
|
||||||
// returns an iterator pointing to the first byte of the
|
// returns an iterator pointing to the first byte of the
|
||||||
// dth character after (or before if d < 0) the character
|
// dth character after (or before if d < 0) the character
|
||||||
// pointed to by it
|
// pointed to by it
|
||||||
template<typename Iterator>
|
template<typename Iterator, typename Sentinel>
|
||||||
Iterator advance(Iterator it, const Iterator& end, CharCount d) noexcept
|
Iterator advance(Iterator it, const Sentinel& end, CharCount d) noexcept
|
||||||
{
|
{
|
||||||
if (it == end)
|
if (it == end)
|
||||||
return it;
|
return it;
|
||||||
|
@ -193,8 +193,8 @@ Iterator advance(Iterator it, const Iterator& end, CharCount d) noexcept
|
||||||
// returns an iterator pointing to the first byte of the
|
// returns an iterator pointing to the first byte of the
|
||||||
// character at the dth column after (or before if d < 0)
|
// character at the dth column after (or before if d < 0)
|
||||||
// the character pointed to by it
|
// the character pointed to by it
|
||||||
template<typename Iterator>
|
template<typename Iterator, typename Sentinel>
|
||||||
Iterator advance(Iterator it, const Iterator& end, ColumnCount d) noexcept
|
Iterator advance(Iterator it, const Sentinel& end, ColumnCount d) noexcept
|
||||||
{
|
{
|
||||||
if (it == end)
|
if (it == end)
|
||||||
return it;
|
return it;
|
||||||
|
@ -222,8 +222,8 @@ Iterator advance(Iterator it, const Iterator& end, ColumnCount d) noexcept
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns the character count between begin and end
|
// returns the character count between begin and end
|
||||||
template<typename Iterator>
|
template<typename Iterator, typename Sentinel>
|
||||||
CharCount distance(Iterator begin, const Iterator& end) noexcept
|
CharCount distance(Iterator begin, const Sentinel& end) noexcept
|
||||||
{
|
{
|
||||||
CharCount dist = 0;
|
CharCount dist = 0;
|
||||||
|
|
||||||
|
@ -236,8 +236,8 @@ CharCount distance(Iterator begin, const Iterator& end) noexcept
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns the column count between begin and end
|
// returns the column count between begin and end
|
||||||
template<typename Iterator>
|
template<typename Iterator, typename Sentinel>
|
||||||
ColumnCount column_distance(Iterator begin, const Iterator& end) noexcept
|
ColumnCount column_distance(Iterator begin, const Sentinel& end) noexcept
|
||||||
{
|
{
|
||||||
ColumnCount dist = 0;
|
ColumnCount dist = 0;
|
||||||
|
|
||||||
|
@ -247,8 +247,8 @@ ColumnCount column_distance(Iterator begin, const Iterator& end) noexcept
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns an iterator to the first byte of the character it points into
|
// returns an iterator to the first byte of the character it points into
|
||||||
template<typename Iterator>
|
template<typename Iterator, typename Sentinel>
|
||||||
Iterator character_start(Iterator it, const Iterator& begin) noexcept
|
Iterator character_start(Iterator it, const Sentinel& begin) noexcept
|
||||||
{
|
{
|
||||||
while (it != begin and not is_character_start(*it))
|
while (it != begin and not is_character_start(*it))
|
||||||
--it;
|
--it;
|
||||||
|
|
|
@ -14,6 +14,7 @@ namespace utf8
|
||||||
// adapter for an iterator on bytes which allows iterating
|
// adapter for an iterator on bytes which allows iterating
|
||||||
// on Unicode codepoints instead.
|
// on Unicode codepoints instead.
|
||||||
template<typename BaseIt,
|
template<typename BaseIt,
|
||||||
|
typename Sentinel = BaseIt,
|
||||||
typename CodepointType = Codepoint,
|
typename CodepointType = Codepoint,
|
||||||
typename DifferenceType = CharCount,
|
typename DifferenceType = CharCount,
|
||||||
typename InvalidPolicy = utf8::InvalidPolicy::Pass>
|
typename InvalidPolicy = utf8::InvalidPolicy::Pass>
|
||||||
|
@ -25,7 +26,7 @@ public:
|
||||||
iterator() = default;
|
iterator() = default;
|
||||||
constexpr static bool noexcept_policy = noexcept(InvalidPolicy{}(0));
|
constexpr static bool noexcept_policy = noexcept(InvalidPolicy{}(0));
|
||||||
|
|
||||||
iterator(BaseIt it, BaseIt begin, BaseIt end) noexcept
|
iterator(BaseIt it, Sentinel begin, Sentinel end) noexcept
|
||||||
: m_it{std::move(it)}, m_begin{std::move(begin)}, m_end{std::move(end)}
|
: m_it{std::move(it)}, m_begin{std::move(begin)}, m_end{std::move(end)}
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
@ -105,8 +106,13 @@ public:
|
||||||
bool operator> (const iterator& other) const noexcept { return m_it > other.m_it; }
|
bool operator> (const iterator& other) const noexcept { return m_it > other.m_it; }
|
||||||
bool operator>= (const iterator& other) const noexcept { return m_it >= other.m_it; }
|
bool operator>= (const iterator& other) const noexcept { return m_it >= other.m_it; }
|
||||||
|
|
||||||
bool operator==(const BaseIt& other) const noexcept { return m_it == other; }
|
template<typename T>
|
||||||
bool operator!=(const BaseIt& other) const noexcept { return m_it != other; }
|
std::enable_if_t<std::is_same<T, BaseIt>::value or std::is_same<T, Sentinel>::value, bool>
|
||||||
|
operator==(const T& other) const noexcept { return m_it == other; }
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
std::enable_if_t<std::is_same<T, BaseIt>::value or std::is_same<T, Sentinel>::value, bool>
|
||||||
|
operator!=(const T& other) const noexcept { return m_it != other; }
|
||||||
|
|
||||||
bool operator< (const BaseIt& other) const noexcept { return m_it < other; }
|
bool operator< (const BaseIt& other) const noexcept { return m_it < other; }
|
||||||
bool operator<= (const BaseIt& other) const noexcept { return m_it <= other; }
|
bool operator<= (const BaseIt& other) const noexcept { return m_it <= other; }
|
||||||
|
@ -136,8 +142,8 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
BaseIt m_it;
|
BaseIt m_it;
|
||||||
BaseIt m_begin;
|
Sentinel m_begin;
|
||||||
BaseIt m_end;
|
Sentinel m_end;
|
||||||
mutable CodepointType m_value = -1;
|
mutable CodepointType m_value = -1;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user