Regex: Introduce RegexExecFlags::PrevAvailable
Also rework the zero-width assertion checks (look-behind, line start/end, word boundary).
This commit is contained in:
parent
73e177ec59
commit
9ec376135b
|
@ -31,7 +31,6 @@ void option_from_string(StringView str, Regex& re)
|
|||
re = Regex{str};
|
||||
}
|
||||
|
||||
|
||||
void regex_mismatch(const Regex& re)
|
||||
{
|
||||
write_to_debug_buffer(format("regex mismatch for '{}'", re.str()));
|
||||
|
|
|
@ -133,6 +133,8 @@ inline RegexExecFlags convert_flags(RegexConstant::match_flag_type flags)
|
|||
res |= RegexExecFlags::NotInitialNull;
|
||||
if (flags & RegexConstant::match_any)
|
||||
res |= RegexExecFlags::AnyMatch;
|
||||
if (flags & RegexConstant::match_prev_avail)
|
||||
res |= RegexExecFlags::PrevAvailable;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@ -261,7 +263,7 @@ private:
|
|||
if (m_results.size() and m_results[0].first == m_results[0].second)
|
||||
additional_flags |= RegexConstant::match_not_initial_null;
|
||||
if (m_begin != m_next_begin)
|
||||
additional_flags |= RegexConstant::match_not_bob;
|
||||
additional_flags |= RegexConstant::match_not_bob | RegexConstant::match_prev_avail;
|
||||
|
||||
if (not regex_search(m_next_begin, m_end, m_results, *m_regex,
|
||||
m_flags | additional_flags))
|
||||
|
|
|
@ -1041,6 +1041,11 @@ auto test_regex = UnitTest{[]{
|
|||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "f");
|
||||
}
|
||||
|
||||
{
|
||||
TestVM<> vm{R"((?<!f).)"};
|
||||
kak_assert(vm.exec("f"));
|
||||
}
|
||||
|
||||
{
|
||||
TestVM<> vm{R"((?!foo)...)"};
|
||||
kak_assert(not vm.exec("foo"));
|
||||
|
@ -1110,6 +1115,12 @@ auto test_regex = UnitTest{[]{
|
|||
TestVM<> vm{R"(()*)"};
|
||||
kak_assert(not vm.exec(" "));
|
||||
}
|
||||
|
||||
{
|
||||
TestVM<> vm{R"(\b(?<!-)(a|b|)(?!-)\b)"};
|
||||
kak_assert(vm.exec("# foo bar", RegexExecFlags::Search));
|
||||
kak_assert(*vm.captures()[0] == '#');
|
||||
}
|
||||
}};
|
||||
|
||||
}
|
||||
|
|
|
@ -80,6 +80,7 @@ enum class RegexExecFlags
|
|||
NotInitialNull = 1 << 6,
|
||||
AnyMatch = 1 << 7,
|
||||
NoSaves = 1 << 8,
|
||||
PrevAvailable = 1 << 9,
|
||||
};
|
||||
|
||||
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
|
||||
|
@ -125,8 +126,11 @@ public:
|
|||
bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
|
||||
{
|
||||
const bool forward = direction == MatchDirection::Forward;
|
||||
m_begin = Utf8It{utf8::iterator<Iterator>{forward ? begin : end, begin, end}};
|
||||
m_end = Utf8It{utf8::iterator<Iterator>{forward ? end : begin, begin, end}};
|
||||
const bool prev_avail = flags & RegexExecFlags::PrevAvailable;
|
||||
m_begin = Utf8It{utf8::iterator<Iterator>{forward ? begin : end,
|
||||
prev_avail ? begin-1 : begin, end}};
|
||||
m_end = Utf8It{utf8::iterator<Iterator>{forward ? end : begin,
|
||||
prev_avail ? begin-1 : begin, end}};
|
||||
m_flags = flags;
|
||||
|
||||
if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
|
||||
|
@ -314,8 +318,8 @@ private:
|
|||
case CompiledRegex::NegativeLookBehind:
|
||||
{
|
||||
auto ref = m_program.lookarounds.begin() + inst.param;
|
||||
for (auto it = pos-1; *ref != -1 and it >= m_begin; --it, ++ref)
|
||||
if (*it != *ref)
|
||||
for (auto it = pos; *ref != -1 and it > m_begin; --it, ++ref)
|
||||
if (*(it-1) != *ref)
|
||||
break;
|
||||
if ((inst.op == CompiledRegex::LookBehind and *ref != -1) or
|
||||
(inst.op == CompiledRegex::NegativeLookBehind and *ref == -1))
|
||||
|
@ -400,21 +404,25 @@ private:
|
|||
|
||||
bool is_line_start(const Utf8It& pos) const
|
||||
{
|
||||
return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or
|
||||
*(pos-1) == '\n';
|
||||
if (not (m_flags & RegexExecFlags::PrevAvailable) and pos == m_begin)
|
||||
return not (m_flags & RegexExecFlags::NotBeginOfLine);
|
||||
return *(pos-1) == '\n';
|
||||
}
|
||||
|
||||
bool is_line_end(const Utf8It& pos) const
|
||||
{
|
||||
return (pos == m_end and not (m_flags & RegexExecFlags::NotEndOfLine)) or
|
||||
*pos == '\n';
|
||||
if (pos == m_end)
|
||||
return not (m_flags & RegexExecFlags::NotEndOfLine);
|
||||
return *pos == '\n';
|
||||
}
|
||||
|
||||
bool is_word_boundary(const Utf8It& pos) const
|
||||
{
|
||||
return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfWord)) or
|
||||
(pos == m_end and not (m_flags & RegexExecFlags::NotEndOfWord)) or
|
||||
is_word(*(pos-1)) != is_word(*pos);
|
||||
if (not (m_flags & RegexExecFlags::PrevAvailable) and pos == m_begin)
|
||||
return not (m_flags & RegexExecFlags::NotBeginOfWord);
|
||||
if (pos == m_end)
|
||||
return not (m_flags & RegexExecFlags::NotEndOfWord);
|
||||
return is_word(*(pos-1)) != is_word(*pos);
|
||||
}
|
||||
|
||||
static const Iterator& get_base(const utf8::iterator<Iterator>& it) { return it.base(); }
|
||||
|
|
Loading…
Reference in New Issue
Block a user