Regex: Introduce RegexExecFlags::PrevAvailable

Rework assertion code as well.
This commit is contained in:
Maxime Coste 2017-10-08 09:22:24 +08:00
parent 73e177ec59
commit 9ec376135b
4 changed files with 33 additions and 13 deletions

View File

@ -31,7 +31,6 @@ void option_from_string(StringView str, Regex& re)
re = Regex{str};
}
void regex_mismatch(const Regex& re)
{
write_to_debug_buffer(format("regex mismatch for '{}'", re.str()));

View File

@ -133,6 +133,8 @@ inline RegexExecFlags convert_flags(RegexConstant::match_flag_type flags)
res |= RegexExecFlags::NotInitialNull;
if (flags & RegexConstant::match_any)
res |= RegexExecFlags::AnyMatch;
if (flags & RegexConstant::match_prev_avail)
res |= RegexExecFlags::PrevAvailable;
return res;
}
@ -261,7 +263,7 @@ private:
if (m_results.size() and m_results[0].first == m_results[0].second)
additional_flags |= RegexConstant::match_not_initial_null;
if (m_begin != m_next_begin)
additional_flags |= RegexConstant::match_not_bob;
additional_flags |= RegexConstant::match_not_bob | RegexConstant::match_prev_avail;
if (not regex_search(m_next_begin, m_end, m_results, *m_regex,
m_flags | additional_flags))

View File

@ -1041,6 +1041,11 @@ auto test_regex = UnitTest{[]{
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "f");
}
{
// A negative look-behind evaluated at the very start of the subject has
// no preceding character to reject, so the single-character subject matches.
TestVM<> vm{R"((?<!f).)"};
kak_assert(vm.exec("f"));
}
{
TestVM<> vm{R"((?!foo)...)"};
kak_assert(not vm.exec("foo"));
@ -1110,6 +1115,12 @@ auto test_regex = UnitTest{[]{
TestVM<> vm{R"(()*)"};
kak_assert(not vm.exec(" "));
}
{
// Pattern combining word boundaries, look-behind, look-ahead and a group
// with an empty alternative; run in Search mode, the test expects the
// first capture pointer to point at '#', the first character of the subject.
TestVM<> vm{R"(\b(?<!-)(a|b|)(?!-)\b)"};
kak_assert(vm.exec("# foo bar", RegexExecFlags::Search));
kak_assert(*vm.captures()[0] == '#');
}
}};
}

View File

@ -80,6 +80,7 @@ enum class RegexExecFlags
NotInitialNull = 1 << 6,
AnyMatch = 1 << 7,
NoSaves = 1 << 8,
PrevAvailable = 1 << 9,
};
// Compile-time trait overload that answers true for RegexExecFlags.
// NOTE(review): presumably this is what enables the bitwise operators
// (|, |=, &) used on this enum elsewhere — confirm against the
// with_bit_ops machinery, which is not visible here.
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
@ -125,8 +126,11 @@ public:
bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
{
const bool forward = direction == MatchDirection::Forward;
m_begin = Utf8It{utf8::iterator<Iterator>{forward ? begin : end, begin, end}};
m_end = Utf8It{utf8::iterator<Iterator>{forward ? end : begin, begin, end}};
const bool prev_avail = flags & RegexExecFlags::PrevAvailable;
m_begin = Utf8It{utf8::iterator<Iterator>{forward ? begin : end,
prev_avail ? begin-1 : begin, end}};
m_end = Utf8It{utf8::iterator<Iterator>{forward ? end : begin,
prev_avail ? begin-1 : begin, end}};
m_flags = flags;
if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
@ -314,8 +318,8 @@ private:
case CompiledRegex::NegativeLookBehind:
{
auto ref = m_program.lookarounds.begin() + inst.param;
for (auto it = pos-1; *ref != -1 and it >= m_begin; --it, ++ref)
if (*it != *ref)
for (auto it = pos; *ref != -1 and it > m_begin; --it, ++ref)
if (*(it-1) != *ref)
break;
if ((inst.op == CompiledRegex::LookBehind and *ref != -1) or
(inst.op == CompiledRegex::NegativeLookBehind and *ref == -1))
@ -400,21 +404,25 @@ private:
// Tell whether pos sits at the start of a line.
//
// At the start of the subject, unless PrevAvailable declares the character
// before it readable, the answer comes from the NotBeginOfLine flag alone and
// pos-1 is never dereferenced. Everywhere else the previous character decides.
bool is_line_start(const Utf8It& pos) const
{
    if (not (m_flags & RegexExecFlags::PrevAvailable) and pos == m_begin)
        return not (m_flags & RegexExecFlags::NotBeginOfLine);
    return *(pos-1) == '\n';
}
// Tell whether pos sits at the end of a line.
//
// At the end of the subject the answer comes from the NotEndOfLine flag alone,
// so the end iterator is never dereferenced. Everywhere else the character at
// pos decides.
bool is_line_end(const Utf8It& pos) const
{
    if (pos == m_end)
        return not (m_flags & RegexExecFlags::NotEndOfLine);
    return *pos == '\n';
}
// Tell whether pos lies on a word boundary.
//
// The subject edges are decided by flags so that no character outside the
// subject is read: the start (unless PrevAvailable declares the previous
// character readable) follows NotBeginOfWord, the end follows NotEndOfWord.
// Anywhere else there is a boundary when exactly one of the two characters
// around pos is a word character.
bool is_word_boundary(const Utf8It& pos) const
{
    if (not (m_flags & RegexExecFlags::PrevAvailable) and pos == m_begin)
        return not (m_flags & RegexExecFlags::NotBeginOfWord);
    if (pos == m_end)
        return not (m_flags & RegexExecFlags::NotEndOfWord);
    return is_word(*(pos-1)) != is_word(*pos);
}
// Unwrap a utf8::iterator and return a reference to the iterator it adapts.
static const Iterator& get_base(const utf8::iterator<Iterator>& it) { return it.base(); }