Regex: Support non greedy quantifiers
This commit is contained in:
parent
e4004a7b7f
commit
e96cd29f0e
|
@ -42,6 +42,7 @@ struct ParsedRegex
|
||||||
RepeatMinMax,
|
RepeatMinMax,
|
||||||
};
|
};
|
||||||
Type type = One;
|
Type type = One;
|
||||||
|
bool greedy = true;
|
||||||
int min = -1, max = -1;
|
int min = -1, max = -1;
|
||||||
|
|
||||||
bool allows_none() const
|
bool allows_none() const
|
||||||
|
@ -346,11 +347,18 @@ private:
|
||||||
return res;
|
return res;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
auto check_greedy = [&]() {
|
||||||
|
if (at_end() or *m_pos != '?')
|
||||||
|
return true;
|
||||||
|
++m_pos;
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
switch (*m_pos)
|
switch (*m_pos)
|
||||||
{
|
{
|
||||||
case '*': ++m_pos; return {ParsedRegex::Quantifier::RepeatZeroOrMore};
|
case '*': ++m_pos; return {ParsedRegex::Quantifier::RepeatZeroOrMore, check_greedy()};
|
||||||
case '+': ++m_pos; return {ParsedRegex::Quantifier::RepeatOneOrMore};
|
case '+': ++m_pos; return {ParsedRegex::Quantifier::RepeatOneOrMore, check_greedy()};
|
||||||
case '?': ++m_pos; return {ParsedRegex::Quantifier::Optional};
|
case '?': ++m_pos; return {ParsedRegex::Quantifier::Optional, check_greedy()};
|
||||||
case '{':
|
case '{':
|
||||||
{
|
{
|
||||||
auto it = m_pos+1;
|
auto it = m_pos+1;
|
||||||
|
@ -364,7 +372,7 @@ private:
|
||||||
if (*it++ != '}')
|
if (*it++ != '}')
|
||||||
parse_error("expected closing bracket");
|
parse_error("expected closing bracket");
|
||||||
m_pos = it;
|
m_pos = it;
|
||||||
return {ParsedRegex::Quantifier::RepeatMinMax, min, max};
|
return {ParsedRegex::Quantifier::RepeatMinMax, true, min, max};
|
||||||
}
|
}
|
||||||
default: return {ParsedRegex::Quantifier::One};
|
default: return {ParsedRegex::Quantifier::One};
|
||||||
}
|
}
|
||||||
|
@ -549,27 +557,32 @@ private:
|
||||||
Offset pos = m_program.bytecode.size();
|
Offset pos = m_program.bytecode.size();
|
||||||
Vector<Offset> goto_end_offsets;
|
Vector<Offset> goto_end_offsets;
|
||||||
|
|
||||||
if (node->quantifier.allows_none())
|
auto& quantifier = node->quantifier;
|
||||||
|
|
||||||
|
if (quantifier.allows_none())
|
||||||
{
|
{
|
||||||
push_op(CompiledRegex::Split_PrioritizeParent);
|
push_op(quantifier.greedy ? CompiledRegex::Split_PrioritizeParent
|
||||||
|
: CompiledRegex::Split_PrioritizeChild);
|
||||||
goto_end_offsets.push_back(alloc_offset());
|
goto_end_offsets.push_back(alloc_offset());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto inner_pos = compile_node_inner(node);
|
auto inner_pos = compile_node_inner(node);
|
||||||
// Write the node multiple times when we have a min count quantifier
|
// Write the node multiple times when we have a min count quantifier
|
||||||
for (int i = 1; i < node->quantifier.min; ++i)
|
for (int i = 1; i < quantifier.min; ++i)
|
||||||
inner_pos = compile_node_inner(node);
|
inner_pos = compile_node_inner(node);
|
||||||
|
|
||||||
if (node->quantifier.allows_infinite_repeat())
|
if (quantifier.allows_infinite_repeat())
|
||||||
{
|
{
|
||||||
push_op(CompiledRegex::Split_PrioritizeChild);
|
push_op(quantifier.greedy ? CompiledRegex::Split_PrioritizeChild
|
||||||
|
: CompiledRegex::Split_PrioritizeParent);
|
||||||
get_offset(alloc_offset()) = inner_pos;
|
get_offset(alloc_offset()) = inner_pos;
|
||||||
}
|
}
|
||||||
// Write the node as an optional match for the min -> max counts
|
// Write the node as an optional match for the min -> max counts
|
||||||
else for (int i = std::max(1, node->quantifier.min); // STILL UGLY !
|
else for (int i = std::max(1, quantifier.min); // STILL UGLY !
|
||||||
i < node->quantifier.max; ++i)
|
i < quantifier.max; ++i)
|
||||||
{
|
{
|
||||||
push_op(CompiledRegex::Split_PrioritizeParent);
|
push_op(quantifier.greedy ? CompiledRegex::Split_PrioritizeParent
|
||||||
|
: CompiledRegex::Split_PrioritizeChild);
|
||||||
goto_end_offsets.push_back(alloc_offset());
|
goto_end_offsets.push_back(alloc_offset());
|
||||||
compile_node_inner(node);
|
compile_node_inner(node);
|
||||||
}
|
}
|
||||||
|
@ -1015,6 +1028,12 @@ auto test_regex = UnitTest{[]{
|
||||||
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "bar");
|
kak_assert(StringView{vm.m_captures[0], vm.m_captures[1]} == "bar");
|
||||||
kak_assert(not vm.exec("bar", true, true));
|
kak_assert(not vm.exec("bar", true, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
TestVM vm{R"((fo+?).*)"};
|
||||||
|
kak_assert(vm.exec("foooo", true, true));
|
||||||
|
kak_assert(StringView{vm.m_captures[2], vm.m_captures[3]} == "fo");
|
||||||
|
}
|
||||||
}};
|
}};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user