Use a dedicated vm op for dot when match-newline is false
This commit is contained in:
parent
b5ee1db1c4
commit
67655de947
|
@ -23,6 +23,7 @@ struct ParsedRegex
|
||||||
{
|
{
|
||||||
Literal,
|
Literal,
|
||||||
AnyChar,
|
AnyChar,
|
||||||
|
AnyCharExceptNewLine,
|
||||||
Class,
|
Class,
|
||||||
CharacterType,
|
CharacterType,
|
||||||
Sequence,
|
Sequence,
|
||||||
|
@ -296,14 +297,7 @@ private:
|
||||||
if (m_flags & Flags::DotMatchesNewLine)
|
if (m_flags & Flags::DotMatchesNewLine)
|
||||||
return new_node(ParsedRegex::AnyChar);
|
return new_node(ParsedRegex::AnyChar);
|
||||||
else
|
else
|
||||||
{
|
return new_node(ParsedRegex::AnyCharExceptNewLine);
|
||||||
CharacterClass c;
|
|
||||||
c.negative = true;
|
|
||||||
c.ranges.push_back({ '\n', '\n' });
|
|
||||||
auto class_id = m_parsed_regex.character_classes.size();
|
|
||||||
m_parsed_regex.character_classes.push_back(std::move(c));
|
|
||||||
return new_node(ParsedRegex::Class, class_id);
|
|
||||||
}
|
|
||||||
case '(':
|
case '(':
|
||||||
{
|
{
|
||||||
auto captures = [this, it = (++m_pos).base()]() mutable {
|
auto captures = [this, it = (++m_pos).base()]() mutable {
|
||||||
|
@ -611,7 +605,8 @@ private:
|
||||||
ForEachChild<>::apply(m_parsed_regex, index, [this](NodeIndex child_index) {
|
ForEachChild<>::apply(m_parsed_regex, index, [this](NodeIndex child_index) {
|
||||||
auto& child = get_node(child_index);
|
auto& child = get_node(child_index);
|
||||||
if (child.op != ParsedRegex::Literal and child.op != ParsedRegex::Class and
|
if (child.op != ParsedRegex::Literal and child.op != ParsedRegex::Class and
|
||||||
child.op != ParsedRegex::CharacterType and child.op != ParsedRegex::AnyChar)
|
child.op != ParsedRegex::CharacterType and child.op != ParsedRegex::AnyChar and
|
||||||
|
child.op != ParsedRegex::AnyCharExceptNewLine)
|
||||||
parse_error("Lookaround can only contain literals, any chars or character classes");
|
parse_error("Lookaround can only contain literals, any chars or character classes");
|
||||||
if (child.quantifier.type != ParsedRegex::Quantifier::One)
|
if (child.quantifier.type != ParsedRegex::Quantifier::One)
|
||||||
parse_error("Quantifiers cannot be used in lookarounds");
|
parse_error("Quantifiers cannot be used in lookarounds");
|
||||||
|
@ -714,6 +709,9 @@ private:
|
||||||
case ParsedRegex::AnyChar:
|
case ParsedRegex::AnyChar:
|
||||||
push_inst(CompiledRegex::AnyChar);
|
push_inst(CompiledRegex::AnyChar);
|
||||||
break;
|
break;
|
||||||
|
case ParsedRegex::AnyCharExceptNewLine:
|
||||||
|
push_inst(CompiledRegex::AnyCharExceptNewLine);
|
||||||
|
break;
|
||||||
case ParsedRegex::Class:
|
case ParsedRegex::Class:
|
||||||
push_inst(CompiledRegex::Class, node.value);
|
push_inst(CompiledRegex::Class, node.value);
|
||||||
break;
|
break;
|
||||||
|
@ -887,6 +885,8 @@ private:
|
||||||
: character.value);
|
: character.value);
|
||||||
else if (character.op == ParsedRegex::AnyChar)
|
else if (character.op == ParsedRegex::AnyChar)
|
||||||
m_program.lookarounds.push_back(0xF000);
|
m_program.lookarounds.push_back(0xF000);
|
||||||
|
else if (character.op == ParsedRegex::AnyCharExceptNewLine)
|
||||||
|
m_program.lookarounds.push_back(0xF001);
|
||||||
else if (character.op == ParsedRegex::Class)
|
else if (character.op == ParsedRegex::Class)
|
||||||
m_program.lookarounds.push_back(0xF0001 + character.value);
|
m_program.lookarounds.push_back(0xF0001 + character.value);
|
||||||
else if (character.op == ParsedRegex::CharacterType)
|
else if (character.op == ParsedRegex::CharacterType)
|
||||||
|
@ -930,6 +930,13 @@ private:
|
||||||
for (auto& b : start_desc.map)
|
for (auto& b : start_desc.map)
|
||||||
b = true;
|
b = true;
|
||||||
return node.quantifier.allows_none();
|
return node.quantifier.allows_none();
|
||||||
|
case ParsedRegex::AnyCharExceptNewLine:
|
||||||
|
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::count; ++cp)
|
||||||
|
{
|
||||||
|
if (cp != '\n')
|
||||||
|
start_desc.map[cp] = true;
|
||||||
|
}
|
||||||
|
return node.quantifier.allows_none();
|
||||||
case ParsedRegex::Class:
|
case ParsedRegex::Class:
|
||||||
{
|
{
|
||||||
auto& character_class = m_parsed_regex.character_classes[node.value];
|
auto& character_class = m_parsed_regex.character_classes[node.value];
|
||||||
|
@ -1044,6 +1051,9 @@ String dump_regex(const CompiledRegex& program)
|
||||||
case CompiledRegex::AnyChar:
|
case CompiledRegex::AnyChar:
|
||||||
res += "any char\n";
|
res += "any char\n";
|
||||||
break;
|
break;
|
||||||
|
case CompiledRegex::AnyCharExceptNewLine:
|
||||||
|
res += "anything but newline\n";
|
||||||
|
break;
|
||||||
case CompiledRegex::Jump:
|
case CompiledRegex::Jump:
|
||||||
res += format("jump {}\n", inst.param);
|
res += format("jump {}\n", inst.param);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -59,6 +59,7 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
||||||
Literal,
|
Literal,
|
||||||
Literal_IgnoreCase,
|
Literal_IgnoreCase,
|
||||||
AnyChar,
|
AnyChar,
|
||||||
|
AnyCharExceptNewLine,
|
||||||
Class,
|
Class,
|
||||||
CharacterType,
|
CharacterType,
|
||||||
Jump,
|
Jump,
|
||||||
|
@ -322,6 +323,10 @@ private:
|
||||||
return StepResult::Failed;
|
return StepResult::Failed;
|
||||||
case CompiledRegex::AnyChar:
|
case CompiledRegex::AnyChar:
|
||||||
return StepResult::Consumed;
|
return StepResult::Consumed;
|
||||||
|
case CompiledRegex::AnyCharExceptNewLine:
|
||||||
|
if (pos != config.end and *pos != '\n')
|
||||||
|
return StepResult::Consumed;
|
||||||
|
return StepResult::Failed;
|
||||||
case CompiledRegex::Jump:
|
case CompiledRegex::Jump:
|
||||||
thread.inst = static_cast<int16_t>(inst.param);
|
thread.inst = static_cast<int16_t>(inst.param);
|
||||||
break;
|
break;
|
||||||
|
@ -529,6 +534,11 @@ private:
|
||||||
const Codepoint ref = *it;
|
const Codepoint ref = *it;
|
||||||
if (ref == 0xF000)
|
if (ref == 0xF000)
|
||||||
{} // any character matches
|
{} // any character matches
|
||||||
|
else if (ref == 0xF001)
|
||||||
|
{
|
||||||
|
if (cp == '\n')
|
||||||
|
return false;
|
||||||
|
}
|
||||||
else if (ref > 0xF0000 and ref < 0xF8000)
|
else if (ref > 0xF0000 and ref < 0xF8000)
|
||||||
{
|
{
|
||||||
if (not is_character_class(m_program.character_classes[ref - 0xF0001], cp))
|
if (not is_character_class(m_program.character_classes[ref - 0xF0001], cp))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user