Regex: Use a template argument instead of a regular one for "forward"
forward (which controls whether we are compiling for forward or backward matching) is always statically known, and compilation first compiles the forward program, then the backward one (if needed), so by having separate compiled functions we get rid of runtime branches.
This commit is contained in:
parent
aa9f7753e8
commit
3584e00d19
|
@ -86,9 +86,12 @@ struct ParsedRegex
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
|
{
|
||||||
|
template<MatchDirection = MatchDirection::Forward>
|
||||||
|
struct ForEachChild
|
||||||
{
|
{
|
||||||
template<typename Func>
|
template<typename Func>
|
||||||
bool for_each_child(const ParsedRegex& parsed_regex, ParsedRegex::NodeIndex index, Func&& func)
|
static bool apply(const ParsedRegex& parsed_regex, ParsedRegex::NodeIndex index, Func&& func)
|
||||||
{
|
{
|
||||||
const auto end = parsed_regex.nodes[index].children_end;
|
const auto end = parsed_regex.nodes[index].children_end;
|
||||||
for (auto child = index+1; child != end;
|
for (auto child = index+1; child != end;
|
||||||
|
@ -99,9 +102,13 @@ bool for_each_child(const ParsedRegex& parsed_regex, ParsedRegex::NodeIndex inde
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct ForEachChild<MatchDirection::Backward>
|
||||||
|
{
|
||||||
template<typename Func>
|
template<typename Func>
|
||||||
bool for_each_child_reverse(const ParsedRegex& parsed_regex, ParsedRegex::NodeIndex index, Func&& func)
|
static bool apply(const ParsedRegex& parsed_regex, ParsedRegex::NodeIndex index, Func&& func)
|
||||||
{
|
{
|
||||||
auto find_last_child = [&](ParsedRegex::NodeIndex begin, ParsedRegex::NodeIndex end) {
|
auto find_last_child = [&](ParsedRegex::NodeIndex begin, ParsedRegex::NodeIndex end) {
|
||||||
while (parsed_regex.nodes[begin].children_end != end)
|
while (parsed_regex.nodes[begin].children_end != end)
|
||||||
|
@ -119,6 +126,7 @@ bool for_each_child_reverse(const ParsedRegex& parsed_regex, ParsedRegex::NodeIn
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Recursive descent parser based on naming used in the ECMAScript
|
// Recursive descent parser based on naming used in the ECMAScript
|
||||||
|
@ -573,7 +581,7 @@ private:
|
||||||
|
|
||||||
void validate_lookaround(NodeIndex index)
|
void validate_lookaround(NodeIndex index)
|
||||||
{
|
{
|
||||||
for_each_child(m_parsed_regex, index, [this](NodeIndex child_index) {
|
ForEachChild<>::apply(m_parsed_regex, index, [this](NodeIndex child_index) {
|
||||||
auto& child = get_node(child_index);
|
auto& child = get_node(child_index);
|
||||||
if (child.op != ParsedRegex::Literal and child.op != ParsedRegex::Class and
|
if (child.op != ParsedRegex::Literal and child.op != ParsedRegex::Class and
|
||||||
child.op != ParsedRegex::CharacterType and child.op != ParsedRegex::AnyChar)
|
child.op != ParsedRegex::CharacterType and child.op != ParsedRegex::AnyChar)
|
||||||
|
@ -627,18 +635,18 @@ struct RegexCompiler
|
||||||
|
|
||||||
if (not (flags & RegexCompileFlags::NoForward))
|
if (not (flags & RegexCompileFlags::NoForward))
|
||||||
{
|
{
|
||||||
m_program.forward_start_desc = compute_start_desc(true);
|
m_program.forward_start_desc = compute_start_desc<MatchDirection::Forward>();
|
||||||
write_search_prefix();
|
write_search_prefix();
|
||||||
compile_node(0, true);
|
compile_node<MatchDirection::Forward>(0);
|
||||||
push_inst(CompiledRegex::Match);
|
push_inst(CompiledRegex::Match);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & RegexCompileFlags::Backward)
|
if (flags & RegexCompileFlags::Backward)
|
||||||
{
|
{
|
||||||
m_program.first_backward_inst = m_program.instructions.size();
|
m_program.first_backward_inst = m_program.instructions.size();
|
||||||
m_program.backward_start_desc = compute_start_desc(false);
|
m_program.backward_start_desc = compute_start_desc<MatchDirection::Backward>();
|
||||||
write_search_prefix();
|
write_search_prefix();
|
||||||
compile_node(0, false);
|
compile_node<MatchDirection::Backward>(0);
|
||||||
push_inst(CompiledRegex::Match);
|
push_inst(CompiledRegex::Match);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -652,7 +660,8 @@ struct RegexCompiler
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
uint32_t compile_node_inner(ParsedRegex::NodeIndex index, bool forward)
|
template<MatchDirection direction>
|
||||||
|
uint32_t compile_node_inner(ParsedRegex::NodeIndex index)
|
||||||
{
|
{
|
||||||
auto& node = get_node(index);
|
auto& node = get_node(index);
|
||||||
|
|
||||||
|
@ -661,6 +670,7 @@ private:
|
||||||
|
|
||||||
const bool save = (node.op == ParsedRegex::Alternation or node.op == ParsedRegex::Sequence) and
|
const bool save = (node.op == ParsedRegex::Alternation or node.op == ParsedRegex::Sequence) and
|
||||||
(node.value == 0 or (node.value != -1 and not (m_flags & RegexCompileFlags::NoSubs)));
|
(node.value == 0 or (node.value != -1 and not (m_flags & RegexCompileFlags::NoSubs)));
|
||||||
|
constexpr bool forward = direction == MatchDirection::Forward;
|
||||||
if (save)
|
if (save)
|
||||||
push_inst(CompiledRegex::Save, node.value * 2 + (forward ? 0 : 1));
|
push_inst(CompiledRegex::Save, node.value * 2 + (forward ? 0 : 1));
|
||||||
|
|
||||||
|
@ -684,28 +694,23 @@ private:
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::Sequence:
|
case ParsedRegex::Sequence:
|
||||||
{
|
{
|
||||||
if (forward)
|
ForEachChild<direction>::apply(m_parsed_regex, index, [this](ParsedRegex::NodeIndex child) {
|
||||||
for_each_child(m_parsed_regex, index, [this](ParsedRegex::NodeIndex child) {
|
compile_node<direction>(child); return true;
|
||||||
compile_node(child, true); return true;
|
|
||||||
});
|
|
||||||
else
|
|
||||||
for_each_child_reverse(m_parsed_regex, index, [this](ParsedRegex::NodeIndex child) {
|
|
||||||
compile_node(child, false); return true;
|
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ParsedRegex::Alternation:
|
case ParsedRegex::Alternation:
|
||||||
{
|
{
|
||||||
auto split_pos = m_program.instructions.size();
|
auto split_pos = m_program.instructions.size();
|
||||||
for_each_child(m_parsed_regex, index, [this, index](ParsedRegex::NodeIndex child) {
|
ForEachChild<>::apply(m_parsed_regex, index, [this, index](ParsedRegex::NodeIndex child) {
|
||||||
if (child != index+1)
|
if (child != index+1)
|
||||||
push_inst(CompiledRegex::Split_PrioritizeParent);
|
push_inst(CompiledRegex::Split_PrioritizeParent);
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
|
|
||||||
for_each_child(m_parsed_regex, index,
|
ForEachChild<>::apply(m_parsed_regex, index,
|
||||||
[&, end = node.children_end](ParsedRegex::NodeIndex child) {
|
[&, end = node.children_end](ParsedRegex::NodeIndex child) {
|
||||||
auto node = compile_node(child, forward);
|
auto node = compile_node<direction>(child);
|
||||||
if (child != index+1)
|
if (child != index+1)
|
||||||
m_program.instructions[split_pos++].param = node;
|
m_program.instructions[split_pos++].param = node;
|
||||||
if (get_node(child).children_end != end)
|
if (get_node(child).children_end != end)
|
||||||
|
@ -722,28 +727,28 @@ private:
|
||||||
: CompiledRegex::LookAhead)
|
: CompiledRegex::LookAhead)
|
||||||
: (ignore_case ? CompiledRegex::LookBehind_IgnoreCase
|
: (ignore_case ? CompiledRegex::LookBehind_IgnoreCase
|
||||||
: CompiledRegex::LookBehind),
|
: CompiledRegex::LookBehind),
|
||||||
push_lookaround(index, false, ignore_case));
|
push_lookaround<MatchDirection::Forward>(index, ignore_case));
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::NegativeLookAhead:
|
case ParsedRegex::NegativeLookAhead:
|
||||||
push_inst(forward ? (ignore_case ? CompiledRegex::NegativeLookAhead_IgnoreCase
|
push_inst(forward ? (ignore_case ? CompiledRegex::NegativeLookAhead_IgnoreCase
|
||||||
: CompiledRegex::NegativeLookAhead)
|
: CompiledRegex::NegativeLookAhead)
|
||||||
: (ignore_case ? CompiledRegex::NegativeLookBehind_IgnoreCase
|
: (ignore_case ? CompiledRegex::NegativeLookBehind_IgnoreCase
|
||||||
: CompiledRegex::NegativeLookBehind),
|
: CompiledRegex::NegativeLookBehind),
|
||||||
push_lookaround(index, false, ignore_case));
|
push_lookaround<MatchDirection::Forward>(index, ignore_case));
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::LookBehind:
|
case ParsedRegex::LookBehind:
|
||||||
push_inst(forward ? (ignore_case ? CompiledRegex::LookBehind_IgnoreCase
|
push_inst(forward ? (ignore_case ? CompiledRegex::LookBehind_IgnoreCase
|
||||||
: CompiledRegex::LookBehind)
|
: CompiledRegex::LookBehind)
|
||||||
: (ignore_case ? CompiledRegex::LookAhead_IgnoreCase
|
: (ignore_case ? CompiledRegex::LookAhead_IgnoreCase
|
||||||
: CompiledRegex::LookAhead),
|
: CompiledRegex::LookAhead),
|
||||||
push_lookaround(index, true, ignore_case));
|
push_lookaround<MatchDirection::Backward>(index, ignore_case));
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::NegativeLookBehind:
|
case ParsedRegex::NegativeLookBehind:
|
||||||
push_inst(forward ? (ignore_case ? CompiledRegex::NegativeLookBehind_IgnoreCase
|
push_inst(forward ? (ignore_case ? CompiledRegex::NegativeLookBehind_IgnoreCase
|
||||||
: CompiledRegex::NegativeLookBehind)
|
: CompiledRegex::NegativeLookBehind)
|
||||||
: (ignore_case ? CompiledRegex::NegativeLookAhead_IgnoreCase
|
: (ignore_case ? CompiledRegex::NegativeLookAhead_IgnoreCase
|
||||||
: CompiledRegex::NegativeLookAhead),
|
: CompiledRegex::NegativeLookAhead),
|
||||||
push_lookaround(index, true, ignore_case));
|
push_lookaround<MatchDirection::Backward>(index, ignore_case));
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::LineStart:
|
case ParsedRegex::LineStart:
|
||||||
push_inst(forward ? CompiledRegex::LineStart
|
push_inst(forward ? CompiledRegex::LineStart
|
||||||
|
@ -781,7 +786,8 @@ private:
|
||||||
return start_pos;
|
return start_pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t compile_node(ParsedRegex::NodeIndex index, bool forward)
|
template<MatchDirection direction>
|
||||||
|
uint32_t compile_node(ParsedRegex::NodeIndex index)
|
||||||
{
|
{
|
||||||
auto& node = get_node(index);
|
auto& node = get_node(index);
|
||||||
|
|
||||||
|
@ -799,10 +805,10 @@ private:
|
||||||
goto_ends.push_back(split_pos);
|
goto_ends.push_back(split_pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto inner_pos = compile_node_inner(index, forward);
|
auto inner_pos = compile_node_inner<direction>(index);
|
||||||
// Write the node multiple times when we have a min count quantifier
|
// Write the node multiple times when we have a min count quantifier
|
||||||
for (int i = 1; i < quantifier.min; ++i)
|
for (int i = 1; i < quantifier.min; ++i)
|
||||||
inner_pos = compile_node_inner(index, forward);
|
inner_pos = compile_node_inner<direction>(index);
|
||||||
|
|
||||||
if (quantifier.allows_infinite_repeat())
|
if (quantifier.allows_infinite_repeat())
|
||||||
push_inst(quantifier.greedy ? CompiledRegex::Split_PrioritizeChild
|
push_inst(quantifier.greedy ? CompiledRegex::Split_PrioritizeChild
|
||||||
|
@ -816,7 +822,7 @@ private:
|
||||||
auto split_pos = push_inst(quantifier.greedy ? CompiledRegex::Split_PrioritizeParent
|
auto split_pos = push_inst(quantifier.greedy ? CompiledRegex::Split_PrioritizeParent
|
||||||
: CompiledRegex::Split_PrioritizeChild);
|
: CompiledRegex::Split_PrioritizeChild);
|
||||||
goto_ends.push_back(split_pos);
|
goto_ends.push_back(split_pos);
|
||||||
compile_node_inner(index, forward);
|
compile_node_inner<direction>(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto offset : goto_ends)
|
for (auto offset : goto_ends)
|
||||||
|
@ -845,7 +851,8 @@ private:
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t push_lookaround(ParsedRegex::NodeIndex index, bool reversed, bool ignore_case)
|
template<MatchDirection direction>
|
||||||
|
uint32_t push_lookaround(ParsedRegex::NodeIndex index, bool ignore_case)
|
||||||
{
|
{
|
||||||
uint32_t res = m_program.lookarounds.size();
|
uint32_t res = m_program.lookarounds.size();
|
||||||
auto write_matcher = [this, ignore_case](ParsedRegex::NodeIndex child) {
|
auto write_matcher = [this, ignore_case](ParsedRegex::NodeIndex child) {
|
||||||
|
@ -864,10 +871,7 @@ private:
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
if (reversed)
|
ForEachChild<direction>::apply(m_parsed_regex, index, write_matcher);
|
||||||
for_each_child_reverse(m_parsed_regex, index, write_matcher);
|
|
||||||
else
|
|
||||||
for_each_child(m_parsed_regex, index, write_matcher);
|
|
||||||
|
|
||||||
m_program.lookarounds.push_back((Codepoint)-1);
|
m_program.lookarounds.push_back((Codepoint)-1);
|
||||||
return res;
|
return res;
|
||||||
|
@ -876,8 +880,9 @@ private:
|
||||||
// Fills accepted and rejected according to which chars can start the given node,
|
// Fills accepted and rejected according to which chars can start the given node,
|
||||||
// returns true if the node did not consume the char, hence a following node in
|
// returns true if the node did not consume the char, hence a following node in
|
||||||
// sequence would be still relevant for the parent node start chars computation.
|
// sequence would be still relevant for the parent node start chars computation.
|
||||||
|
template<MatchDirection direction>
|
||||||
bool compute_start_desc(ParsedRegex::NodeIndex index,
|
bool compute_start_desc(ParsedRegex::NodeIndex index,
|
||||||
CompiledRegex::StartDesc& start_desc, bool forward) const
|
CompiledRegex::StartDesc& start_desc) const
|
||||||
{
|
{
|
||||||
auto& node = get_node(index);
|
auto& node = get_node(index);
|
||||||
switch (node.op)
|
switch (node.op)
|
||||||
|
@ -939,20 +944,17 @@ private:
|
||||||
{
|
{
|
||||||
bool did_not_consume = false;
|
bool did_not_consume = false;
|
||||||
auto does_not_consume = [&, this](auto child) {
|
auto does_not_consume = [&, this](auto child) {
|
||||||
return this->compute_start_desc(child, start_desc, forward);
|
return this->compute_start_desc<direction>(child, start_desc);
|
||||||
};
|
};
|
||||||
if (forward)
|
did_not_consume = ForEachChild<direction>::apply(m_parsed_regex, index, does_not_consume);
|
||||||
did_not_consume = for_each_child(m_parsed_regex, index, does_not_consume);
|
|
||||||
else
|
|
||||||
did_not_consume = for_each_child_reverse(m_parsed_regex, index, does_not_consume);
|
|
||||||
|
|
||||||
return did_not_consume or node.quantifier.allows_none();
|
return did_not_consume or node.quantifier.allows_none();
|
||||||
}
|
}
|
||||||
case ParsedRegex::Alternation:
|
case ParsedRegex::Alternation:
|
||||||
{
|
{
|
||||||
bool all_consumed = not node.quantifier.allows_none();
|
bool all_consumed = not node.quantifier.allows_none();
|
||||||
for_each_child(m_parsed_regex, index, [&](ParsedRegex::NodeIndex child) {
|
ForEachChild<>::apply(m_parsed_regex, index, [&](ParsedRegex::NodeIndex child) {
|
||||||
if (compute_start_desc(child, start_desc, forward))
|
if (compute_start_desc<direction>(child, start_desc))
|
||||||
all_consumed = false;
|
all_consumed = false;
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
|
@ -974,11 +976,12 @@ private:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<MatchDirection direction>
|
||||||
[[gnu::noinline]]
|
[[gnu::noinline]]
|
||||||
std::unique_ptr<CompiledRegex::StartDesc> compute_start_desc(bool forward) const
|
std::unique_ptr<CompiledRegex::StartDesc> compute_start_desc() const
|
||||||
{
|
{
|
||||||
CompiledRegex::StartDesc start_desc{};
|
CompiledRegex::StartDesc start_desc{};
|
||||||
if (compute_start_desc(0, start_desc, forward) or
|
if (compute_start_desc<direction>(0, start_desc) or
|
||||||
not contains(start_desc.map, false))
|
not contains(start_desc.map, false))
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user