Refactor parsed regex children iteration to use regular range-for loops

This commit is contained in:
Maxime Coste 2018-12-19 23:21:47 +11:00
parent 566268d7bc
commit 2afc147b2c

View File

@ -91,46 +91,52 @@ struct ParsedRegex
namespace namespace
{ {
template<MatchDirection = MatchDirection::Forward>
struct ForEachChild template<MatchDirection direction = MatchDirection::Forward>
struct Children
{ {
template<typename Func> using Index = ParsedRegex::NodeIndex;
static bool apply(const ParsedRegex& parsed_regex, ParsedRegex::NodeIndex index, Func&& func) struct Sentinel {};
struct Iterator
{ {
const auto end = parsed_regex.nodes[index].children_end; static constexpr bool forward = direction == MatchDirection::Forward;
for (auto child = index+1; child != end; Iterator(ArrayView<const ParsedRegex::Node> nodes, Index index)
child = parsed_regex.nodes[child].children_end) : m_nodes{nodes},
m_pos(forward ? index+1 : find_prev(index, nodes[index].children_end)),
m_end(forward ? nodes[index].children_end : index)
{}
Iterator& operator++()
{ {
if (func(child) == false) m_pos = forward ? m_nodes[m_pos].children_end : find_prev(m_end, m_pos);
return false; return *this;
} }
return true;
} Index operator*() const { return m_pos; }
bool operator!=(Sentinel) const { return m_pos != m_end; }
Index find_prev(Index parent, Index pos) const
{
Index child = parent+1;
if (child == pos)
return parent;
while (m_nodes[child].children_end != pos)
child = m_nodes[child].children_end;
return child;
}
ArrayView<const ParsedRegex::Node> m_nodes;
Index m_pos;
Index m_end;
};
Iterator begin() const { return {m_parsed_regex.nodes, m_index}; }
Sentinel end() const { return {}; }
const ParsedRegex& m_parsed_regex;
const Index m_index;
}; };
template<>
struct ForEachChild<MatchDirection::Backward>
{
template<typename Func>
static bool apply(const ParsedRegex& parsed_regex, ParsedRegex::NodeIndex index, Func&& func)
{
auto find_last_child = [&](ParsedRegex::NodeIndex begin, ParsedRegex::NodeIndex end) {
while (parsed_regex.nodes[begin].children_end != end)
begin = parsed_regex.nodes[begin].children_end;
return begin;
};
const auto first_child = index+1;
auto end = parsed_regex.nodes[index].children_end;
while (end != first_child)
{
auto child = find_last_child(first_child, end);
if (func(child) == false)
return false;
end = child;
}
return true;
}
};
} }
// Recursive descent parser based on naming used in the ECMAScript // Recursive descent parser based on naming used in the ECMAScript
@ -615,7 +621,8 @@ private:
void validate_lookaround(NodeIndex index) void validate_lookaround(NodeIndex index)
{ {
using Lookaround = CompiledRegex::Lookaround; using Lookaround = CompiledRegex::Lookaround;
ForEachChild<>::apply(m_parsed_regex, index, [this](NodeIndex child_index) { for (auto child_index : Children<>{m_parsed_regex, index})
{
auto& child = get_node(child_index); auto& child = get_node(child_index);
if (child.op != ParsedRegex::Literal and child.op != ParsedRegex::Class and if (child.op != ParsedRegex::Literal and child.op != ParsedRegex::Class and
child.op != ParsedRegex::CharacterType and child.op != ParsedRegex::AnyChar and child.op != ParsedRegex::CharacterType and child.op != ParsedRegex::AnyChar and
@ -627,8 +634,7 @@ private:
parse_error("Lookaround does not support literals codepoint between 0xF0000 and 0xFFFFD"); parse_error("Lookaround does not support literals codepoint between 0xF0000 and 0xFFFFD");
if (child.quantifier.type != ParsedRegex::Quantifier::One) if (child.quantifier.type != ParsedRegex::Quantifier::One)
parse_error("Quantifiers cannot be used in lookarounds"); parse_error("Quantifiers cannot be used in lookarounds");
return true; }
});
} }
ParsedRegex m_parsed_regex; ParsedRegex m_parsed_regex;
@ -738,22 +744,22 @@ private:
break; break;
case ParsedRegex::Sequence: case ParsedRegex::Sequence:
{ {
ForEachChild<direction>::apply(m_parsed_regex, index, [this](ParsedRegex::NodeIndex child) { for (auto child : Children<direction>{m_parsed_regex, index})
compile_node<direction>(child); return true; compile_node<direction>(child);
});
break; break;
} }
case ParsedRegex::Alternation: case ParsedRegex::Alternation:
{ {
auto split_pos = m_program.instructions.size(); auto split_pos = m_program.instructions.size();
ForEachChild<>::apply(m_parsed_regex, index, [this, index](ParsedRegex::NodeIndex child) { for (auto child : Children<>{m_parsed_regex, index})
{
if (child != index+1) if (child != index+1)
push_inst(CompiledRegex::Split_PrioritizeParent); push_inst(CompiledRegex::Split_PrioritizeParent);
return true; }
});
ForEachChild<>::apply(m_parsed_regex, index, const auto end = node.children_end;
[&, end = node.children_end](ParsedRegex::NodeIndex child) { for (auto child : Children<>{m_parsed_regex, index})
{
auto node = compile_node<direction>(child); auto node = compile_node<direction>(child);
if (child != index+1) if (child != index+1)
m_program.instructions[split_pos++].param = node; m_program.instructions[split_pos++].param = node;
@ -762,8 +768,7 @@ private:
auto jump = push_inst(CompiledRegex::Jump); auto jump = push_inst(CompiledRegex::Jump);
goto_inner_end_offsets.push_back(jump); goto_inner_end_offsets.push_back(jump);
} }
return true; }
});
break; break;
} }
case ParsedRegex::LookAhead: case ParsedRegex::LookAhead:
@ -886,26 +891,23 @@ private:
using Lookaround = CompiledRegex::Lookaround; using Lookaround = CompiledRegex::Lookaround;
const uint32_t res = m_program.lookarounds.size(); const uint32_t res = m_program.lookarounds.size();
auto write_matcher = [this, ignore_case](ParsedRegex::NodeIndex child) { for (auto child : Children<direction>{m_parsed_regex, index})
auto& character = get_node(child); {
if (character.op == ParsedRegex::Literal) auto& character = get_node(child);
m_program.lookarounds.push_back( if (character.op == ParsedRegex::Literal)
static_cast<Lookaround>(ignore_case ? to_lower(character.value) : character.value)); m_program.lookarounds.push_back(
else if (character.op == ParsedRegex::AnyChar) static_cast<Lookaround>(ignore_case ? to_lower(character.value) : character.value));
m_program.lookarounds.push_back(Lookaround::AnyChar); else if (character.op == ParsedRegex::AnyChar)
else if (character.op == ParsedRegex::AnyCharExceptNewLine) m_program.lookarounds.push_back(Lookaround::AnyChar);
m_program.lookarounds.push_back(Lookaround::AnyCharExceptNewLine); else if (character.op == ParsedRegex::AnyCharExceptNewLine)
else if (character.op == ParsedRegex::Class) m_program.lookarounds.push_back(Lookaround::AnyCharExceptNewLine);
m_program.lookarounds.push_back(static_cast<Lookaround>(to_underlying(Lookaround::CharacterClass) + character.value)); else if (character.op == ParsedRegex::Class)
else if (character.op == ParsedRegex::CharacterType) m_program.lookarounds.push_back(static_cast<Lookaround>(to_underlying(Lookaround::CharacterClass) + character.value));
m_program.lookarounds.push_back(static_cast<Lookaround>(to_underlying(Lookaround::CharacterType) | character.value)); else if (character.op == ParsedRegex::CharacterType)
else m_program.lookarounds.push_back(static_cast<Lookaround>(to_underlying(Lookaround::CharacterType) | character.value));
kak_assert(false); else
return true; kak_assert(false);
}; }
ForEachChild<direction>::apply(m_parsed_regex, index, write_matcher);
m_program.lookarounds.push_back(Lookaround::EndOfLookaround); m_program.lookarounds.push_back(Lookaround::EndOfLookaround);
return res; return res;
} }
@ -983,22 +985,21 @@ private:
} }
case ParsedRegex::Sequence: case ParsedRegex::Sequence:
{ {
bool did_not_consume = false; for (auto child : Children<direction>{m_parsed_regex, index})
auto does_not_consume = [&, this](auto child) { {
return this->compute_start_desc<direction>(child, start_desc); if (not compute_start_desc<direction>(child, start_desc))
}; return node.quantifier.allows_none();
did_not_consume = ForEachChild<direction>::apply(m_parsed_regex, index, does_not_consume); }
return true;
return did_not_consume or node.quantifier.allows_none();
} }
case ParsedRegex::Alternation: case ParsedRegex::Alternation:
{ {
bool all_consumed = not node.quantifier.allows_none(); bool all_consumed = not node.quantifier.allows_none();
ForEachChild<>::apply(m_parsed_regex, index, [&](ParsedRegex::NodeIndex child) { for (auto child : Children<>{m_parsed_regex, index})
{
if (compute_start_desc<direction>(child, start_desc)) if (compute_start_desc<direction>(child, start_desc))
all_consumed = false; all_consumed = false;
return true; }
});
return not all_consumed; return not all_consumed;
} }
case ParsedRegex::LineStart: case ParsedRegex::LineStart: