Regex: Remove "Ast" from names in the ParsedRegex
It does not add much value, and makes names longer.
This commit is contained in:
parent
18a02ccacd
commit
bbd7e604dc
|
@ -67,19 +67,19 @@ struct ParsedRegex
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
struct AstNode;
|
struct Node;
|
||||||
using AstNodeIndex = uint16_t;
|
using NodeIndex = uint16_t;
|
||||||
|
|
||||||
struct AstNode
|
struct Node
|
||||||
{
|
{
|
||||||
Op op;
|
Op op;
|
||||||
bool ignore_case;
|
bool ignore_case;
|
||||||
AstNodeIndex children_end;
|
NodeIndex children_end;
|
||||||
Codepoint value;
|
Codepoint value;
|
||||||
Quantifier quantifier;
|
Quantifier quantifier;
|
||||||
};
|
};
|
||||||
|
|
||||||
Vector<AstNode, MemoryDomain::Regex> nodes;
|
Vector<Node, MemoryDomain::Regex> nodes;
|
||||||
size_t capture_count;
|
size_t capture_count;
|
||||||
Vector<std::function<bool (Codepoint)>, MemoryDomain::Regex> matchers;
|
Vector<std::function<bool (Codepoint)>, MemoryDomain::Regex> matchers;
|
||||||
};
|
};
|
||||||
|
@ -87,7 +87,7 @@ struct ParsedRegex
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
template<typename Func>
|
template<typename Func>
|
||||||
bool for_each_child(const ParsedRegex& parsed_regex, ParsedRegex::AstNodeIndex index, Func&& func)
|
bool for_each_child(const ParsedRegex& parsed_regex, ParsedRegex::NodeIndex index, Func&& func)
|
||||||
{
|
{
|
||||||
const auto end = parsed_regex.nodes[index].children_end;
|
const auto end = parsed_regex.nodes[index].children_end;
|
||||||
for (auto child = index+1; child != end;
|
for (auto child = index+1; child != end;
|
||||||
|
@ -100,9 +100,9 @@ bool for_each_child(const ParsedRegex& parsed_regex, ParsedRegex::AstNodeIndex i
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Func>
|
template<typename Func>
|
||||||
bool for_each_child_reverse(const ParsedRegex& parsed_regex, ParsedRegex::AstNodeIndex index, Func&& func)
|
bool for_each_child_reverse(const ParsedRegex& parsed_regex, ParsedRegex::NodeIndex index, Func&& func)
|
||||||
{
|
{
|
||||||
auto find_last_child = [&](ParsedRegex::AstNodeIndex begin, ParsedRegex::AstNodeIndex end) {
|
auto find_last_child = [&](ParsedRegex::NodeIndex begin, ParsedRegex::NodeIndex end) {
|
||||||
while (parsed_regex.nodes[begin].children_end != end)
|
while (parsed_regex.nodes[begin].children_end != end)
|
||||||
begin = parsed_regex.nodes[begin].children_end;
|
begin = parsed_regex.nodes[begin].children_end;
|
||||||
return begin;
|
return begin;
|
||||||
|
@ -128,7 +128,7 @@ struct RegexParser
|
||||||
: m_regex{re}, m_pos{re.begin(), re}
|
: m_regex{re}, m_pos{re.begin(), re}
|
||||||
{
|
{
|
||||||
m_parsed_regex.capture_count = 1;
|
m_parsed_regex.capture_count = 1;
|
||||||
AstNodeIndex root = disjunction(0);
|
NodeIndex root = disjunction(0);
|
||||||
kak_assert(root == 0);
|
kak_assert(root == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,11 +143,11 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
using Iterator = utf8::iterator<const char*, Codepoint, int, InvalidPolicy>;
|
using Iterator = utf8::iterator<const char*, Codepoint, int, InvalidPolicy>;
|
||||||
using AstNodeIndex = ParsedRegex::AstNodeIndex;
|
using NodeIndex = ParsedRegex::NodeIndex;
|
||||||
|
|
||||||
AstNodeIndex disjunction(unsigned capture = -1)
|
NodeIndex disjunction(unsigned capture = -1)
|
||||||
{
|
{
|
||||||
AstNodeIndex index = new_node(ParsedRegex::Alternation);
|
NodeIndex index = new_node(ParsedRegex::Alternation);
|
||||||
get_node(index).value = capture;
|
get_node(index).value = capture;
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
|
@ -161,9 +161,9 @@ private:
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
AstNodeIndex alternative(ParsedRegex::Op op = ParsedRegex::Sequence)
|
NodeIndex alternative(ParsedRegex::Op op = ParsedRegex::Sequence)
|
||||||
{
|
{
|
||||||
AstNodeIndex index = new_node(op);
|
NodeIndex index = new_node(op);
|
||||||
while (auto t = term())
|
while (auto t = term())
|
||||||
{}
|
{}
|
||||||
get_node(index).children_end = m_parsed_regex.nodes.size();
|
get_node(index).children_end = m_parsed_regex.nodes.size();
|
||||||
|
@ -171,7 +171,7 @@ private:
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
Optional<AstNodeIndex> term()
|
Optional<NodeIndex> term()
|
||||||
{
|
{
|
||||||
while (modifiers()) // read all modifiers
|
while (modifiers()) // read all modifiers
|
||||||
{}
|
{}
|
||||||
|
@ -212,7 +212,7 @@ private:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
Optional<AstNodeIndex> assertion()
|
Optional<NodeIndex> assertion()
|
||||||
{
|
{
|
||||||
if (at_end())
|
if (at_end())
|
||||||
return {};
|
return {};
|
||||||
|
@ -253,7 +253,7 @@ private:
|
||||||
if (not lookaround_op)
|
if (not lookaround_op)
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
AstNodeIndex lookaround = alternative(*lookaround_op);
|
NodeIndex lookaround = alternative(*lookaround_op);
|
||||||
if (at_end() or *m_pos++ != ')')
|
if (at_end() or *m_pos++ != ')')
|
||||||
parse_error("unclosed parenthesis");
|
parse_error("unclosed parenthesis");
|
||||||
|
|
||||||
|
@ -264,7 +264,7 @@ private:
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
Optional<AstNodeIndex> atom()
|
Optional<NodeIndex> atom()
|
||||||
{
|
{
|
||||||
if (at_end())
|
if (at_end())
|
||||||
return {};
|
return {};
|
||||||
|
@ -277,7 +277,7 @@ private:
|
||||||
{
|
{
|
||||||
++m_pos;
|
++m_pos;
|
||||||
const bool capture = not accept("?:");
|
const bool capture = not accept("?:");
|
||||||
AstNodeIndex content = disjunction(capture ? m_parsed_regex.capture_count++ : -1);
|
NodeIndex content = disjunction(capture ? m_parsed_regex.capture_count++ : -1);
|
||||||
if (at_end() or *m_pos++ != ')')
|
if (at_end() or *m_pos++ != ')')
|
||||||
parse_error("unclosed parenthesis");
|
parse_error("unclosed parenthesis");
|
||||||
return content;
|
return content;
|
||||||
|
@ -298,7 +298,7 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
AstNodeIndex atom_escape()
|
NodeIndex atom_escape()
|
||||||
{
|
{
|
||||||
const Codepoint cp = *m_pos++;
|
const Codepoint cp = *m_pos++;
|
||||||
|
|
||||||
|
@ -409,7 +409,7 @@ private:
|
||||||
ranges.erase(pos+1, ranges.end());
|
ranges.erase(pos+1, ranges.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
AstNodeIndex character_class()
|
NodeIndex character_class()
|
||||||
{
|
{
|
||||||
const bool negative = m_pos != m_regex.end() and *m_pos == '^';
|
const bool negative = m_pos != m_regex.end() and *m_pos == '^';
|
||||||
if (negative)
|
if (negative)
|
||||||
|
@ -580,21 +580,21 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
AstNodeIndex new_node(ParsedRegex::Op op, Codepoint value = -1,
|
NodeIndex new_node(ParsedRegex::Op op, Codepoint value = -1,
|
||||||
ParsedRegex::Quantifier quantifier = {ParsedRegex::Quantifier::One})
|
ParsedRegex::Quantifier quantifier = {ParsedRegex::Quantifier::One})
|
||||||
{
|
{
|
||||||
constexpr auto max_nodes = std::numeric_limits<uint16_t>::max();
|
constexpr auto max_nodes = std::numeric_limits<uint16_t>::max();
|
||||||
const AstNodeIndex res = m_parsed_regex.nodes.size();
|
const NodeIndex res = m_parsed_regex.nodes.size();
|
||||||
if (res == max_nodes)
|
if (res == max_nodes)
|
||||||
parse_error(format("regex parsed to more than {} ast nodes", max_nodes));
|
parse_error(format("regex parsed to more than {} ast nodes", max_nodes));
|
||||||
const AstNodeIndex next = res+1;
|
const NodeIndex next = res+1;
|
||||||
m_parsed_regex.nodes.push_back({op, m_ignore_case, next, value, quantifier});
|
m_parsed_regex.nodes.push_back({op, m_ignore_case, next, value, quantifier});
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool at_end() const { return m_pos == m_regex.end(); }
|
bool at_end() const { return m_pos == m_regex.end(); }
|
||||||
|
|
||||||
ParsedRegex::AstNode& get_node(AstNodeIndex index)
|
ParsedRegex::Node& get_node(NodeIndex index)
|
||||||
{
|
{
|
||||||
return m_parsed_regex.nodes[index];
|
return m_parsed_regex.nodes[index];
|
||||||
}
|
}
|
||||||
|
@ -608,9 +608,9 @@ private:
|
||||||
StringView{m_pos.base(), m_regex.end()}));
|
StringView{m_pos.base(), m_regex.end()}));
|
||||||
}
|
}
|
||||||
|
|
||||||
void validate_lookaround(AstNodeIndex index)
|
void validate_lookaround(NodeIndex index)
|
||||||
{
|
{
|
||||||
for_each_child(m_parsed_regex, index, [this](AstNodeIndex child_index) {
|
for_each_child(m_parsed_regex, index, [this](NodeIndex child_index) {
|
||||||
auto& child = get_node(child_index);
|
auto& child = get_node(child_index);
|
||||||
if (child.op != ParsedRegex::Literal and child.op != ParsedRegex::Matcher and
|
if (child.op != ParsedRegex::Literal and child.op != ParsedRegex::Matcher and
|
||||||
child.op != ParsedRegex::AnyChar)
|
child.op != ParsedRegex::AnyChar)
|
||||||
|
@ -671,7 +671,7 @@ struct RegexCompiler
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
uint32_t compile_node_inner(ParsedRegex::AstNodeIndex index)
|
uint32_t compile_node_inner(ParsedRegex::NodeIndex index)
|
||||||
{
|
{
|
||||||
auto& node = get_node(index);
|
auto& node = get_node(index);
|
||||||
|
|
||||||
|
@ -701,11 +701,11 @@ private:
|
||||||
case ParsedRegex::Sequence:
|
case ParsedRegex::Sequence:
|
||||||
{
|
{
|
||||||
if (m_forward)
|
if (m_forward)
|
||||||
for_each_child(m_parsed_regex, index, [this](ParsedRegex::AstNodeIndex child) {
|
for_each_child(m_parsed_regex, index, [this](ParsedRegex::NodeIndex child) {
|
||||||
compile_node(child); return true;
|
compile_node(child); return true;
|
||||||
});
|
});
|
||||||
else
|
else
|
||||||
for_each_child_reverse(m_parsed_regex, index, [this](ParsedRegex::AstNodeIndex child) {
|
for_each_child_reverse(m_parsed_regex, index, [this](ParsedRegex::NodeIndex child) {
|
||||||
compile_node(child); return true;
|
compile_node(child); return true;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
@ -715,14 +715,14 @@ private:
|
||||||
//kak_assert(children.size() > 1);
|
//kak_assert(children.size() > 1);
|
||||||
|
|
||||||
auto split_pos = m_program.instructions.size();
|
auto split_pos = m_program.instructions.size();
|
||||||
for_each_child(m_parsed_regex, index, [this, index](ParsedRegex::AstNodeIndex child) {
|
for_each_child(m_parsed_regex, index, [this, index](ParsedRegex::NodeIndex child) {
|
||||||
if (child != index+1)
|
if (child != index+1)
|
||||||
push_inst(CompiledRegex::Split_PrioritizeParent);
|
push_inst(CompiledRegex::Split_PrioritizeParent);
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
|
|
||||||
for_each_child(m_parsed_regex, index,
|
for_each_child(m_parsed_regex, index,
|
||||||
[&, end = node.children_end](ParsedRegex::AstNodeIndex child) {
|
[&, end = node.children_end](ParsedRegex::NodeIndex child) {
|
||||||
auto node = compile_node(child);
|
auto node = compile_node(child);
|
||||||
if (child != index+1)
|
if (child != index+1)
|
||||||
m_program.instructions[split_pos++].param = node;
|
m_program.instructions[split_pos++].param = node;
|
||||||
|
@ -799,7 +799,7 @@ private:
|
||||||
return start_pos;
|
return start_pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t compile_node(ParsedRegex::AstNodeIndex index)
|
uint32_t compile_node(ParsedRegex::NodeIndex index)
|
||||||
{
|
{
|
||||||
auto& node = get_node(index);
|
auto& node = get_node(index);
|
||||||
|
|
||||||
|
@ -863,10 +863,10 @@ private:
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t push_lookaround(ParsedRegex::AstNodeIndex index, bool reversed, bool ignore_case)
|
uint32_t push_lookaround(ParsedRegex::NodeIndex index, bool reversed, bool ignore_case)
|
||||||
{
|
{
|
||||||
uint32_t res = m_program.lookarounds.size();
|
uint32_t res = m_program.lookarounds.size();
|
||||||
auto write_matcher = [this, ignore_case](ParsedRegex::AstNodeIndex child) {
|
auto write_matcher = [this, ignore_case](ParsedRegex::NodeIndex child) {
|
||||||
auto& character = get_node(child);
|
auto& character = get_node(child);
|
||||||
if (character.op == ParsedRegex::Literal)
|
if (character.op == ParsedRegex::Literal)
|
||||||
m_program.lookarounds.push_back(ignore_case ? to_lower(character.value)
|
m_program.lookarounds.push_back(ignore_case ? to_lower(character.value)
|
||||||
|
@ -892,7 +892,7 @@ private:
|
||||||
// Fills accepted and rejected according to which chars can start the given node,
|
// Fills accepted and rejected according to which chars can start the given node,
|
||||||
// returns true if the node did not consume the char, hence a following node in
|
// returns true if the node did not consume the char, hence a following node in
|
||||||
// sequence would be still relevant for the parent node start chars computation.
|
// sequence would be still relevant for the parent node start chars computation.
|
||||||
bool compute_start_chars(ParsedRegex::AstNodeIndex index,
|
bool compute_start_chars(ParsedRegex::NodeIndex index,
|
||||||
CompiledRegex::StartChars& start_chars) const
|
CompiledRegex::StartChars& start_chars) const
|
||||||
{
|
{
|
||||||
auto& node = get_node(index);
|
auto& node = get_node(index);
|
||||||
|
@ -939,7 +939,7 @@ private:
|
||||||
case ParsedRegex::Alternation:
|
case ParsedRegex::Alternation:
|
||||||
{
|
{
|
||||||
bool all_consumed = not node.quantifier.allows_none();
|
bool all_consumed = not node.quantifier.allows_none();
|
||||||
for_each_child(m_parsed_regex, index, [&](ParsedRegex::AstNodeIndex child) {
|
for_each_child(m_parsed_regex, index, [&](ParsedRegex::NodeIndex child) {
|
||||||
if (compute_start_chars(child, start_chars))
|
if (compute_start_chars(child, start_chars))
|
||||||
all_consumed = false;
|
all_consumed = false;
|
||||||
return true;
|
return true;
|
||||||
|
@ -975,7 +975,7 @@ private:
|
||||||
return std::make_unique<CompiledRegex::StartChars>(start_chars);
|
return std::make_unique<CompiledRegex::StartChars>(start_chars);
|
||||||
}
|
}
|
||||||
|
|
||||||
const ParsedRegex::AstNode& get_node(ParsedRegex::AstNodeIndex index) const
|
const ParsedRegex::Node& get_node(ParsedRegex::NodeIndex index) const
|
||||||
{
|
{
|
||||||
return m_parsed_regex.nodes[index];
|
return m_parsed_regex.nodes[index];
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user