small regex impl code style tweak
This commit is contained in:
parent
08d2a8ba63
commit
16493a99bb
|
@ -179,11 +179,10 @@ private:
|
||||||
|
|
||||||
NodeIndex disjunction(uint32_t capture = -1)
|
NodeIndex disjunction(uint32_t capture = -1)
|
||||||
{
|
{
|
||||||
NodeIndex index = new_node(ParsedRegex::Alternation);
|
NodeIndex index = add_node(ParsedRegex::Alternation, capture);
|
||||||
get_node(index).value = capture;
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
alternative();
|
alternative(ParsedRegex::Sequence);
|
||||||
if (at_end() or *m_pos != '|')
|
if (at_end() or *m_pos != '|')
|
||||||
break;
|
break;
|
||||||
++m_pos;
|
++m_pos;
|
||||||
|
@ -193,10 +192,10 @@ private:
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
NodeIndex alternative(ParsedRegex::Op op = ParsedRegex::Sequence)
|
NodeIndex alternative(ParsedRegex::Op op)
|
||||||
{
|
{
|
||||||
NodeIndex index = new_node(op);
|
NodeIndex index = add_node(op);
|
||||||
while (auto t = term())
|
while (term())
|
||||||
{}
|
{}
|
||||||
get_node(index).children_end = m_parsed_regex.nodes.size();
|
get_node(index).children_end = m_parsed_regex.nodes.size();
|
||||||
|
|
||||||
|
@ -248,18 +247,18 @@ private:
|
||||||
|
|
||||||
switch (*m_pos)
|
switch (*m_pos)
|
||||||
{
|
{
|
||||||
case '^': ++m_pos; return new_node(ParsedRegex::LineStart);
|
case '^': ++m_pos; return add_node(ParsedRegex::LineStart);
|
||||||
case '$': ++m_pos; return new_node(ParsedRegex::LineEnd);
|
case '$': ++m_pos; return add_node(ParsedRegex::LineEnd);
|
||||||
case '\\':
|
case '\\':
|
||||||
if (m_pos+1 == m_regex.end())
|
if (m_pos+1 == m_regex.end())
|
||||||
return {};
|
return {};
|
||||||
switch (*(m_pos+1))
|
switch (*(m_pos+1))
|
||||||
{
|
{
|
||||||
case 'b': m_pos += 2; return new_node(ParsedRegex::WordBoundary);
|
case 'b': m_pos += 2; return add_node(ParsedRegex::WordBoundary);
|
||||||
case 'B': m_pos += 2; return new_node(ParsedRegex::NotWordBoundary);
|
case 'B': m_pos += 2; return add_node(ParsedRegex::NotWordBoundary);
|
||||||
case 'A': m_pos += 2; return new_node(ParsedRegex::SubjectBegin);
|
case 'A': m_pos += 2; return add_node(ParsedRegex::SubjectBegin);
|
||||||
case 'z': m_pos += 2; return new_node(ParsedRegex::SubjectEnd);
|
case 'z': m_pos += 2; return add_node(ParsedRegex::SubjectEnd);
|
||||||
case 'K': m_pos += 2; return new_node(ParsedRegex::ResetStart);
|
case 'K': m_pos += 2; return add_node(ParsedRegex::ResetStart);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case '(':
|
case '(':
|
||||||
|
@ -306,10 +305,7 @@ private:
|
||||||
{
|
{
|
||||||
case '.':
|
case '.':
|
||||||
++m_pos;
|
++m_pos;
|
||||||
if (m_flags & Flags::DotMatchesNewLine)
|
return add_node((m_flags & Flags::DotMatchesNewLine) ? ParsedRegex::AnyChar : ParsedRegex::AnyCharExceptNewLine);
|
||||||
return new_node(ParsedRegex::AnyChar);
|
|
||||||
else
|
|
||||||
return new_node(ParsedRegex::AnyCharExceptNewLine);
|
|
||||||
case '(':
|
case '(':
|
||||||
{
|
{
|
||||||
uint32_t capture_group = -1;
|
uint32_t capture_group = -1;
|
||||||
|
@ -347,7 +343,7 @@ private:
|
||||||
if (contains("^$.*+?[]{}", cp) or (cp >= 0xF0000 and cp <= 0xFFFFF))
|
if (contains("^$.*+?[]{}", cp) or (cp >= 0xF0000 and cp <= 0xFFFFF))
|
||||||
parse_error(format("unexpected '{}'", cp));
|
parse_error(format("unexpected '{}'", cp));
|
||||||
++m_pos;
|
++m_pos;
|
||||||
return new_node(ParsedRegex::Literal, cp);
|
return add_node(ParsedRegex::Literal, cp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -380,12 +376,12 @@ private:
|
||||||
|
|
||||||
if (cp == 'Q')
|
if (cp == 'Q')
|
||||||
{
|
{
|
||||||
auto escaped_sequence = new_node(ParsedRegex::Sequence);
|
auto escaped_sequence = add_node(ParsedRegex::Sequence);
|
||||||
constexpr StringView end_mark{"\\E"};
|
constexpr StringView end_mark{"\\E"};
|
||||||
|
|
||||||
auto quote_end = std::search(m_pos.base(), m_regex.end(), end_mark.begin(), end_mark.end());
|
auto quote_end = std::search(m_pos.base(), m_regex.end(), end_mark.begin(), end_mark.end());
|
||||||
while (m_pos != quote_end)
|
while (m_pos != quote_end)
|
||||||
new_node(ParsedRegex::Literal, *m_pos++);
|
add_node(ParsedRegex::Literal, *m_pos++);
|
||||||
get_node(escaped_sequence).children_end = m_parsed_regex.nodes.size();
|
get_node(escaped_sequence).children_end = m_parsed_regex.nodes.size();
|
||||||
|
|
||||||
if (quote_end != m_regex.end())
|
if (quote_end != m_regex.end())
|
||||||
|
@ -397,33 +393,33 @@ private:
|
||||||
// CharacterClassEscape
|
// CharacterClassEscape
|
||||||
auto class_it = find_if(character_class_escapes, [cp](auto& c) { return c.cp == cp; });
|
auto class_it = find_if(character_class_escapes, [cp](auto& c) { return c.cp == cp; });
|
||||||
if (class_it != std::end(character_class_escapes))
|
if (class_it != std::end(character_class_escapes))
|
||||||
return new_node(ParsedRegex::CharType, (Codepoint)class_it->ctype);
|
return add_node(ParsedRegex::CharType, (Codepoint)class_it->ctype);
|
||||||
|
|
||||||
// CharacterEscape
|
// CharacterEscape
|
||||||
for (auto& control : control_escapes)
|
for (auto& control : control_escapes)
|
||||||
{
|
{
|
||||||
if (control.name == cp)
|
if (control.name == cp)
|
||||||
return new_node(ParsedRegex::Literal, control.value);
|
return add_node(ParsedRegex::Literal, control.value);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cp == '0')
|
if (cp == '0')
|
||||||
return new_node(ParsedRegex::Literal, '\0');
|
return add_node(ParsedRegex::Literal, '\0');
|
||||||
else if (cp == 'c')
|
else if (cp == 'c')
|
||||||
{
|
{
|
||||||
if (at_end())
|
if (at_end())
|
||||||
parse_error("unterminated control escape");
|
parse_error("unterminated control escape");
|
||||||
Codepoint ctrl = *m_pos++;
|
Codepoint ctrl = *m_pos++;
|
||||||
if (('a' <= ctrl and ctrl <= 'z') or ('A' <= ctrl and ctrl <= 'Z'))
|
if (('a' <= ctrl and ctrl <= 'z') or ('A' <= ctrl and ctrl <= 'Z'))
|
||||||
return new_node(ParsedRegex::Literal, ctrl % 32);
|
return add_node(ParsedRegex::Literal, ctrl % 32);
|
||||||
parse_error(format("Invalid control escape character '{}'", ctrl));
|
parse_error(format("Invalid control escape character '{}'", ctrl));
|
||||||
}
|
}
|
||||||
else if (cp == 'x')
|
else if (cp == 'x')
|
||||||
return new_node(ParsedRegex::Literal, read_hex(2));
|
return add_node(ParsedRegex::Literal, read_hex(2));
|
||||||
else if (cp == 'u')
|
else if (cp == 'u')
|
||||||
return new_node(ParsedRegex::Literal, read_hex(6));
|
return add_node(ParsedRegex::Literal, read_hex(6));
|
||||||
|
|
||||||
if (contains("^$\\.*+?()[]{}|", cp)) // SyntaxCharacter
|
if (contains("^$\\.*+?()[]{}|", cp)) // SyntaxCharacter
|
||||||
return new_node(ParsedRegex::Literal, cp);
|
return add_node(ParsedRegex::Literal, cp);
|
||||||
parse_error(format("unknown atom escape '{}'", cp));
|
parse_error(format("unknown atom escape '{}'", cp));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -542,16 +538,16 @@ private:
|
||||||
if (character_class.ctypes == CharacterType::None and not character_class.negative and
|
if (character_class.ctypes == CharacterType::None and not character_class.negative and
|
||||||
character_class.ranges.size() == 1 and
|
character_class.ranges.size() == 1 and
|
||||||
character_class.ranges.front().min == character_class.ranges.front().max)
|
character_class.ranges.front().min == character_class.ranges.front().max)
|
||||||
return new_node(ParsedRegex::Literal, character_class.ranges.front().min);
|
return add_node(ParsedRegex::Literal, character_class.ranges.front().min);
|
||||||
|
|
||||||
if (character_class.ctypes != CharacterType::None and not character_class.negative and
|
if (character_class.ctypes != CharacterType::None and not character_class.negative and
|
||||||
character_class.ranges.empty())
|
character_class.ranges.empty())
|
||||||
return new_node(ParsedRegex::CharType, (Codepoint)character_class.ctypes);
|
return add_node(ParsedRegex::CharType, (Codepoint)character_class.ctypes);
|
||||||
|
|
||||||
auto class_id = m_parsed_regex.character_classes.size();
|
auto class_id = m_parsed_regex.character_classes.size();
|
||||||
m_parsed_regex.character_classes.push_back(std::move(character_class));
|
m_parsed_regex.character_classes.push_back(std::move(character_class));
|
||||||
|
|
||||||
return new_node(ParsedRegex::CharClass, class_id);
|
return add_node(ParsedRegex::CharClass, class_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
ParsedRegex::Quantifier quantifier()
|
ParsedRegex::Quantifier quantifier()
|
||||||
|
@ -604,8 +600,7 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NodeIndex new_node(ParsedRegex::Op op, Codepoint value = -1,
|
NodeIndex add_node(ParsedRegex::Op op, Codepoint value = -1, ParsedRegex::Quantifier quantifier = {ParsedRegex::Quantifier::One})
|
||||||
ParsedRegex::Quantifier quantifier = {ParsedRegex::Quantifier::One})
|
|
||||||
{
|
{
|
||||||
constexpr auto max_nodes = std::numeric_limits<int16_t>::max();
|
constexpr auto max_nodes = std::numeric_limits<int16_t>::max();
|
||||||
const NodeIndex res = m_parsed_regex.nodes.size();
|
const NodeIndex res = m_parsed_regex.nodes.size();
|
||||||
|
@ -616,13 +611,12 @@ private:
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool at_end() const { return m_pos == m_regex.end(); }
|
|
||||||
|
|
||||||
ParsedRegex::Node& get_node(NodeIndex index)
|
ParsedRegex::Node& get_node(NodeIndex index)
|
||||||
{
|
{
|
||||||
return m_parsed_regex.nodes[index];
|
return m_parsed_regex.nodes[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool at_end() const { return m_pos == m_regex.end(); }
|
||||||
|
|
||||||
[[gnu::noreturn]]
|
[[gnu::noreturn]]
|
||||||
void parse_error(StringView error) const
|
void parse_error(StringView error) const
|
||||||
|
|
Loading…
Reference in New Issue
Block a user