Regex: optimize parsing a bit

This commit is contained in:
Maxime Coste 2017-11-30 14:32:29 +08:00
parent c1f0efa3f4
commit b91f43b031

View File

@ -189,28 +189,16 @@ private:
return {}; return {};
} }
// Tries to consume the literal text `expected` starting at the current
// parse position.
//
// Returns true and moves m_pos just past the matched text when every
// element of `expected` compares equal to the regex source at that spot.
// Returns false otherwise (mismatch, or the regex ends first); in that
// case m_pos is not modified, since it is only assigned after a full match.
bool accept(StringView expected)
{
    // Work on a copy of the raw position so a failed match has no effect.
    auto cursor = m_pos.base();
    for (const auto wanted : expected)
    {
        // Ran out of regex text before the whole literal was seen.
        if (cursor == m_regex.end())
            return false;

        const auto found = *cursor;
        ++cursor;
        if (found != wanted)
            return false;
    }

    // Full match: commit the advanced position.
    m_pos = Iterator{cursor, m_regex};
    return true;
}
bool modifiers() bool modifiers()
{ {
if (accept("(?i)")) auto it = m_pos.base();
if (m_regex.end() - it >= 4 and *it++ == '(' and *it++ == '?')
{ {
m_ignore_case = true; auto m = *it++;
return true; if ((m != 'i' and m != 'I') or *it++ != ')')
} return false;
if (accept("(?I)")) m_ignore_case = (m == 'i');
{ m_pos = Iterator{it, m_regex};
m_ignore_case = false;
return true; return true;
} }
return false; return false;
@ -239,25 +227,29 @@ private:
break; break;
case '(': case '(':
{ {
Optional<ParsedRegex::Op> lookaround_op; auto it = m_pos.base()+1;
constexpr struct { StringView prefix; ParsedRegex::Op op; } lookarounds[] = { if (m_regex.end() - it <= 2 or *it++ != '?')
{ "(?=", ParsedRegex::LookAhead },
{ "(?!", ParsedRegex::NegativeLookAhead },
{ "(?<=", ParsedRegex::LookBehind },
{ "(?<!", ParsedRegex::NegativeLookBehind }
};
for (auto& lookaround : lookarounds)
{
if (accept(lookaround.prefix))
{
lookaround_op = lookaround.op;
break;
}
}
if (not lookaround_op)
return {}; return {};
NodeIndex lookaround = alternative(*lookaround_op); ParsedRegex::Op op;
switch (*it++)
{
case '=': op = ParsedRegex::LookAhead; break;
case '!': op = ParsedRegex::NegativeLookAhead; break;
case '<':
{
switch (*it++)
{
case '=': op = ParsedRegex::LookBehind; break;
case '!': op = ParsedRegex::NegativeLookBehind; break;
default: return {};
}
break;
}
default: return {};
}
m_pos = Iterator{it, m_regex};
NodeIndex lookaround = alternative(op);
if (at_end() or *m_pos++ != ')') if (at_end() or *m_pos++ != ')')
parse_error("unclosed parenthesis"); parse_error("unclosed parenthesis");
@ -273,15 +265,20 @@ private:
if (at_end()) if (at_end())
return {}; return {};
const Codepoint cp = *m_pos; switch (const Codepoint cp = *m_pos)
switch (cp)
{ {
case '.': ++m_pos; return new_node(ParsedRegex::AnyChar); case '.': ++m_pos; return new_node(ParsedRegex::AnyChar);
case '(': case '(':
{ {
++m_pos; auto captures = [this, it = (++m_pos).base()]() mutable {
const bool capture = not accept("?:"); if (m_regex.end() - it >= 2 and *it++ == '?' and *it++ == ':')
NodeIndex content = disjunction(capture ? m_parsed_regex.capture_count++ : -1); {
m_pos = Iterator{it, m_regex};
return false;
}
return true;
};
NodeIndex content = disjunction(captures() ? m_parsed_regex.capture_count++ : -1);
if (at_end() or *m_pos++ != ')') if (at_end() or *m_pos++ != ')')
parse_error("unclosed parenthesis"); parse_error("unclosed parenthesis");
return content; return content;