Regex: compute start chars from matchers, do not compute it from lookarounds

Computing potential start characters from lookarounds is more complex
than expected, and not worth the complexity.
This commit is contained in:
Maxime Coste 2017-10-14 16:50:49 +08:00
parent 621b0d3ab8
commit 6e65589a34

View File

@ -9,6 +9,8 @@
#include "string_utils.hh" #include "string_utils.hh"
#include "vector.hh" #include "vector.hh"
#include <cstring>
namespace Kakoune namespace Kakoune
{ {
@ -763,8 +765,7 @@ private:
// returns true if the node did not consume the char, hence a following node in // returns true if the node did not consume the char, hence a following node in
// sequence would be still relevant for the parent node start chars computation. // sequence would be still relevant for the parent node start chars computation.
bool compute_start_chars(const ParsedRegex::AstNodePtr& node, bool compute_start_chars(const ParsedRegex::AstNodePtr& node,
bool (&accepted)[start_chars_count], bool (&accepted)[start_chars_count]) const
bool (&rejected)[start_chars_count]) const
{ {
switch (node->op) switch (node->op)
{ {
@ -785,14 +786,15 @@ private:
b = true; b = true;
return node->quantifier.allows_none(); return node->quantifier.allows_none();
case ParsedRegex::Matcher: case ParsedRegex::Matcher:
for (auto& b : accepted) // treat matcher as everything can match for now for (Codepoint c = 0; c < start_chars_count; ++c)
b = true; if (m_program.matchers[node->value](c))
accepted[c] = true;
return node->quantifier.allows_none(); return node->quantifier.allows_none();
case ParsedRegex::Sequence: case ParsedRegex::Sequence:
{ {
bool consumed = false; bool consumed = false;
auto consumes = [&, this](auto& child) { auto consumes = [&, this](auto& child) {
return not this->compute_start_chars(child, accepted, rejected); return not this->compute_start_chars(child, accepted);
}; };
if (m_forward) if (m_forward)
consumed = contains_that(node->children, consumes); consumed = contains_that(node->children, consumes);
@ -806,7 +808,7 @@ private:
bool all_consumed = not node->quantifier.allows_none(); bool all_consumed = not node->quantifier.allows_none();
for (auto& child : node->children) for (auto& child : node->children)
{ {
if (compute_start_chars(child, accepted, rejected)) if (compute_start_chars(child, accepted))
all_consumed = false; all_consumed = false;
} }
return not all_consumed; return not all_consumed;
@ -818,42 +820,10 @@ private:
case ParsedRegex::SubjectBegin: case ParsedRegex::SubjectBegin:
case ParsedRegex::SubjectEnd: case ParsedRegex::SubjectEnd:
case ParsedRegex::ResetStart: case ParsedRegex::ResetStart:
return true;
case ParsedRegex::LookAhead: case ParsedRegex::LookAhead:
case ParsedRegex::LookBehind: case ParsedRegex::LookBehind:
if (not node->children.empty() and
m_forward == (node->op == ParsedRegex::LookAhead))
{
auto& child = m_forward ? node->children.front() : node->children.back();
if (child->op == ParsedRegex::Literal and child->value < start_chars_count)
{
// Every other char is rejected
for (Codepoint c = 0; c < start_chars_count; ++c)
{
if ((not child->ignore_case and c != child->value) or
(c != to_lower(child->value) and c != to_upper(child->value)))
rejected[c] = true;
}
}
}
return true;
case ParsedRegex::NegativeLookAhead: case ParsedRegex::NegativeLookAhead:
case ParsedRegex::NegativeLookBehind: case ParsedRegex::NegativeLookBehind:
if (node->children.size() == 1 and
m_forward == (node->op == ParsedRegex::NegativeLookAhead))
{
auto& child = node->children.front();
if (child->op == ParsedRegex::Literal and child->value < start_chars_count)
{
if (child->ignore_case)
{
rejected[to_lower(child->value)] = true;
rejected[to_upper(child->value)] = true;
}
else
rejected[child->value] = true;
}
}
return true; return true;
} }
return false; return false;
@ -863,17 +833,14 @@ private:
std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const
{ {
bool accepted[start_chars_count] = {}; bool accepted[start_chars_count] = {};
bool rejected[start_chars_count] = {}; if (compute_start_chars(m_parsed_regex.ast, accepted))
if (compute_start_chars(m_parsed_regex.ast, accepted, rejected))
return nullptr; return nullptr;
if (not contains(accepted, false) and not contains(rejected, true)) if (not contains(accepted, false))
return nullptr; return nullptr;
auto start_chars = std::make_unique<CompiledRegex::StartChars>(); auto start_chars = std::make_unique<CompiledRegex::StartChars>();
for (int i = 0; i < start_chars_count; ++i) memcpy(start_chars->map, accepted, sizeof(bool[start_chars_count]));
start_chars->map[i] = accepted[i] and not rejected[i];
return start_chars; return start_chars;
} }