Regex: compute start chars from matchers, do not compute it from lookarounds
Computing potential start characters from lookarounds is more complex than expected, and not worth the complexity.
This commit is contained in:
parent
621b0d3ab8
commit
6e65589a34
|
@ -9,6 +9,8 @@
|
||||||
#include "string_utils.hh"
|
#include "string_utils.hh"
|
||||||
#include "vector.hh"
|
#include "vector.hh"
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -763,8 +765,7 @@ private:
|
||||||
// returns true if the node did not consume the char, hence a following node in
|
// returns true if the node did not consume the char, hence a following node in
|
||||||
// sequence would be still relevant for the parent node start chars computation.
|
// sequence would be still relevant for the parent node start chars computation.
|
||||||
bool compute_start_chars(const ParsedRegex::AstNodePtr& node,
|
bool compute_start_chars(const ParsedRegex::AstNodePtr& node,
|
||||||
bool (&accepted)[start_chars_count],
|
bool (&accepted)[start_chars_count]) const
|
||||||
bool (&rejected)[start_chars_count]) const
|
|
||||||
{
|
{
|
||||||
switch (node->op)
|
switch (node->op)
|
||||||
{
|
{
|
||||||
|
@ -785,14 +786,15 @@ private:
|
||||||
b = true;
|
b = true;
|
||||||
return node->quantifier.allows_none();
|
return node->quantifier.allows_none();
|
||||||
case ParsedRegex::Matcher:
|
case ParsedRegex::Matcher:
|
||||||
for (auto& b : accepted) // treat matcher as everything can match for now
|
for (Codepoint c = 0; c < start_chars_count; ++c)
|
||||||
b = true;
|
if (m_program.matchers[node->value](c))
|
||||||
|
accepted[c] = true;
|
||||||
return node->quantifier.allows_none();
|
return node->quantifier.allows_none();
|
||||||
case ParsedRegex::Sequence:
|
case ParsedRegex::Sequence:
|
||||||
{
|
{
|
||||||
bool consumed = false;
|
bool consumed = false;
|
||||||
auto consumes = [&, this](auto& child) {
|
auto consumes = [&, this](auto& child) {
|
||||||
return not this->compute_start_chars(child, accepted, rejected);
|
return not this->compute_start_chars(child, accepted);
|
||||||
};
|
};
|
||||||
if (m_forward)
|
if (m_forward)
|
||||||
consumed = contains_that(node->children, consumes);
|
consumed = contains_that(node->children, consumes);
|
||||||
|
@ -806,7 +808,7 @@ private:
|
||||||
bool all_consumed = not node->quantifier.allows_none();
|
bool all_consumed = not node->quantifier.allows_none();
|
||||||
for (auto& child : node->children)
|
for (auto& child : node->children)
|
||||||
{
|
{
|
||||||
if (compute_start_chars(child, accepted, rejected))
|
if (compute_start_chars(child, accepted))
|
||||||
all_consumed = false;
|
all_consumed = false;
|
||||||
}
|
}
|
||||||
return not all_consumed;
|
return not all_consumed;
|
||||||
|
@ -818,42 +820,10 @@ private:
|
||||||
case ParsedRegex::SubjectBegin:
|
case ParsedRegex::SubjectBegin:
|
||||||
case ParsedRegex::SubjectEnd:
|
case ParsedRegex::SubjectEnd:
|
||||||
case ParsedRegex::ResetStart:
|
case ParsedRegex::ResetStart:
|
||||||
return true;
|
|
||||||
case ParsedRegex::LookAhead:
|
case ParsedRegex::LookAhead:
|
||||||
case ParsedRegex::LookBehind:
|
case ParsedRegex::LookBehind:
|
||||||
if (not node->children.empty() and
|
|
||||||
m_forward == (node->op == ParsedRegex::LookAhead))
|
|
||||||
{
|
|
||||||
auto& child = m_forward ? node->children.front() : node->children.back();
|
|
||||||
if (child->op == ParsedRegex::Literal and child->value < start_chars_count)
|
|
||||||
{
|
|
||||||
// Every other char is rejected
|
|
||||||
for (Codepoint c = 0; c < start_chars_count; ++c)
|
|
||||||
{
|
|
||||||
if ((not child->ignore_case and c != child->value) or
|
|
||||||
(c != to_lower(child->value) and c != to_upper(child->value)))
|
|
||||||
rejected[c] = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
case ParsedRegex::NegativeLookAhead:
|
case ParsedRegex::NegativeLookAhead:
|
||||||
case ParsedRegex::NegativeLookBehind:
|
case ParsedRegex::NegativeLookBehind:
|
||||||
if (node->children.size() == 1 and
|
|
||||||
m_forward == (node->op == ParsedRegex::NegativeLookAhead))
|
|
||||||
{
|
|
||||||
auto& child = node->children.front();
|
|
||||||
if (child->op == ParsedRegex::Literal and child->value < start_chars_count)
|
|
||||||
{
|
|
||||||
if (child->ignore_case)
|
|
||||||
{
|
|
||||||
rejected[to_lower(child->value)] = true;
|
|
||||||
rejected[to_upper(child->value)] = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
rejected[child->value] = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -863,17 +833,14 @@ private:
|
||||||
std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const
|
std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const
|
||||||
{
|
{
|
||||||
bool accepted[start_chars_count] = {};
|
bool accepted[start_chars_count] = {};
|
||||||
bool rejected[start_chars_count] = {};
|
if (compute_start_chars(m_parsed_regex.ast, accepted))
|
||||||
if (compute_start_chars(m_parsed_regex.ast, accepted, rejected))
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
if (not contains(accepted, false) and not contains(rejected, true))
|
if (not contains(accepted, false))
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
auto start_chars = std::make_unique<CompiledRegex::StartChars>();
|
auto start_chars = std::make_unique<CompiledRegex::StartChars>();
|
||||||
for (int i = 0; i < start_chars_count; ++i)
|
memcpy(start_chars->map, accepted, sizeof(bool[start_chars_count]));
|
||||||
start_chars->map[i] = accepted[i] and not rejected[i];
|
|
||||||
|
|
||||||
return start_chars;
|
return start_chars;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user