Regex: rename StartChars to StartDesc
It only contains chars for now, but its still more generally describing where matches can start.
This commit is contained in:
parent
b91f43b031
commit
65b057f261
|
@ -14,7 +14,7 @@
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
|
||||||
constexpr Codepoint CompiledRegex::StartChars::other;
|
constexpr Codepoint CompiledRegex::StartDesc::other;
|
||||||
|
|
||||||
struct ParsedRegex
|
struct ParsedRegex
|
||||||
{
|
{
|
||||||
|
@ -623,7 +623,7 @@ struct RegexCompiler
|
||||||
{
|
{
|
||||||
// Approximation of the number of instructions generated
|
// Approximation of the number of instructions generated
|
||||||
m_program.instructions.reserve(CompiledRegex::search_prefix_size + parsed_regex.nodes.size() + 1);
|
m_program.instructions.reserve(CompiledRegex::search_prefix_size + parsed_regex.nodes.size() + 1);
|
||||||
m_program.start_chars = compute_start_chars();
|
m_program.start_desc = compute_start_desc();
|
||||||
|
|
||||||
write_search_prefix();
|
write_search_prefix();
|
||||||
compile_node(0);
|
compile_node(0);
|
||||||
|
@ -861,28 +861,28 @@ private:
|
||||||
// Fills accepted and rejected according to which chars can start the given node,
|
// Fills accepted and rejected according to which chars can start the given node,
|
||||||
// returns true if the node did not consume the char, hence a following node in
|
// returns true if the node did not consume the char, hence a following node in
|
||||||
// sequence would be still relevant for the parent node start chars computation.
|
// sequence would be still relevant for the parent node start chars computation.
|
||||||
bool compute_start_chars(ParsedRegex::NodeIndex index,
|
bool compute_start_desc(ParsedRegex::NodeIndex index,
|
||||||
CompiledRegex::StartChars& start_chars) const
|
CompiledRegex::StartDesc& start_desc) const
|
||||||
{
|
{
|
||||||
auto& node = get_node(index);
|
auto& node = get_node(index);
|
||||||
switch (node.op)
|
switch (node.op)
|
||||||
{
|
{
|
||||||
case ParsedRegex::Literal:
|
case ParsedRegex::Literal:
|
||||||
if (node.value < CompiledRegex::StartChars::count)
|
if (node.value < CompiledRegex::StartDesc::count)
|
||||||
{
|
{
|
||||||
if (node.ignore_case)
|
if (node.ignore_case)
|
||||||
{
|
{
|
||||||
start_chars.map[to_lower(node.value)] = true;
|
start_desc.map[to_lower(node.value)] = true;
|
||||||
start_chars.map[to_upper(node.value)] = true;
|
start_desc.map[to_upper(node.value)] = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
start_chars.map[node.value] = true;
|
start_desc.map[node.value] = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
start_chars.map[CompiledRegex::StartChars::other] = true;
|
start_desc.map[CompiledRegex::StartDesc::other] = true;
|
||||||
return node.quantifier.allows_none();
|
return node.quantifier.allows_none();
|
||||||
case ParsedRegex::AnyChar:
|
case ParsedRegex::AnyChar:
|
||||||
for (auto& b : start_chars.map)
|
for (auto& b : start_desc.map)
|
||||||
b = true;
|
b = true;
|
||||||
return node.quantifier.allows_none();
|
return node.quantifier.allows_none();
|
||||||
case ParsedRegex::Class:
|
case ParsedRegex::Class:
|
||||||
|
@ -892,39 +892,39 @@ private:
|
||||||
{
|
{
|
||||||
for (auto& range : character_class.ranges)
|
for (auto& range : character_class.ranges)
|
||||||
{
|
{
|
||||||
auto min = std::min(CompiledRegex::StartChars::other, range.min);
|
auto min = std::min(CompiledRegex::StartDesc::other, range.min);
|
||||||
auto max = std::min(CompiledRegex::StartChars::other, range.max);
|
auto max = std::min(CompiledRegex::StartDesc::other, range.max);
|
||||||
for (Codepoint cp = min; cp <= max; ++cp)
|
for (Codepoint cp = min; cp <= max; ++cp)
|
||||||
start_chars.map[cp] = true;
|
start_desc.map[cp] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (Codepoint cp = 0; cp < CompiledRegex::StartChars::other; ++cp)
|
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
|
||||||
{
|
{
|
||||||
if (start_chars.map[cp] or is_character_class(character_class, cp))
|
if (start_desc.map[cp] or is_character_class(character_class, cp))
|
||||||
start_chars.map[cp] = true;
|
start_desc.map[cp] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
start_chars.map[CompiledRegex::StartChars::other] = true;
|
start_desc.map[CompiledRegex::StartDesc::other] = true;
|
||||||
return node.quantifier.allows_none();
|
return node.quantifier.allows_none();
|
||||||
}
|
}
|
||||||
case ParsedRegex::CharacterType:
|
case ParsedRegex::CharacterType:
|
||||||
{
|
{
|
||||||
const CharacterType ctype = (CharacterType)node.value;
|
const CharacterType ctype = (CharacterType)node.value;
|
||||||
for (Codepoint cp = 0; cp < CompiledRegex::StartChars::other; ++cp)
|
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
|
||||||
{
|
{
|
||||||
if (is_ctype(ctype, cp))
|
if (is_ctype(ctype, cp))
|
||||||
start_chars.map[cp] = true;
|
start_desc.map[cp] = true;
|
||||||
}
|
}
|
||||||
start_chars.map[CompiledRegex::StartChars::other] = true;
|
start_desc.map[CompiledRegex::StartDesc::other] = true;
|
||||||
return node.quantifier.allows_none();
|
return node.quantifier.allows_none();
|
||||||
}
|
}
|
||||||
case ParsedRegex::Sequence:
|
case ParsedRegex::Sequence:
|
||||||
{
|
{
|
||||||
bool did_not_consume = false;
|
bool did_not_consume = false;
|
||||||
auto does_not_consume = [&, this](auto child) {
|
auto does_not_consume = [&, this](auto child) {
|
||||||
return this->compute_start_chars(child, start_chars);
|
return this->compute_start_desc(child, start_desc);
|
||||||
};
|
};
|
||||||
if (m_forward)
|
if (m_forward)
|
||||||
did_not_consume = for_each_child(m_parsed_regex, index, does_not_consume);
|
did_not_consume = for_each_child(m_parsed_regex, index, does_not_consume);
|
||||||
|
@ -937,7 +937,7 @@ private:
|
||||||
{
|
{
|
||||||
bool all_consumed = not node.quantifier.allows_none();
|
bool all_consumed = not node.quantifier.allows_none();
|
||||||
for_each_child(m_parsed_regex, index, [&](ParsedRegex::NodeIndex child) {
|
for_each_child(m_parsed_regex, index, [&](ParsedRegex::NodeIndex child) {
|
||||||
if (compute_start_chars(child, start_chars))
|
if (compute_start_desc(child, start_desc))
|
||||||
all_consumed = false;
|
all_consumed = false;
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
|
@ -960,14 +960,14 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
[[gnu::noinline]]
|
[[gnu::noinline]]
|
||||||
std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const
|
std::unique_ptr<CompiledRegex::StartDesc> compute_start_desc() const
|
||||||
{
|
{
|
||||||
CompiledRegex::StartChars start_chars{};
|
CompiledRegex::StartDesc start_desc{};
|
||||||
if (compute_start_chars(0, start_chars) or
|
if (compute_start_desc(0, start_desc) or
|
||||||
not contains(start_chars.map, false))
|
not contains(start_desc.map, false))
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
return std::make_unique<CompiledRegex::StartChars>(start_chars);
|
return std::make_unique<CompiledRegex::StartDesc>(start_desc);
|
||||||
}
|
}
|
||||||
|
|
||||||
const ParsedRegex::Node& get_node(ParsedRegex::NodeIndex index) const
|
const ParsedRegex::Node& get_node(ParsedRegex::NodeIndex index) const
|
||||||
|
|
|
@ -101,14 +101,14 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
||||||
MatchDirection direction;
|
MatchDirection direction;
|
||||||
size_t save_count;
|
size_t save_count;
|
||||||
|
|
||||||
struct StartChars
|
struct StartDesc
|
||||||
{
|
{
|
||||||
static constexpr size_t count = 256;
|
static constexpr size_t count = 256;
|
||||||
static constexpr Codepoint other = 256;
|
static constexpr Codepoint other = 256;
|
||||||
bool map[count+1];
|
bool map[count+1];
|
||||||
};
|
};
|
||||||
|
|
||||||
std::unique_ptr<StartChars> start_chars;
|
std::unique_ptr<StartDesc> start_desc;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class RegexCompileFlags
|
enum class RegexCompileFlags
|
||||||
|
@ -183,8 +183,8 @@ public:
|
||||||
|
|
||||||
const bool search = (flags & RegexExecFlags::Search);
|
const bool search = (flags & RegexExecFlags::Search);
|
||||||
Utf8It start{m_begin};
|
Utf8It start{m_begin};
|
||||||
if (search)
|
if (search and m_program.start_desc)
|
||||||
to_next_start(start, m_end, m_program.start_chars.get());
|
to_next_start(start, m_end, *m_program.start_desc);
|
||||||
|
|
||||||
return exec_program(start, Thread{&m_program.instructions[search ? 0 : CompiledRegex::search_prefix_size], nullptr});
|
return exec_program(start, Thread{&m_program.instructions[search ? 0 : CompiledRegex::search_prefix_size], nullptr});
|
||||||
}
|
}
|
||||||
|
@ -460,19 +460,16 @@ private:
|
||||||
std::reverse(state.current_threads.begin(), state.current_threads.end());
|
std::reverse(state.current_threads.begin(), state.current_threads.end());
|
||||||
++pos;
|
++pos;
|
||||||
|
|
||||||
if (find_next_start)
|
if (find_next_start and m_program.start_desc)
|
||||||
to_next_start(pos, m_end, m_program.start_chars.get());
|
to_next_start(pos, m_end, *m_program.start_desc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void to_next_start(Utf8It& start, const Utf8It& end,
|
void to_next_start(Utf8It& start, const Utf8It& end,
|
||||||
const CompiledRegex::StartChars* start_chars)
|
const CompiledRegex::StartDesc& start_desc)
|
||||||
{
|
{
|
||||||
if (not start_chars)
|
|
||||||
return;
|
|
||||||
|
|
||||||
while (start != end and *start >= 0 and
|
while (start != end and *start >= 0 and
|
||||||
not start_chars->map[std::min(*start, CompiledRegex::StartChars::other)])
|
not start_desc.map[std::min(*start, CompiledRegex::StartDesc::other)])
|
||||||
++start;
|
++start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user