Regex: rename StartChars to StartDesc
It only contains chars for now, but it more generally describes where matches can start.
This commit is contained in:
parent
b91f43b031
commit
65b057f261
|
@ -14,7 +14,7 @@
|
|||
namespace Kakoune
|
||||
{
|
||||
|
||||
constexpr Codepoint CompiledRegex::StartChars::other;
|
||||
constexpr Codepoint CompiledRegex::StartDesc::other;
|
||||
|
||||
struct ParsedRegex
|
||||
{
|
||||
|
@ -623,7 +623,7 @@ struct RegexCompiler
|
|||
{
|
||||
// Approximation of the number of instructions generated
|
||||
m_program.instructions.reserve(CompiledRegex::search_prefix_size + parsed_regex.nodes.size() + 1);
|
||||
m_program.start_chars = compute_start_chars();
|
||||
m_program.start_desc = compute_start_desc();
|
||||
|
||||
write_search_prefix();
|
||||
compile_node(0);
|
||||
|
@ -861,28 +861,28 @@ private:
|
|||
// Fills accepted and rejected according to which chars can start the given node,
|
||||
// returns true if the node did not consume the char, hence a following node in
|
||||
// sequence would be still relevant for the parent node start chars computation.
|
||||
bool compute_start_chars(ParsedRegex::NodeIndex index,
|
||||
CompiledRegex::StartChars& start_chars) const
|
||||
bool compute_start_desc(ParsedRegex::NodeIndex index,
|
||||
CompiledRegex::StartDesc& start_desc) const
|
||||
{
|
||||
auto& node = get_node(index);
|
||||
switch (node.op)
|
||||
{
|
||||
case ParsedRegex::Literal:
|
||||
if (node.value < CompiledRegex::StartChars::count)
|
||||
if (node.value < CompiledRegex::StartDesc::count)
|
||||
{
|
||||
if (node.ignore_case)
|
||||
{
|
||||
start_chars.map[to_lower(node.value)] = true;
|
||||
start_chars.map[to_upper(node.value)] = true;
|
||||
start_desc.map[to_lower(node.value)] = true;
|
||||
start_desc.map[to_upper(node.value)] = true;
|
||||
}
|
||||
else
|
||||
start_chars.map[node.value] = true;
|
||||
start_desc.map[node.value] = true;
|
||||
}
|
||||
else
|
||||
start_chars.map[CompiledRegex::StartChars::other] = true;
|
||||
start_desc.map[CompiledRegex::StartDesc::other] = true;
|
||||
return node.quantifier.allows_none();
|
||||
case ParsedRegex::AnyChar:
|
||||
for (auto& b : start_chars.map)
|
||||
for (auto& b : start_desc.map)
|
||||
b = true;
|
||||
return node.quantifier.allows_none();
|
||||
case ParsedRegex::Class:
|
||||
|
@ -892,39 +892,39 @@ private:
|
|||
{
|
||||
for (auto& range : character_class.ranges)
|
||||
{
|
||||
auto min = std::min(CompiledRegex::StartChars::other, range.min);
|
||||
auto max = std::min(CompiledRegex::StartChars::other, range.max);
|
||||
auto min = std::min(CompiledRegex::StartDesc::other, range.min);
|
||||
auto max = std::min(CompiledRegex::StartDesc::other, range.max);
|
||||
for (Codepoint cp = min; cp <= max; ++cp)
|
||||
start_chars.map[cp] = true;
|
||||
start_desc.map[cp] = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (Codepoint cp = 0; cp < CompiledRegex::StartChars::other; ++cp)
|
||||
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
|
||||
{
|
||||
if (start_chars.map[cp] or is_character_class(character_class, cp))
|
||||
start_chars.map[cp] = true;
|
||||
if (start_desc.map[cp] or is_character_class(character_class, cp))
|
||||
start_desc.map[cp] = true;
|
||||
}
|
||||
}
|
||||
start_chars.map[CompiledRegex::StartChars::other] = true;
|
||||
start_desc.map[CompiledRegex::StartDesc::other] = true;
|
||||
return node.quantifier.allows_none();
|
||||
}
|
||||
case ParsedRegex::CharacterType:
|
||||
{
|
||||
const CharacterType ctype = (CharacterType)node.value;
|
||||
for (Codepoint cp = 0; cp < CompiledRegex::StartChars::other; ++cp)
|
||||
for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
|
||||
{
|
||||
if (is_ctype(ctype, cp))
|
||||
start_chars.map[cp] = true;
|
||||
start_desc.map[cp] = true;
|
||||
}
|
||||
start_chars.map[CompiledRegex::StartChars::other] = true;
|
||||
start_desc.map[CompiledRegex::StartDesc::other] = true;
|
||||
return node.quantifier.allows_none();
|
||||
}
|
||||
case ParsedRegex::Sequence:
|
||||
{
|
||||
bool did_not_consume = false;
|
||||
auto does_not_consume = [&, this](auto child) {
|
||||
return this->compute_start_chars(child, start_chars);
|
||||
return this->compute_start_desc(child, start_desc);
|
||||
};
|
||||
if (m_forward)
|
||||
did_not_consume = for_each_child(m_parsed_regex, index, does_not_consume);
|
||||
|
@ -937,7 +937,7 @@ private:
|
|||
{
|
||||
bool all_consumed = not node.quantifier.allows_none();
|
||||
for_each_child(m_parsed_regex, index, [&](ParsedRegex::NodeIndex child) {
|
||||
if (compute_start_chars(child, start_chars))
|
||||
if (compute_start_desc(child, start_desc))
|
||||
all_consumed = false;
|
||||
return true;
|
||||
});
|
||||
|
@ -960,14 +960,14 @@ private:
|
|||
}
|
||||
|
||||
[[gnu::noinline]]
|
||||
std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const
|
||||
std::unique_ptr<CompiledRegex::StartDesc> compute_start_desc() const
|
||||
{
|
||||
CompiledRegex::StartChars start_chars{};
|
||||
if (compute_start_chars(0, start_chars) or
|
||||
not contains(start_chars.map, false))
|
||||
CompiledRegex::StartDesc start_desc{};
|
||||
if (compute_start_desc(0, start_desc) or
|
||||
not contains(start_desc.map, false))
|
||||
return nullptr;
|
||||
|
||||
return std::make_unique<CompiledRegex::StartChars>(start_chars);
|
||||
return std::make_unique<CompiledRegex::StartDesc>(start_desc);
|
||||
}
|
||||
|
||||
const ParsedRegex::Node& get_node(ParsedRegex::NodeIndex index) const
|
||||
|
|
|
@ -101,14 +101,14 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
|||
MatchDirection direction;
|
||||
size_t save_count;
|
||||
|
||||
struct StartChars
|
||||
struct StartDesc
|
||||
{
|
||||
static constexpr size_t count = 256;
|
||||
static constexpr Codepoint other = 256;
|
||||
bool map[count+1];
|
||||
};
|
||||
|
||||
std::unique_ptr<StartChars> start_chars;
|
||||
std::unique_ptr<StartDesc> start_desc;
|
||||
};
|
||||
|
||||
enum class RegexCompileFlags
|
||||
|
@ -183,8 +183,8 @@ public:
|
|||
|
||||
const bool search = (flags & RegexExecFlags::Search);
|
||||
Utf8It start{m_begin};
|
||||
if (search)
|
||||
to_next_start(start, m_end, m_program.start_chars.get());
|
||||
if (search and m_program.start_desc)
|
||||
to_next_start(start, m_end, *m_program.start_desc);
|
||||
|
||||
return exec_program(start, Thread{&m_program.instructions[search ? 0 : CompiledRegex::search_prefix_size], nullptr});
|
||||
}
|
||||
|
@ -460,19 +460,16 @@ private:
|
|||
std::reverse(state.current_threads.begin(), state.current_threads.end());
|
||||
++pos;
|
||||
|
||||
if (find_next_start)
|
||||
to_next_start(pos, m_end, m_program.start_chars.get());
|
||||
if (find_next_start and m_program.start_desc)
|
||||
to_next_start(pos, m_end, *m_program.start_desc);
|
||||
}
|
||||
}
|
||||
|
||||
void to_next_start(Utf8It& start, const Utf8It& end,
|
||||
const CompiledRegex::StartChars* start_chars)
|
||||
const CompiledRegex::StartDesc& start_desc)
|
||||
{
|
||||
if (not start_chars)
|
||||
return;
|
||||
|
||||
while (start != end and *start >= 0 and
|
||||
not start_chars->map[std::min(*start, CompiledRegex::StartChars::other)])
|
||||
not start_desc.map[std::min(*start, CompiledRegex::StartDesc::other)])
|
||||
++start;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user