Regex: rename StartChars to StartDesc

It only contains chars for now, but it's still more generally
describing where matches can start.
This commit is contained in:
Maxime Coste 2017-12-01 14:46:18 +08:00
parent b91f43b031
commit 65b057f261
2 changed files with 35 additions and 38 deletions

View File

@ -14,7 +14,7 @@
namespace Kakoune namespace Kakoune
{ {
constexpr Codepoint CompiledRegex::StartChars::other; constexpr Codepoint CompiledRegex::StartDesc::other;
struct ParsedRegex struct ParsedRegex
{ {
@ -623,7 +623,7 @@ struct RegexCompiler
{ {
// Approximation of the number of instructions generated // Approximation of the number of instructions generated
m_program.instructions.reserve(CompiledRegex::search_prefix_size + parsed_regex.nodes.size() + 1); m_program.instructions.reserve(CompiledRegex::search_prefix_size + parsed_regex.nodes.size() + 1);
m_program.start_chars = compute_start_chars(); m_program.start_desc = compute_start_desc();
write_search_prefix(); write_search_prefix();
compile_node(0); compile_node(0);
@ -861,28 +861,28 @@ private:
// Fills accepted and rejected according to which chars can start the given node, // Fills accepted and rejected according to which chars can start the given node,
// returns true if the node did not consume the char, hence a following node in // returns true if the node did not consume the char, hence a following node in
// sequence would be still relevant for the parent node start chars computation. // sequence would be still relevant for the parent node start chars computation.
bool compute_start_chars(ParsedRegex::NodeIndex index, bool compute_start_desc(ParsedRegex::NodeIndex index,
CompiledRegex::StartChars& start_chars) const CompiledRegex::StartDesc& start_desc) const
{ {
auto& node = get_node(index); auto& node = get_node(index);
switch (node.op) switch (node.op)
{ {
case ParsedRegex::Literal: case ParsedRegex::Literal:
if (node.value < CompiledRegex::StartChars::count) if (node.value < CompiledRegex::StartDesc::count)
{ {
if (node.ignore_case) if (node.ignore_case)
{ {
start_chars.map[to_lower(node.value)] = true; start_desc.map[to_lower(node.value)] = true;
start_chars.map[to_upper(node.value)] = true; start_desc.map[to_upper(node.value)] = true;
} }
else else
start_chars.map[node.value] = true; start_desc.map[node.value] = true;
} }
else else
start_chars.map[CompiledRegex::StartChars::other] = true; start_desc.map[CompiledRegex::StartDesc::other] = true;
return node.quantifier.allows_none(); return node.quantifier.allows_none();
case ParsedRegex::AnyChar: case ParsedRegex::AnyChar:
for (auto& b : start_chars.map) for (auto& b : start_desc.map)
b = true; b = true;
return node.quantifier.allows_none(); return node.quantifier.allows_none();
case ParsedRegex::Class: case ParsedRegex::Class:
@ -892,39 +892,39 @@ private:
{ {
for (auto& range : character_class.ranges) for (auto& range : character_class.ranges)
{ {
auto min = std::min(CompiledRegex::StartChars::other, range.min); auto min = std::min(CompiledRegex::StartDesc::other, range.min);
auto max = std::min(CompiledRegex::StartChars::other, range.max); auto max = std::min(CompiledRegex::StartDesc::other, range.max);
for (Codepoint cp = min; cp <= max; ++cp) for (Codepoint cp = min; cp <= max; ++cp)
start_chars.map[cp] = true; start_desc.map[cp] = true;
} }
} }
else else
{ {
for (Codepoint cp = 0; cp < CompiledRegex::StartChars::other; ++cp) for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
{ {
if (start_chars.map[cp] or is_character_class(character_class, cp)) if (start_desc.map[cp] or is_character_class(character_class, cp))
start_chars.map[cp] = true; start_desc.map[cp] = true;
} }
} }
start_chars.map[CompiledRegex::StartChars::other] = true; start_desc.map[CompiledRegex::StartDesc::other] = true;
return node.quantifier.allows_none(); return node.quantifier.allows_none();
} }
case ParsedRegex::CharacterType: case ParsedRegex::CharacterType:
{ {
const CharacterType ctype = (CharacterType)node.value; const CharacterType ctype = (CharacterType)node.value;
for (Codepoint cp = 0; cp < CompiledRegex::StartChars::other; ++cp) for (Codepoint cp = 0; cp < CompiledRegex::StartDesc::other; ++cp)
{ {
if (is_ctype(ctype, cp)) if (is_ctype(ctype, cp))
start_chars.map[cp] = true; start_desc.map[cp] = true;
} }
start_chars.map[CompiledRegex::StartChars::other] = true; start_desc.map[CompiledRegex::StartDesc::other] = true;
return node.quantifier.allows_none(); return node.quantifier.allows_none();
} }
case ParsedRegex::Sequence: case ParsedRegex::Sequence:
{ {
bool did_not_consume = false; bool did_not_consume = false;
auto does_not_consume = [&, this](auto child) { auto does_not_consume = [&, this](auto child) {
return this->compute_start_chars(child, start_chars); return this->compute_start_desc(child, start_desc);
}; };
if (m_forward) if (m_forward)
did_not_consume = for_each_child(m_parsed_regex, index, does_not_consume); did_not_consume = for_each_child(m_parsed_regex, index, does_not_consume);
@ -937,7 +937,7 @@ private:
{ {
bool all_consumed = not node.quantifier.allows_none(); bool all_consumed = not node.quantifier.allows_none();
for_each_child(m_parsed_regex, index, [&](ParsedRegex::NodeIndex child) { for_each_child(m_parsed_regex, index, [&](ParsedRegex::NodeIndex child) {
if (compute_start_chars(child, start_chars)) if (compute_start_desc(child, start_desc))
all_consumed = false; all_consumed = false;
return true; return true;
}); });
@ -960,14 +960,14 @@ private:
} }
[[gnu::noinline]] [[gnu::noinline]]
std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const std::unique_ptr<CompiledRegex::StartDesc> compute_start_desc() const
{ {
CompiledRegex::StartChars start_chars{}; CompiledRegex::StartDesc start_desc{};
if (compute_start_chars(0, start_chars) or if (compute_start_desc(0, start_desc) or
not contains(start_chars.map, false)) not contains(start_desc.map, false))
return nullptr; return nullptr;
return std::make_unique<CompiledRegex::StartChars>(start_chars); return std::make_unique<CompiledRegex::StartDesc>(start_desc);
} }
const ParsedRegex::Node& get_node(ParsedRegex::NodeIndex index) const const ParsedRegex::Node& get_node(ParsedRegex::NodeIndex index) const

View File

@ -101,14 +101,14 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
MatchDirection direction; MatchDirection direction;
size_t save_count; size_t save_count;
struct StartChars struct StartDesc
{ {
static constexpr size_t count = 256; static constexpr size_t count = 256;
static constexpr Codepoint other = 256; static constexpr Codepoint other = 256;
bool map[count+1]; bool map[count+1];
}; };
std::unique_ptr<StartChars> start_chars; std::unique_ptr<StartDesc> start_desc;
}; };
enum class RegexCompileFlags enum class RegexCompileFlags
@ -183,8 +183,8 @@ public:
const bool search = (flags & RegexExecFlags::Search); const bool search = (flags & RegexExecFlags::Search);
Utf8It start{m_begin}; Utf8It start{m_begin};
if (search) if (search and m_program.start_desc)
to_next_start(start, m_end, m_program.start_chars.get()); to_next_start(start, m_end, *m_program.start_desc);
return exec_program(start, Thread{&m_program.instructions[search ? 0 : CompiledRegex::search_prefix_size], nullptr}); return exec_program(start, Thread{&m_program.instructions[search ? 0 : CompiledRegex::search_prefix_size], nullptr});
} }
@ -460,19 +460,16 @@ private:
std::reverse(state.current_threads.begin(), state.current_threads.end()); std::reverse(state.current_threads.begin(), state.current_threads.end());
++pos; ++pos;
if (find_next_start) if (find_next_start and m_program.start_desc)
to_next_start(pos, m_end, m_program.start_chars.get()); to_next_start(pos, m_end, *m_program.start_desc);
} }
} }
void to_next_start(Utf8It& start, const Utf8It& end, void to_next_start(Utf8It& start, const Utf8It& end,
const CompiledRegex::StartChars* start_chars) const CompiledRegex::StartDesc& start_desc)
{ {
if (not start_chars)
return;
while (start != end and *start >= 0 and while (start != end and *start >= 0 and
not start_chars->map[std::min(*start, CompiledRegex::StartChars::other)]) not start_desc.map[std::min(*start, CompiledRegex::StartDesc::other)])
++start; ++start;
} }