Regex: optimize compilation by reserving data

This commit is contained in:
Maxime Coste 2017-11-28 00:59:57 +08:00
parent a52da6fe34
commit 4598832ed5

View File

@ -130,6 +130,7 @@ struct RegexParser
: m_regex{re}, m_pos{re.begin(), re} : m_regex{re}, m_pos{re.begin(), re}
{ {
m_parsed_regex.capture_count = 1; m_parsed_regex.capture_count = 1;
m_parsed_regex.nodes.reserve((size_t)re.length());
NodeIndex root = disjunction(0); NodeIndex root = disjunction(0);
kak_assert(root == 0); kak_assert(root == 0);
} }
@ -620,16 +621,19 @@ constexpr RegexParser::ControlEscape RegexParser::control_escapes[];
struct RegexCompiler struct RegexCompiler
{ {
RegexCompiler(const ParsedRegex& parsed_regex, RegexCompileFlags flags, MatchDirection direction) RegexCompiler(ParsedRegex&& parsed_regex, RegexCompileFlags flags, MatchDirection direction)
: m_parsed_regex{parsed_regex}, m_flags(flags), m_forward{direction == MatchDirection::Forward} : m_parsed_regex{parsed_regex}, m_flags(flags), m_forward{direction == MatchDirection::Forward}
{ {
// Approximation of the number of instructions generated
m_program.instructions.reserve(CompiledRegex::search_prefix_size + parsed_regex.nodes.size() + 1);
m_program.start_chars = compute_start_chars();
write_search_prefix(); write_search_prefix();
compile_node(0); compile_node(0);
push_inst(CompiledRegex::Match); push_inst(CompiledRegex::Match);
m_program.character_classes = m_parsed_regex.character_classes; m_program.character_classes = std::move(m_parsed_regex.character_classes);
m_program.save_count = m_parsed_regex.capture_count * 2; m_program.save_count = m_parsed_regex.capture_count * 2;
m_program.direction = direction; m_program.direction = direction;
m_program.start_chars = compute_start_chars();
} }
CompiledRegex get_compiled_regex() { return std::move(m_program); } CompiledRegex get_compiled_regex() { return std::move(m_program); }
@ -899,7 +903,7 @@ private:
case ParsedRegex::CharacterType: case ParsedRegex::CharacterType:
{ {
const CharacterType ctype = (CharacterType)node.value; const CharacterType ctype = (CharacterType)node.value;
for (Codepoint cp = 0; cp < CompiledRegex::StartChars::count; ++cp) for (Codepoint cp = 0; cp < CompiledRegex::StartChars::other; ++cp)
{ {
if (is_ctype(ctype, cp)) if (is_ctype(ctype, cp))
start_chars.map[cp] = true; start_chars.map[cp] = true;
@ -950,10 +954,8 @@ private:
std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const std::unique_ptr<CompiledRegex::StartChars> compute_start_chars() const
{ {
CompiledRegex::StartChars start_chars{}; CompiledRegex::StartChars start_chars{};
if (compute_start_chars(0, start_chars)) if (compute_start_chars(0, start_chars) or
return nullptr; not contains(start_chars.map, false))
if (not contains(start_chars.map, false))
return nullptr; return nullptr;
return std::make_unique<CompiledRegex::StartChars>(start_chars); return std::make_unique<CompiledRegex::StartChars>(start_chars);
@ -966,7 +968,7 @@ private:
CompiledRegex m_program; CompiledRegex m_program;
RegexCompileFlags m_flags; RegexCompileFlags m_flags;
const ParsedRegex& m_parsed_regex; ParsedRegex& m_parsed_regex;
const bool m_forward; const bool m_forward;
}; };