Regex: Support forward and backward matching code in the same CompiledRegex
There is no need to have two separate regexes to handle forward and backward matching: passing RegexCompileFlags::Backward adds support for backward matching to the regex. For a backward-only regex, pass RegexCompileFlags::NoForward as well to disable generation of the forward matching code.
This commit is contained in:
parent
e9e3dc862c
commit
413f880e9e
|
@ -693,6 +693,12 @@ void paste_all(Context& context, NormalParams params)
|
||||||
selections = std::move(result);
|
selections = std::move(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr RegexCompileFlags direction_flags(MatchDirection direction)
|
||||||
|
{
|
||||||
|
return (direction == MatchDirection::Forward) ?
|
||||||
|
RegexCompileFlags::None : RegexCompileFlags::Backward | RegexCompileFlags::NoForward;
|
||||||
|
}
|
||||||
|
|
||||||
template<MatchDirection direction = MatchDirection::Forward, typename T>
|
template<MatchDirection direction = MatchDirection::Forward, typename T>
|
||||||
void regex_prompt(Context& context, String prompt, String default_regex, T func)
|
void regex_prompt(Context& context, String prompt, String default_regex, T func)
|
||||||
{
|
{
|
||||||
|
@ -725,7 +731,7 @@ void regex_prompt(Context& context, String prompt, String default_regex, T func)
|
||||||
context.push_jump();
|
context.push_jump();
|
||||||
|
|
||||||
if (not str.empty() or event == PromptEvent::Validate)
|
if (not str.empty() or event == PromptEvent::Validate)
|
||||||
func(Regex{str.empty() ? default_regex : str, RegexCompileFlags::None, direction}, event, context);
|
func(Regex{str.empty() ? default_regex : str, direction_flags(direction)}, event, context);
|
||||||
}
|
}
|
||||||
catch (regex_error& err)
|
catch (regex_error& err)
|
||||||
{
|
{
|
||||||
|
@ -795,7 +801,7 @@ void search_next(Context& context, NormalParams params)
|
||||||
StringView str = context.main_sel_register_value(reg);
|
StringView str = context.main_sel_register_value(reg);
|
||||||
if (not str.empty())
|
if (not str.empty())
|
||||||
{
|
{
|
||||||
Regex regex{str, RegexCompileFlags::None, direction};
|
Regex regex{str, direction_flags(direction)};
|
||||||
auto& selections = context.selections();
|
auto& selections = context.selections();
|
||||||
bool main_wrapped = false;
|
bool main_wrapped = false;
|
||||||
do {
|
do {
|
||||||
|
|
|
@ -3,8 +3,8 @@
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
|
||||||
Regex::Regex(StringView re, RegexCompileFlags flags, MatchDirection direction)
|
Regex::Regex(StringView re, RegexCompileFlags flags)
|
||||||
: m_impl{new CompiledRegex{compile_regex(re, flags, direction)}},
|
: m_impl{new CompiledRegex{compile_regex(re, flags)}},
|
||||||
m_str{re.str()}
|
m_str{re.str()}
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
|
|
@ -13,8 +13,7 @@ class Regex
|
||||||
public:
|
public:
|
||||||
Regex() = default;
|
Regex() = default;
|
||||||
|
|
||||||
explicit Regex(StringView re, RegexCompileFlags flags = RegexCompileFlags::None,
|
explicit Regex(StringView re, RegexCompileFlags flags = RegexCompileFlags::None);
|
||||||
MatchDirection direction = MatchDirection::Forward);
|
|
||||||
bool empty() const { return m_str.empty(); }
|
bool empty() const { return m_str.empty(); }
|
||||||
bool operator==(const Regex& other) const { return m_str == other.m_str; }
|
bool operator==(const Regex& other) const { return m_str == other.m_str; }
|
||||||
bool operator!=(const Regex& other) const { return m_str != other.m_str; }
|
bool operator!=(const Regex& other) const { return m_str != other.m_str; }
|
||||||
|
|
|
@ -618,26 +618,43 @@ constexpr RegexParser::ControlEscape RegexParser::control_escapes[];
|
||||||
|
|
||||||
struct RegexCompiler
|
struct RegexCompiler
|
||||||
{
|
{
|
||||||
RegexCompiler(ParsedRegex&& parsed_regex, RegexCompileFlags flags, MatchDirection direction)
|
RegexCompiler(ParsedRegex&& parsed_regex, RegexCompileFlags flags)
|
||||||
: m_parsed_regex{parsed_regex}, m_flags(flags), m_forward{direction == MatchDirection::Forward}
|
: m_parsed_regex{parsed_regex}, m_flags(flags)
|
||||||
{
|
{
|
||||||
|
kak_assert(not (flags & RegexCompileFlags::NoForward) or flags & RegexCompileFlags::Backward);
|
||||||
// Approximation of the number of instructions generated
|
// Approximation of the number of instructions generated
|
||||||
m_program.instructions.reserve(CompiledRegex::search_prefix_size + parsed_regex.nodes.size() + 1);
|
m_program.instructions.reserve((CompiledRegex::search_prefix_size + parsed_regex.nodes.size() + 1)
|
||||||
m_program.start_desc = compute_start_desc();
|
* (((flags & RegexCompileFlags::Backward) and
|
||||||
|
not (flags & RegexCompileFlags::NoForward)) ? 2 : 1));
|
||||||
|
|
||||||
|
if (not (flags & RegexCompileFlags::NoForward))
|
||||||
|
{
|
||||||
|
m_program.forward_start_desc = compute_start_desc(true);
|
||||||
write_search_prefix();
|
write_search_prefix();
|
||||||
compile_node(0);
|
compile_node(0, true);
|
||||||
push_inst(CompiledRegex::Match);
|
push_inst(CompiledRegex::Match);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (flags & RegexCompileFlags::Backward)
|
||||||
|
{
|
||||||
|
m_program.first_backward_inst = m_program.instructions.size();
|
||||||
|
m_program.backward_start_desc = compute_start_desc(false);
|
||||||
|
write_search_prefix();
|
||||||
|
compile_node(0, false);
|
||||||
|
push_inst(CompiledRegex::Match);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
m_program.first_backward_inst = -1;
|
||||||
|
|
||||||
m_program.character_classes = std::move(m_parsed_regex.character_classes);
|
m_program.character_classes = std::move(m_parsed_regex.character_classes);
|
||||||
m_program.save_count = m_parsed_regex.capture_count * 2;
|
m_program.save_count = m_parsed_regex.capture_count * 2;
|
||||||
m_program.direction = direction;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CompiledRegex get_compiled_regex() { return std::move(m_program); }
|
CompiledRegex get_compiled_regex() { return std::move(m_program); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
uint32_t compile_node_inner(ParsedRegex::NodeIndex index)
|
uint32_t compile_node_inner(ParsedRegex::NodeIndex index, bool forward)
|
||||||
{
|
{
|
||||||
auto& node = get_node(index);
|
auto& node = get_node(index);
|
||||||
|
|
||||||
|
@ -647,7 +664,7 @@ private:
|
||||||
const bool save = (node.op == ParsedRegex::Alternation or node.op == ParsedRegex::Sequence) and
|
const bool save = (node.op == ParsedRegex::Alternation or node.op == ParsedRegex::Sequence) and
|
||||||
(node.value == 0 or (node.value != -1 and not (m_flags & RegexCompileFlags::NoSubs)));
|
(node.value == 0 or (node.value != -1 and not (m_flags & RegexCompileFlags::NoSubs)));
|
||||||
if (save)
|
if (save)
|
||||||
push_inst(CompiledRegex::Save, node.value * 2 + (m_forward ? 0 : 1));
|
push_inst(CompiledRegex::Save, node.value * 2 + (forward ? 0 : 1));
|
||||||
|
|
||||||
Vector<uint32_t> goto_inner_end_offsets;
|
Vector<uint32_t> goto_inner_end_offsets;
|
||||||
switch (node.op)
|
switch (node.op)
|
||||||
|
@ -669,13 +686,13 @@ private:
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::Sequence:
|
case ParsedRegex::Sequence:
|
||||||
{
|
{
|
||||||
if (m_forward)
|
if (forward)
|
||||||
for_each_child(m_parsed_regex, index, [this](ParsedRegex::NodeIndex child) {
|
for_each_child(m_parsed_regex, index, [this](ParsedRegex::NodeIndex child) {
|
||||||
compile_node(child); return true;
|
compile_node(child, true); return true;
|
||||||
});
|
});
|
||||||
else
|
else
|
||||||
for_each_child_reverse(m_parsed_regex, index, [this](ParsedRegex::NodeIndex child) {
|
for_each_child_reverse(m_parsed_regex, index, [this](ParsedRegex::NodeIndex child) {
|
||||||
compile_node(child); return true;
|
compile_node(child, false); return true;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -690,7 +707,7 @@ private:
|
||||||
|
|
||||||
for_each_child(m_parsed_regex, index,
|
for_each_child(m_parsed_regex, index,
|
||||||
[&, end = node.children_end](ParsedRegex::NodeIndex child) {
|
[&, end = node.children_end](ParsedRegex::NodeIndex child) {
|
||||||
auto node = compile_node(child);
|
auto node = compile_node(child, forward);
|
||||||
if (child != index+1)
|
if (child != index+1)
|
||||||
m_program.instructions[split_pos++].param = node;
|
m_program.instructions[split_pos++].param = node;
|
||||||
if (get_node(child).children_end != end)
|
if (get_node(child).children_end != end)
|
||||||
|
@ -703,39 +720,39 @@ private:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ParsedRegex::LookAhead:
|
case ParsedRegex::LookAhead:
|
||||||
push_inst(m_forward ? (ignore_case ? CompiledRegex::LookAhead_IgnoreCase
|
push_inst(forward ? (ignore_case ? CompiledRegex::LookAhead_IgnoreCase
|
||||||
: CompiledRegex::LookAhead)
|
: CompiledRegex::LookAhead)
|
||||||
: (ignore_case ? CompiledRegex::LookBehind_IgnoreCase
|
: (ignore_case ? CompiledRegex::LookBehind_IgnoreCase
|
||||||
: CompiledRegex::LookBehind),
|
: CompiledRegex::LookBehind),
|
||||||
push_lookaround(index, false, ignore_case));
|
push_lookaround(index, false, ignore_case));
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::NegativeLookAhead:
|
case ParsedRegex::NegativeLookAhead:
|
||||||
push_inst(m_forward ? (ignore_case ? CompiledRegex::NegativeLookAhead_IgnoreCase
|
push_inst(forward ? (ignore_case ? CompiledRegex::NegativeLookAhead_IgnoreCase
|
||||||
: CompiledRegex::NegativeLookAhead)
|
: CompiledRegex::NegativeLookAhead)
|
||||||
: (ignore_case ? CompiledRegex::NegativeLookBehind_IgnoreCase
|
: (ignore_case ? CompiledRegex::NegativeLookBehind_IgnoreCase
|
||||||
: CompiledRegex::NegativeLookBehind),
|
: CompiledRegex::NegativeLookBehind),
|
||||||
push_lookaround(index, false, ignore_case));
|
push_lookaround(index, false, ignore_case));
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::LookBehind:
|
case ParsedRegex::LookBehind:
|
||||||
push_inst(m_forward ? (ignore_case ? CompiledRegex::LookBehind_IgnoreCase
|
push_inst(forward ? (ignore_case ? CompiledRegex::LookBehind_IgnoreCase
|
||||||
: CompiledRegex::LookBehind)
|
: CompiledRegex::LookBehind)
|
||||||
: (ignore_case ? CompiledRegex::LookAhead_IgnoreCase
|
: (ignore_case ? CompiledRegex::LookAhead_IgnoreCase
|
||||||
: CompiledRegex::LookAhead),
|
: CompiledRegex::LookAhead),
|
||||||
push_lookaround(index, true, ignore_case));
|
push_lookaround(index, true, ignore_case));
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::NegativeLookBehind:
|
case ParsedRegex::NegativeLookBehind:
|
||||||
push_inst(m_forward ? (ignore_case ? CompiledRegex::NegativeLookBehind_IgnoreCase
|
push_inst(forward ? (ignore_case ? CompiledRegex::NegativeLookBehind_IgnoreCase
|
||||||
: CompiledRegex::NegativeLookBehind)
|
: CompiledRegex::NegativeLookBehind)
|
||||||
: (ignore_case ? CompiledRegex::NegativeLookAhead_IgnoreCase
|
: (ignore_case ? CompiledRegex::NegativeLookAhead_IgnoreCase
|
||||||
: CompiledRegex::NegativeLookAhead),
|
: CompiledRegex::NegativeLookAhead),
|
||||||
push_lookaround(index, true, ignore_case));
|
push_lookaround(index, true, ignore_case));
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::LineStart:
|
case ParsedRegex::LineStart:
|
||||||
push_inst(m_forward ? CompiledRegex::LineStart
|
push_inst(forward ? CompiledRegex::LineStart
|
||||||
: CompiledRegex::LineEnd);
|
: CompiledRegex::LineEnd);
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::LineEnd:
|
case ParsedRegex::LineEnd:
|
||||||
push_inst(m_forward ? CompiledRegex::LineEnd
|
push_inst(forward ? CompiledRegex::LineEnd
|
||||||
: CompiledRegex::LineStart);
|
: CompiledRegex::LineStart);
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::WordBoundary:
|
case ParsedRegex::WordBoundary:
|
||||||
|
@ -745,11 +762,11 @@ private:
|
||||||
push_inst(CompiledRegex::NotWordBoundary);
|
push_inst(CompiledRegex::NotWordBoundary);
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::SubjectBegin:
|
case ParsedRegex::SubjectBegin:
|
||||||
push_inst(m_forward ? CompiledRegex::SubjectBegin
|
push_inst(forward ? CompiledRegex::SubjectBegin
|
||||||
: CompiledRegex::SubjectEnd);
|
: CompiledRegex::SubjectEnd);
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::SubjectEnd:
|
case ParsedRegex::SubjectEnd:
|
||||||
push_inst(m_forward ? CompiledRegex::SubjectEnd
|
push_inst(forward ? CompiledRegex::SubjectEnd
|
||||||
: CompiledRegex::SubjectBegin);
|
: CompiledRegex::SubjectBegin);
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::ResetStart:
|
case ParsedRegex::ResetStart:
|
||||||
|
@ -761,12 +778,12 @@ private:
|
||||||
m_program.instructions[offset].param = m_program.instructions.size();
|
m_program.instructions[offset].param = m_program.instructions.size();
|
||||||
|
|
||||||
if (save)
|
if (save)
|
||||||
push_inst(CompiledRegex::Save, node.value * 2 + (m_forward ? 1 : 0));
|
push_inst(CompiledRegex::Save, node.value * 2 + (forward ? 1 : 0));
|
||||||
|
|
||||||
return start_pos;
|
return start_pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t compile_node(ParsedRegex::NodeIndex index)
|
uint32_t compile_node(ParsedRegex::NodeIndex index, bool forward)
|
||||||
{
|
{
|
||||||
auto& node = get_node(index);
|
auto& node = get_node(index);
|
||||||
|
|
||||||
|
@ -784,10 +801,10 @@ private:
|
||||||
goto_ends.push_back(split_pos);
|
goto_ends.push_back(split_pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto inner_pos = compile_node_inner(index);
|
auto inner_pos = compile_node_inner(index, forward);
|
||||||
// Write the node multiple times when we have a min count quantifier
|
// Write the node multiple times when we have a min count quantifier
|
||||||
for (int i = 1; i < quantifier.min; ++i)
|
for (int i = 1; i < quantifier.min; ++i)
|
||||||
inner_pos = compile_node_inner(index);
|
inner_pos = compile_node_inner(index, forward);
|
||||||
|
|
||||||
if (quantifier.allows_infinite_repeat())
|
if (quantifier.allows_infinite_repeat())
|
||||||
push_inst(quantifier.greedy ? CompiledRegex::Split_PrioritizeChild
|
push_inst(quantifier.greedy ? CompiledRegex::Split_PrioritizeChild
|
||||||
|
@ -801,7 +818,7 @@ private:
|
||||||
auto split_pos = push_inst(quantifier.greedy ? CompiledRegex::Split_PrioritizeParent
|
auto split_pos = push_inst(quantifier.greedy ? CompiledRegex::Split_PrioritizeParent
|
||||||
: CompiledRegex::Split_PrioritizeChild);
|
: CompiledRegex::Split_PrioritizeChild);
|
||||||
goto_ends.push_back(split_pos);
|
goto_ends.push_back(split_pos);
|
||||||
compile_node_inner(index);
|
compile_node_inner(index, forward);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto offset : goto_ends)
|
for (auto offset : goto_ends)
|
||||||
|
@ -813,11 +830,11 @@ private:
|
||||||
// Add a set of prefix instructions used in the search use case
|
// Add a set of prefix instructions used in the search use case
|
||||||
void write_search_prefix()
|
void write_search_prefix()
|
||||||
{
|
{
|
||||||
kak_assert(m_program.instructions.empty());
|
const uint32_t first_inst = m_program.instructions.size();
|
||||||
push_inst(CompiledRegex::Split_PrioritizeChild, CompiledRegex::search_prefix_size);
|
push_inst(CompiledRegex::Split_PrioritizeChild, first_inst + CompiledRegex::search_prefix_size);
|
||||||
push_inst(CompiledRegex::FindNextStart);
|
push_inst(CompiledRegex::FindNextStart);
|
||||||
push_inst(CompiledRegex::Split_PrioritizeParent, 1);
|
push_inst(CompiledRegex::Split_PrioritizeParent, first_inst + 1);
|
||||||
kak_assert(m_program.instructions.size() == CompiledRegex::search_prefix_size);
|
kak_assert(m_program.instructions.size() == first_inst + CompiledRegex::search_prefix_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t push_inst(CompiledRegex::Op op, uint32_t param = 0)
|
uint32_t push_inst(CompiledRegex::Op op, uint32_t param = 0)
|
||||||
|
@ -862,7 +879,7 @@ private:
|
||||||
// returns true if the node did not consume the char, hence a following node in
|
// returns true if the node did not consume the char, hence a following node in
|
||||||
// sequence would be still relevant for the parent node start chars computation.
|
// sequence would be still relevant for the parent node start chars computation.
|
||||||
bool compute_start_desc(ParsedRegex::NodeIndex index,
|
bool compute_start_desc(ParsedRegex::NodeIndex index,
|
||||||
CompiledRegex::StartDesc& start_desc) const
|
CompiledRegex::StartDesc& start_desc, bool forward) const
|
||||||
{
|
{
|
||||||
auto& node = get_node(index);
|
auto& node = get_node(index);
|
||||||
switch (node.op)
|
switch (node.op)
|
||||||
|
@ -924,9 +941,9 @@ private:
|
||||||
{
|
{
|
||||||
bool did_not_consume = false;
|
bool did_not_consume = false;
|
||||||
auto does_not_consume = [&, this](auto child) {
|
auto does_not_consume = [&, this](auto child) {
|
||||||
return this->compute_start_desc(child, start_desc);
|
return this->compute_start_desc(child, start_desc, forward);
|
||||||
};
|
};
|
||||||
if (m_forward)
|
if (forward)
|
||||||
did_not_consume = for_each_child(m_parsed_regex, index, does_not_consume);
|
did_not_consume = for_each_child(m_parsed_regex, index, does_not_consume);
|
||||||
else
|
else
|
||||||
did_not_consume = for_each_child_reverse(m_parsed_regex, index, does_not_consume);
|
did_not_consume = for_each_child_reverse(m_parsed_regex, index, does_not_consume);
|
||||||
|
@ -937,7 +954,7 @@ private:
|
||||||
{
|
{
|
||||||
bool all_consumed = not node.quantifier.allows_none();
|
bool all_consumed = not node.quantifier.allows_none();
|
||||||
for_each_child(m_parsed_regex, index, [&](ParsedRegex::NodeIndex child) {
|
for_each_child(m_parsed_regex, index, [&](ParsedRegex::NodeIndex child) {
|
||||||
if (compute_start_desc(child, start_desc))
|
if (compute_start_desc(child, start_desc, forward))
|
||||||
all_consumed = false;
|
all_consumed = false;
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
|
@ -960,10 +977,10 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
[[gnu::noinline]]
|
[[gnu::noinline]]
|
||||||
std::unique_ptr<CompiledRegex::StartDesc> compute_start_desc() const
|
std::unique_ptr<CompiledRegex::StartDesc> compute_start_desc(bool forward) const
|
||||||
{
|
{
|
||||||
CompiledRegex::StartDesc start_desc{};
|
CompiledRegex::StartDesc start_desc{};
|
||||||
if (compute_start_desc(0, start_desc) or
|
if (compute_start_desc(0, start_desc, forward) or
|
||||||
not contains(start_desc.map, false))
|
not contains(start_desc.map, false))
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
|
@ -978,7 +995,6 @@ private:
|
||||||
CompiledRegex m_program;
|
CompiledRegex m_program;
|
||||||
RegexCompileFlags m_flags;
|
RegexCompileFlags m_flags;
|
||||||
ParsedRegex& m_parsed_regex;
|
ParsedRegex& m_parsed_regex;
|
||||||
const bool m_forward;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
void dump_regex(const CompiledRegex& program)
|
void dump_regex(const CompiledRegex& program)
|
||||||
|
@ -1079,9 +1095,9 @@ void dump_regex(const CompiledRegex& program)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CompiledRegex compile_regex(StringView re, RegexCompileFlags flags, MatchDirection direction)
|
CompiledRegex compile_regex(StringView re, RegexCompileFlags flags)
|
||||||
{
|
{
|
||||||
return RegexCompiler{RegexParser::parse(re), flags, direction}.get_compiled_regex();
|
return RegexCompiler{RegexParser::parse(re), flags}.get_compiled_regex();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_character_class(const CharacterClass& character_class, Codepoint cp)
|
bool is_character_class(const CharacterClass& character_class, Codepoint cp)
|
||||||
|
@ -1120,7 +1136,8 @@ struct TestVM : CompiledRegex, ThreadedRegexVM<const char*, dir>
|
||||||
using VMType = ThreadedRegexVM<const char*, dir>;
|
using VMType = ThreadedRegexVM<const char*, dir>;
|
||||||
|
|
||||||
TestVM(StringView re, bool dump = false)
|
TestVM(StringView re, bool dump = false)
|
||||||
: CompiledRegex{compile_regex(re, RegexCompileFlags::None, dir)},
|
: CompiledRegex{compile_regex(re, dir == MatchDirection::Forward ?
|
||||||
|
RegexCompileFlags::None : RegexCompileFlags::Backward)},
|
||||||
VMType{(const CompiledRegex&)*this}
|
VMType{(const CompiledRegex&)*this}
|
||||||
{ if (dump) dump_regex(*this); }
|
{ if (dump) dump_regex(*this); }
|
||||||
|
|
||||||
|
|
|
@ -98,8 +98,8 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
||||||
Vector<Instruction, MemoryDomain::Regex> instructions;
|
Vector<Instruction, MemoryDomain::Regex> instructions;
|
||||||
Vector<CharacterClass, MemoryDomain::Regex> character_classes;
|
Vector<CharacterClass, MemoryDomain::Regex> character_classes;
|
||||||
Vector<Codepoint, MemoryDomain::Regex> lookarounds;
|
Vector<Codepoint, MemoryDomain::Regex> lookarounds;
|
||||||
MatchDirection direction;
|
uint32_t first_backward_inst; // -1 if no backward support, 0 if only backward, >0 if both forward and backward
|
||||||
size_t save_count;
|
uint32_t save_count;
|
||||||
|
|
||||||
struct StartDesc
|
struct StartDesc
|
||||||
{
|
{
|
||||||
|
@ -108,18 +108,21 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
||||||
bool map[count+1];
|
bool map[count+1];
|
||||||
};
|
};
|
||||||
|
|
||||||
std::unique_ptr<StartDesc> start_desc;
|
std::unique_ptr<StartDesc> forward_start_desc;
|
||||||
|
std::unique_ptr<StartDesc> backward_start_desc;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class RegexCompileFlags
|
enum class RegexCompileFlags
|
||||||
{
|
{
|
||||||
None = 0,
|
None = 0,
|
||||||
NoSubs = 1 << 0,
|
NoSubs = 1 << 0,
|
||||||
Optimize = 1 << 1
|
Optimize = 1 << 1,
|
||||||
|
Backward = 1 << 1,
|
||||||
|
NoForward = 1 << 2,
|
||||||
};
|
};
|
||||||
constexpr bool with_bit_ops(Meta::Type<RegexCompileFlags>) { return true; }
|
constexpr bool with_bit_ops(Meta::Type<RegexCompileFlags>) { return true; }
|
||||||
|
|
||||||
CompiledRegex compile_regex(StringView re, RegexCompileFlags flags, MatchDirection direction = MatchDirection::Forward);
|
CompiledRegex compile_regex(StringView re, RegexCompileFlags flags);
|
||||||
|
|
||||||
enum class RegexExecFlags
|
enum class RegexExecFlags
|
||||||
{
|
{
|
||||||
|
@ -145,7 +148,8 @@ public:
|
||||||
ThreadedRegexVM(const CompiledRegex& program)
|
ThreadedRegexVM(const CompiledRegex& program)
|
||||||
: m_program{program}
|
: m_program{program}
|
||||||
{
|
{
|
||||||
kak_assert(m_program and direction == m_program.direction);
|
kak_assert((direction == MatchDirection::Forward and program.first_backward_inst != 0) or
|
||||||
|
(direction == MatchDirection::Backward and program.first_backward_inst != -1));
|
||||||
}
|
}
|
||||||
|
|
||||||
ThreadedRegexVM(const ThreadedRegexVM&) = delete;
|
ThreadedRegexVM(const ThreadedRegexVM&) = delete;
|
||||||
|
@ -183,20 +187,30 @@ public:
|
||||||
|
|
||||||
const bool search = (flags & RegexExecFlags::Search);
|
const bool search = (flags & RegexExecFlags::Search);
|
||||||
Utf8It start{m_begin};
|
Utf8It start{m_begin};
|
||||||
if (m_program.start_desc)
|
const auto& start_desc = direction == MatchDirection::Forward ? m_program.forward_start_desc
|
||||||
|
: m_program.backward_start_desc;
|
||||||
|
if (start_desc)
|
||||||
{
|
{
|
||||||
if (search)
|
if (search)
|
||||||
{
|
{
|
||||||
to_next_start(start, m_end, *m_program.start_desc);
|
to_next_start(start, m_end, *start_desc);
|
||||||
if (start == m_end) // If start_desc is not null, it means we consume at least one char
|
if (start == m_end) // If start_desc is not null, it means we consume at least one char
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
else if (start != m_end and
|
else if (start != m_end and
|
||||||
not m_program.start_desc->map[std::min(*start, CompiledRegex::StartDesc::other)])
|
not start_desc->map[std::min(*start, CompiledRegex::StartDesc::other)])
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return exec_program(start, Thread{&m_program.instructions[search ? 0 : CompiledRegex::search_prefix_size], nullptr});
|
ConstArrayView<CompiledRegex::Instruction> instructions{m_program.instructions};
|
||||||
|
if (direction == MatchDirection::Forward)
|
||||||
|
instructions = instructions.subrange(0, m_program.first_backward_inst);
|
||||||
|
else
|
||||||
|
instructions = instructions.subrange(m_program.first_backward_inst);
|
||||||
|
if (not search)
|
||||||
|
instructions = instructions.subrange(CompiledRegex::search_prefix_size);
|
||||||
|
|
||||||
|
return exec_program(start, instructions);
|
||||||
}
|
}
|
||||||
|
|
||||||
ArrayView<const Iterator> captures() const
|
ArrayView<const Iterator> captures() const
|
||||||
|
@ -397,10 +411,13 @@ private:
|
||||||
return StepResult::Failed;
|
return StepResult::Failed;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool exec_program(Utf8It pos, Thread init_thread)
|
bool exec_program(Utf8It pos, ConstArrayView<CompiledRegex::Instruction> instructions)
|
||||||
{
|
{
|
||||||
ExecState state;
|
ExecState state;
|
||||||
state.current_threads.push_back(init_thread);
|
state.current_threads.push_back({instructions.begin(), nullptr});
|
||||||
|
|
||||||
|
const auto& start_desc = direction == MatchDirection::Forward ? m_program.forward_start_desc
|
||||||
|
: m_program.backward_start_desc;
|
||||||
|
|
||||||
bool found_match = false;
|
bool found_match = false;
|
||||||
while (true) // Iterate on all codepoints and once at the end
|
while (true) // Iterate on all codepoints and once at the end
|
||||||
|
@ -408,7 +425,7 @@ private:
|
||||||
if (++state.step == 0)
|
if (++state.step == 0)
|
||||||
{
|
{
|
||||||
// We wrapped, avoid potential collision on inst.last_step by resetting them
|
// We wrapped, avoid potential collision on inst.last_step by resetting them
|
||||||
for (auto& inst : m_program.instructions)
|
for (auto& inst : instructions)
|
||||||
inst.last_step = 0;
|
inst.last_step = 0;
|
||||||
state.step = 1; // step 0 is never valid
|
state.step = 1; // step 0 is never valid
|
||||||
}
|
}
|
||||||
|
@ -470,8 +487,8 @@ private:
|
||||||
std::reverse(state.current_threads.begin(), state.current_threads.end());
|
std::reverse(state.current_threads.begin(), state.current_threads.end());
|
||||||
++pos;
|
++pos;
|
||||||
|
|
||||||
if (find_next_start and m_program.start_desc)
|
if (find_next_start and start_desc)
|
||||||
to_next_start(pos, m_end, *m_program.start_desc);
|
to_next_start(pos, m_end, *start_desc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user