Regex: abandon bytecode and just use a simple list of instructions

Makes the code simpler.
This commit is contained in:
Maxime Coste 2017-10-07 18:51:32 +08:00
parent 6434bca325
commit 732b8bc2a4
2 changed files with 129 additions and 179 deletions

View File

@ -505,7 +505,7 @@ struct RegexCompiler
: m_parsed_regex{parsed_regex}, m_forward{direction == MatchDirection::Forward} : m_parsed_regex{parsed_regex}, m_forward{direction == MatchDirection::Forward}
{ {
compile_node(m_parsed_regex.ast); compile_node(m_parsed_regex.ast);
push_op(CompiledRegex::Match); push_inst(CompiledRegex::Match);
m_program.matchers = m_parsed_regex.matchers; m_program.matchers = m_parsed_regex.matchers;
m_program.save_count = m_parsed_regex.capture_count * 2; m_program.save_count = m_parsed_regex.capture_count * 2;
m_program.direction = direction; m_program.direction = direction;
@ -515,34 +515,30 @@ struct RegexCompiler
CompiledRegex get_compiled_regex() { return std::move(m_program); } CompiledRegex get_compiled_regex() { return std::move(m_program); }
private: private:
using Offset = CompiledRegex::Offset;
Offset compile_node_inner(const ParsedRegex::AstNodePtr& node) uint32_t compile_node_inner(const ParsedRegex::AstNodePtr& node)
{ {
const auto start_pos = m_program.bytecode.size(); const auto start_pos = m_program.instructions.size();
const Codepoint capture = (node->op == ParsedRegex::Alternation or node->op == ParsedRegex::Sequence) ? node->value : -1; const Codepoint capture = (node->op == ParsedRegex::Alternation or node->op == ParsedRegex::Sequence) ? node->value : -1;
if (capture != -1) if (capture != -1)
{ push_inst(CompiledRegex::Save, capture * 2 + (m_forward ? 0 : 1));
push_op(CompiledRegex::Save);
push_byte(capture * 2 + (m_forward ? 0 : 1));
}
Vector<Offset> goto_inner_end_offsets; Vector<uint32_t> goto_inner_end_offsets;
switch (node->op) switch (node->op)
{ {
case ParsedRegex::Literal: case ParsedRegex::Literal:
push_op(node->ignore_case ? CompiledRegex::LiteralIgnoreCase if (node->ignore_case)
: CompiledRegex::Literal); push_inst(CompiledRegex::LiteralIgnoreCase, to_lower(node->value));
push_codepoint(node->ignore_case ? to_lower(node->value) else
: node->value); push_inst(CompiledRegex::Literal, node->value);
break; break;
case ParsedRegex::AnyChar: case ParsedRegex::AnyChar:
push_op(CompiledRegex::AnyChar); push_inst(CompiledRegex::AnyChar);
break; break;
case ParsedRegex::Matcher: case ParsedRegex::Matcher:
push_op(CompiledRegex::Matcher); push_inst(CompiledRegex::Matcher, node->value);
push_byte(node->value); break;
case ParsedRegex::Sequence: case ParsedRegex::Sequence:
{ {
if (m_forward) if (m_forward)
@ -558,82 +554,77 @@ private:
auto& children = node->children; auto& children = node->children;
kak_assert(children.size() == 2); kak_assert(children.size() == 2);
push_op(CompiledRegex::Split_PrioritizeParent); auto split_pos = push_inst(CompiledRegex::Split_PrioritizeParent);
auto offset = alloc_offset();
compile_node(children[m_forward ? 0 : 1]); compile_node(children[m_forward ? 0 : 1]);
push_op(CompiledRegex::Jump); auto left_pos = push_inst(CompiledRegex::Jump);
goto_inner_end_offsets.push_back(alloc_offset()); goto_inner_end_offsets.push_back(left_pos);
auto right_pos = compile_node(children[m_forward ? 1 : 0]); auto right_pos = compile_node(children[m_forward ? 1 : 0]);
set_offset(offset, right_pos); m_program.instructions[split_pos].param = right_pos;
break; break;
} }
case ParsedRegex::LookAhead: case ParsedRegex::LookAhead:
push_op(m_forward ? CompiledRegex::LookAhead push_inst(m_forward ? CompiledRegex::LookAhead
: CompiledRegex::LookBehind); : CompiledRegex::LookBehind,
push_string(node->children, false); push_lookaround(node->children, false));
break; break;
case ParsedRegex::NegativeLookAhead: case ParsedRegex::NegativeLookAhead:
push_op(m_forward ? CompiledRegex::NegativeLookAhead push_inst(m_forward ? CompiledRegex::NegativeLookAhead
: CompiledRegex::NegativeLookBehind); : CompiledRegex::NegativeLookBehind,
push_string(node->children, false); push_lookaround(node->children, false));
break; break;
case ParsedRegex::LookBehind: case ParsedRegex::LookBehind:
push_op(m_forward ? CompiledRegex::LookBehind push_inst(m_forward ? CompiledRegex::LookBehind
: CompiledRegex::LookAhead); : CompiledRegex::LookAhead,
push_string(node->children, true); push_lookaround(node->children, true));
break; break;
case ParsedRegex::NegativeLookBehind: case ParsedRegex::NegativeLookBehind:
push_op(m_forward ? CompiledRegex::NegativeLookBehind push_inst(m_forward ? CompiledRegex::NegativeLookBehind
: CompiledRegex::NegativeLookAhead); : CompiledRegex::NegativeLookAhead,
push_string(node->children, true); push_lookaround(node->children, true));
break; break;
case ParsedRegex::LineStart: case ParsedRegex::LineStart:
push_op(m_forward ? CompiledRegex::LineStart push_inst(m_forward ? CompiledRegex::LineStart
: CompiledRegex::LineEnd); : CompiledRegex::LineEnd);
break; break;
case ParsedRegex::LineEnd: case ParsedRegex::LineEnd:
push_op(m_forward ? CompiledRegex::LineEnd push_inst(m_forward ? CompiledRegex::LineEnd
: CompiledRegex::LineStart); : CompiledRegex::LineStart);
break; break;
case ParsedRegex::WordBoundary: case ParsedRegex::WordBoundary:
push_op(CompiledRegex::WordBoundary); push_inst(CompiledRegex::WordBoundary);
break; break;
case ParsedRegex::NotWordBoundary: case ParsedRegex::NotWordBoundary:
push_op(CompiledRegex::NotWordBoundary); push_inst(CompiledRegex::NotWordBoundary);
break; break;
case ParsedRegex::SubjectBegin: case ParsedRegex::SubjectBegin:
push_op(m_forward ? CompiledRegex::SubjectBegin push_inst(m_forward ? CompiledRegex::SubjectBegin
: CompiledRegex::SubjectEnd); : CompiledRegex::SubjectEnd);
break; break;
case ParsedRegex::SubjectEnd: case ParsedRegex::SubjectEnd:
push_op(m_forward ? CompiledRegex::SubjectEnd push_inst(m_forward ? CompiledRegex::SubjectEnd
: CompiledRegex::SubjectBegin); : CompiledRegex::SubjectBegin);
break; break;
case ParsedRegex::ResetStart: case ParsedRegex::ResetStart:
push_op(CompiledRegex::Save); push_inst(CompiledRegex::Save, 0);
push_byte(0);
break; break;
} }
for (auto& offset : goto_inner_end_offsets) for (auto& offset : goto_inner_end_offsets)
set_offset(offset, m_program.bytecode.size()); m_program.instructions[offset].param = m_program.instructions.size();
if (capture != -1) if (capture != -1)
{ push_inst(CompiledRegex::Save, capture * 2 + (m_forward ? 1 : 0));
push_op(CompiledRegex::Save);
push_byte(capture * 2 + (m_forward ? 1 : 0));
}
return start_pos; return start_pos;
} }
Offset compile_node(const ParsedRegex::AstNodePtr& node) uint32_t compile_node(const ParsedRegex::AstNodePtr& node)
{ {
Offset pos = m_program.bytecode.size(); uint32_t pos = m_program.instructions.size();
Vector<Offset> goto_end_offsets; Vector<uint32_t> goto_ends;
auto& quantifier = node->quantifier; auto& quantifier = node->quantifier;
@ -641,9 +632,9 @@ private:
if (quantifier.allows_none()) if (quantifier.allows_none())
{ {
push_op(quantifier.greedy ? CompiledRegex::Split_PrioritizeParent auto split_pos = push_inst(quantifier.greedy ? CompiledRegex::Split_PrioritizeParent
: CompiledRegex::Split_PrioritizeChild); : CompiledRegex::Split_PrioritizeChild);
goto_end_offsets.push_back(alloc_offset()); goto_ends.push_back(split_pos);
} }
auto inner_pos = compile_node_inner(node); auto inner_pos = compile_node_inner(node);
@ -652,66 +643,45 @@ private:
inner_pos = compile_node_inner(node); inner_pos = compile_node_inner(node);
if (quantifier.allows_infinite_repeat()) if (quantifier.allows_infinite_repeat())
{ push_inst(quantifier.greedy ? CompiledRegex::Split_PrioritizeChild
push_op(quantifier.greedy ? CompiledRegex::Split_PrioritizeChild : CompiledRegex::Split_PrioritizeParent,
: CompiledRegex::Split_PrioritizeParent); inner_pos);
set_offset(alloc_offset(), inner_pos);
}
// Write the node as an optional match for the min -> max counts // Write the node as an optional match for the min -> max counts
else for (int i = std::max(1, quantifier.min); // STILL UGLY ! else for (int i = std::max(1, quantifier.min); // STILL UGLY !
i < quantifier.max; ++i) i < quantifier.max; ++i)
{ {
push_op(quantifier.greedy ? CompiledRegex::Split_PrioritizeParent auto split_pos = push_inst(quantifier.greedy ? CompiledRegex::Split_PrioritizeParent
: CompiledRegex::Split_PrioritizeChild); : CompiledRegex::Split_PrioritizeChild);
goto_end_offsets.push_back(alloc_offset()); goto_ends.push_back(split_pos);
compile_node_inner(node); compile_node_inner(node);
} }
for (auto offset : goto_end_offsets) for (auto offset : goto_ends)
set_offset(offset, m_program.bytecode.size()); m_program.instructions[offset].param = m_program.instructions.size();
return pos; return pos;
} }
Offset alloc_offset() uint32_t push_inst(CompiledRegex::Op op, uint32_t param = 0)
{ {
auto pos = m_program.bytecode.size(); uint32_t res = m_program.instructions.size();
m_program.bytecode.resize(pos + sizeof(Offset)); m_program.instructions.push_back({ op, param });
return pos; return res;
} }
void set_offset(Offset pos, Offset value) uint32_t push_lookaround(const Vector<ParsedRegex::AstNodePtr>& literals, bool reversed = false)
{ {
memcpy(&m_program.bytecode[pos], &value, sizeof(Offset)); uint32_t res = m_program.lookarounds.size();
}
void push_op(CompiledRegex::Op op)
{
m_program.bytecode.push_back(op);
}
void push_byte(char byte)
{
m_program.bytecode.push_back(byte);
}
void push_codepoint(Codepoint cp)
{
utf8::dump(std::back_inserter(m_program.bytecode), cp);
}
void push_string(const Vector<ParsedRegex::AstNodePtr>& codepoints, bool reversed = false)
{
if (codepoints.size() > 127)
throw runtime_error{"Too long literal string"};
push_byte(codepoints.size());
if (reversed) if (reversed)
for (auto& cp : codepoints | reverse()) for (auto& literal : literals | reverse())
push_codepoint(cp->value); m_program.lookarounds.push_back(literal->value);
else else
for (auto& cp : codepoints) for (auto& literal : literals)
push_codepoint(cp->value); m_program.lookarounds.push_back(literal->value);
m_program.lookarounds.push_back((Codepoint)-1);
return res;
} }
// Fills accepted and rejected according to which chars can start the given node, // Fills accepted and rejected according to which chars can start the given node,
@ -804,40 +774,35 @@ private:
void dump_regex(const CompiledRegex& program) void dump_regex(const CompiledRegex& program)
{ {
for (auto pos = program.bytecode.data(), end = program.bytecode.data() + program.bytecode.size(); for (auto& inst : program.instructions)
pos < end; )
{ {
printf("%4zd ", pos - program.bytecode.data()); switch (inst.op)
const auto op = (CompiledRegex::Op)*pos++;
switch (op)
{ {
case CompiledRegex::Literal: case CompiledRegex::Literal:
printf("literal %lc\n", utf8::read_codepoint(pos, (const char*)nullptr)); printf("literal %lc\n", inst.param);
break; break;
case CompiledRegex::LiteralIgnoreCase: case CompiledRegex::LiteralIgnoreCase:
printf("literal (ignore case) %lc\n", utf8::read_codepoint(pos, (const char*)nullptr)); printf("literal (ignore case) %lc\n", inst.param);
break; break;
case CompiledRegex::AnyChar: case CompiledRegex::AnyChar:
printf("any char\n"); printf("any char\n");
break; break;
case CompiledRegex::Jump: case CompiledRegex::Jump:
printf("jump %u\n", *reinterpret_cast<const CompiledRegex::Offset*>(&*pos)); printf("jump %u\n", inst.param);
pos += sizeof(CompiledRegex::Offset);
break; break;
case CompiledRegex::Split_PrioritizeParent: case CompiledRegex::Split_PrioritizeParent:
case CompiledRegex::Split_PrioritizeChild: case CompiledRegex::Split_PrioritizeChild:
{ {
printf("split (prioritize %s) %u\n", printf("split (prioritize %s) %u\n",
op == CompiledRegex::Split_PrioritizeParent ? "parent" : "child", inst.op == CompiledRegex::Split_PrioritizeParent ? "parent" : "child",
*reinterpret_cast<const CompiledRegex::Offset*>(&*pos)); inst.param);
pos += sizeof(CompiledRegex::Offset);
break; break;
} }
case CompiledRegex::Save: case CompiledRegex::Save:
printf("save %d\n", *pos++); printf("save %d\n", inst.param);
break; break;
case CompiledRegex::Matcher: case CompiledRegex::Matcher:
printf("matcher %d\n", *pos++); printf("matcher %d\n", inst.param);
break; break;
case CompiledRegex::LineStart: case CompiledRegex::LineStart:
printf("line start\n"); printf("line start\n");
@ -862,20 +827,20 @@ void dump_regex(const CompiledRegex& program)
case CompiledRegex::LookBehind: case CompiledRegex::LookBehind:
case CompiledRegex::NegativeLookBehind: case CompiledRegex::NegativeLookBehind:
{ {
int count = *pos++;
StringView str{pos, pos + count};
const char* name = nullptr; const char* name = nullptr;
if (op == CompiledRegex::LookAhead) if (inst.op == CompiledRegex::LookAhead)
name = "look ahead"; name = "look ahead";
if (op == CompiledRegex::NegativeLookAhead) if (inst.op == CompiledRegex::NegativeLookAhead)
name = "negative look ahead"; name = "negative look ahead";
if (op == CompiledRegex::LookBehind) if (inst.op == CompiledRegex::LookBehind)
name = "look behind"; name = "look behind";
if (op == CompiledRegex::NegativeLookBehind) if (inst.op == CompiledRegex::NegativeLookBehind)
name = "negative look behind"; name = "negative look behind";
printf("%s (%s)\n", name, (const char*)str.zstr()); String str;
pos += count; for (auto it = program.lookarounds.begin() + inst.param; *it != -1; ++it)
utf8::dump(std::back_inserter(str), *it);
printf("%s (%s)\n", name, str.c_str());
break; break;
} }
case CompiledRegex::Match: case CompiledRegex::Match:

View File

@ -45,11 +45,17 @@ struct CompiledRegex : RefCountable
NegativeLookBehind, NegativeLookBehind,
}; };
using Offset = unsigned; struct Instruction
explicit operator bool() const { return not bytecode.empty(); } {
Op op;
uint32_t param;
};
Vector<char> bytecode; explicit operator bool() const { return not instructions.empty(); }
Vector<Instruction> instructions;
Vector<std::function<bool (Codepoint)>> matchers; Vector<std::function<bool (Codepoint)>> matchers;
Vector<Codepoint> lookarounds;
MatchDirection direction; MatchDirection direction;
size_t save_count; size_t save_count;
@ -123,7 +129,7 @@ public:
return false; return false;
Vector<Thread> current_threads, next_threads; Vector<Thread> current_threads, next_threads;
std::unique_ptr<bool[]> inst_processed{new bool[m_program.bytecode.size()]}; std::unique_ptr<bool[]> processed_inst{new bool[m_program.instructions.size()]};
const bool no_saves = (m_flags & RegexExecFlags::NoSaves); const bool no_saves = (m_flags & RegexExecFlags::NoSaves);
Utf8It start{m_begin}; Utf8It start{m_begin};
@ -134,7 +140,7 @@ public:
to_next_start(start, m_end, start_chars); to_next_start(start, m_end, start_chars);
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr), if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
current_threads, next_threads, inst_processed.get())) current_threads, next_threads, processed_inst.get()))
return true; return true;
if (not (flags & RegexExecFlags::Search)) if (not (flags & RegexExecFlags::Search))
@ -144,7 +150,7 @@ public:
{ {
to_next_start(++start, m_end, start_chars); to_next_start(++start, m_end, start_chars);
if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr), if (exec_from(start, no_saves ? nullptr : new_saves<false>(nullptr),
current_threads, next_threads, inst_processed.get())) current_threads, next_threads, processed_inst.get()))
return true; return true;
} }
while (start != m_end); while (start != m_end);
@ -200,7 +206,7 @@ private:
struct Thread struct Thread
{ {
const char* inst; uint32_t inst;
Saves* saves; Saves* saves;
}; };
@ -209,58 +215,49 @@ private:
enum class StepResult { Consumed, Matched, Failed }; enum class StepResult { Consumed, Matched, Failed };
// Steps a thread until it consumes the current character, matches or fail // Steps a thread until it consumes the current character, matches or fail
StepResult step(const Utf8It& pos, Thread& thread, Vector<Thread>& threads, bool* inst_processed) StepResult step(const Utf8It& pos, Thread& thread, Vector<Thread>& threads, bool* processed_inst)
{ {
const auto prog_start = m_program.bytecode.data();
const auto prog_end = prog_start + m_program.bytecode.size();
while (true) while (true)
{ {
// If we have hit this instruction on this character, in this thread or another, do not try again if (processed_inst[thread.inst])
const auto inst_offset = thread.inst - prog_start;
if (inst_processed[inst_offset])
return StepResult::Failed; return StepResult::Failed;
inst_processed[inst_offset] = true; processed_inst[thread.inst] = true;
auto& inst = m_program.instructions[thread.inst++];
const Codepoint cp = pos == m_end ? 0 : *pos; const Codepoint cp = pos == m_end ? 0 : *pos;
const CompiledRegex::Op op = (CompiledRegex::Op)*thread.inst++; switch (inst.op)
switch (op)
{ {
case CompiledRegex::Literal: case CompiledRegex::Literal:
if (utf8::read_codepoint(thread.inst, prog_end) == cp) if (inst.param == cp)
return StepResult::Consumed; return StepResult::Consumed;
return StepResult::Failed; return StepResult::Failed;
case CompiledRegex::LiteralIgnoreCase: case CompiledRegex::LiteralIgnoreCase:
if (utf8::read_codepoint(thread.inst, prog_end) == to_lower(cp)) if (inst.param == to_lower(cp))
return StepResult::Consumed; return StepResult::Consumed;
return StepResult::Failed; return StepResult::Failed;
case CompiledRegex::AnyChar: case CompiledRegex::AnyChar:
return StepResult::Consumed; return StepResult::Consumed;
case CompiledRegex::Jump: case CompiledRegex::Jump:
thread.inst = prog_start + get_offset(thread.inst); thread.inst = inst.param;
break; break;
case CompiledRegex::Split_PrioritizeParent: case CompiledRegex::Split_PrioritizeParent:
{ {
auto parent = thread.inst + sizeof(CompiledRegex::Offset);
auto child = prog_start + get_offset(thread.inst);
thread.inst = parent;
if (thread.saves) if (thread.saves)
++thread.saves->refcount; ++thread.saves->refcount;
threads.push_back({child, thread.saves}); threads.push_back({inst.param, thread.saves});
break; break;
} }
case CompiledRegex::Split_PrioritizeChild: case CompiledRegex::Split_PrioritizeChild:
{ {
auto parent = thread.inst + sizeof(CompiledRegex::Offset);
auto child = prog_start + get_offset(thread.inst);
thread.inst = child;
if (thread.saves) if (thread.saves)
++thread.saves->refcount; ++thread.saves->refcount;
threads.push_back({parent, thread.saves}); threads.push_back({thread.inst, thread.saves});
thread.inst = inst.param;
break; break;
} }
case CompiledRegex::Save: case CompiledRegex::Save:
{ {
const size_t index = *thread.inst++;
if (thread.saves == nullptr) if (thread.saves == nullptr)
break; break;
if (thread.saves->refcount > 1) if (thread.saves->refcount > 1)
@ -268,15 +265,12 @@ private:
--thread.saves->refcount; --thread.saves->refcount;
thread.saves = new_saves<true>(thread.saves->pos); thread.saves = new_saves<true>(thread.saves->pos);
} }
thread.saves->pos[index] = get_base(pos); thread.saves->pos[inst.param] = get_base(pos);
break; break;
} }
case CompiledRegex::Matcher: case CompiledRegex::Matcher:
{ return m_program.matchers[inst.param](cp) ?
const int matcher_id = *thread.inst++;
return m_program.matchers[matcher_id](cp) ?
StepResult::Consumed : StepResult::Failed; StepResult::Consumed : StepResult::Failed;
}
case CompiledRegex::LineStart: case CompiledRegex::LineStart:
if (not is_line_start(pos)) if (not is_line_start(pos))
return StepResult::Failed; return StepResult::Failed;
@ -304,27 +298,25 @@ private:
case CompiledRegex::LookAhead: case CompiledRegex::LookAhead:
case CompiledRegex::NegativeLookAhead: case CompiledRegex::NegativeLookAhead:
{ {
int count = *thread.inst++; auto ref = m_program.lookarounds.begin() + inst.param;
for (auto it = pos; count and it != m_end; ++it, --count) for (auto it = pos; *ref != -1 and it != m_end; ++it, ++ref)
if (*it != utf8::read(thread.inst)) if (*it != *ref)
break; break;
if ((op == CompiledRegex::LookAhead and count != 0) or if ((inst.op == CompiledRegex::LookAhead and *ref != -1) or
(op == CompiledRegex::NegativeLookAhead and count == 0)) (inst.op == CompiledRegex::NegativeLookAhead and *ref == -1))
return StepResult::Failed; return StepResult::Failed;
thread.inst = utf8::advance(thread.inst, prog_end, CharCount{count - 1});
break; break;
} }
case CompiledRegex::LookBehind: case CompiledRegex::LookBehind:
case CompiledRegex::NegativeLookBehind: case CompiledRegex::NegativeLookBehind:
{ {
int count = *thread.inst++; auto ref = m_program.lookarounds.begin() + inst.param;
for (auto it = pos-1; count and it >= m_begin; --it, --count) for (auto it = pos-1; *ref != -1 and it >= m_begin; --it, ++ref)
if (*it != utf8::read(thread.inst)) if (*it != *ref)
break; break;
if ((op == CompiledRegex::LookBehind and count != 0) or if ((inst.op == CompiledRegex::LookBehind and *ref != -1) or
(op == CompiledRegex::NegativeLookBehind and count == 0)) (inst.op == CompiledRegex::NegativeLookBehind and *ref == -1))
return StepResult::Failed; return StepResult::Failed;
thread.inst = utf8::advance(thread.inst, prog_end, CharCount{count - 1});
break; break;
} }
case CompiledRegex::Match: case CompiledRegex::Match:
@ -334,20 +326,20 @@ private:
return StepResult::Failed; return StepResult::Failed;
} }
bool exec_from(const Utf8It& start, Saves* initial_saves, Vector<Thread>& current_threads, Vector<Thread>& next_threads, bool* inst_processed) bool exec_from(const Utf8It& start, Saves* initial_saves, Vector<Thread>& current_threads, Vector<Thread>& next_threads, bool* processed_inst)
{ {
current_threads.push_back({m_program.bytecode.data(), initial_saves}); current_threads.push_back({0, initial_saves});
next_threads.clear(); next_threads.clear();
bool found_match = false; bool found_match = false;
for (Utf8It pos = start; pos != m_end; ++pos) for (Utf8It pos = start; pos != m_end; ++pos)
{ {
memset(inst_processed, 0, m_program.bytecode.size() * sizeof(bool)); memset(processed_inst, 0, sizeof(bool) * m_program.instructions.size());
while (not current_threads.empty()) while (not current_threads.empty())
{ {
auto thread = current_threads.back(); auto thread = current_threads.back();
current_threads.pop_back(); current_threads.pop_back();
switch (step(pos, thread, current_threads, inst_processed)) switch (step(pos, thread, current_threads, processed_inst))
{ {
case StepResult::Matched: case StepResult::Matched:
if (not (m_flags & RegexExecFlags::Search) or // We are not at end, this is not a full match if (not (m_flags & RegexExecFlags::Search) or // We are not at end, this is not a full match
@ -385,13 +377,13 @@ private:
if (found_match) if (found_match)
return true; return true;
memset(inst_processed, 0, m_program.bytecode.size() * sizeof(bool)); memset(processed_inst, 0, sizeof(bool) * m_program.instructions.size());
// Step remaining threads to see if they match without consuming anything else // Step remaining threads to see if they match without consuming anything else
while (not current_threads.empty()) while (not current_threads.empty())
{ {
auto thread = current_threads.back(); auto thread = current_threads.back();
current_threads.pop_back(); current_threads.pop_back();
if (step(m_end, thread, current_threads, inst_processed) == StepResult::Matched) if (step(m_end, thread, current_threads, processed_inst) == StepResult::Matched)
{ {
release_saves(m_captures); release_saves(m_captures);
m_captures = thread.saves; m_captures = thread.saves;
@ -411,13 +403,6 @@ private:
++start; ++start;
} }
static CompiledRegex::Offset get_offset(const char* ptr)
{
CompiledRegex::Offset res;
memcpy(&res, ptr, sizeof(CompiledRegex::Offset));
return res;
}
bool is_line_start(const Utf8It& pos) const bool is_line_start(const Utf8It& pos) const
{ {
return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or