Regex: Use memcpy to write/read offsets from bytecode

Reading and writing offsets through a reinterpret_cast'ed Offset pointer
was undefined behaviour, as we do not guarantee that offsets are going to
be stored at properly aligned addresses in the bytecode.
This commit is contained in:
Maxime Coste 2017-10-07 10:22:50 +08:00
parent b53227d62c
commit c375268c2d
2 changed files with 18 additions and 9 deletions

View File

@ -566,7 +566,7 @@ private:
goto_inner_end_offsets.push_back(alloc_offset()); goto_inner_end_offsets.push_back(alloc_offset());
auto right_pos = compile_node(children[1]); auto right_pos = compile_node(children[1]);
get_offset(offset) = right_pos; set_offset(offset, right_pos);
break; break;
} }
@ -611,7 +611,7 @@ private:
} }
for (auto& offset : goto_inner_end_offsets) for (auto& offset : goto_inner_end_offsets)
get_offset(offset) = m_program.bytecode.size(); set_offset(offset, m_program.bytecode.size());
if (capture != -1) if (capture != -1)
{ {
@ -645,7 +645,7 @@ private:
{ {
push_op(quantifier.greedy ? CompiledRegex::Split_PrioritizeChild push_op(quantifier.greedy ? CompiledRegex::Split_PrioritizeChild
: CompiledRegex::Split_PrioritizeParent); : CompiledRegex::Split_PrioritizeParent);
get_offset(alloc_offset()) = inner_pos; set_offset(alloc_offset(), inner_pos);
} }
// Write the node as an optional match for the min -> max counts // Write the node as an optional match for the min -> max counts
else for (int i = std::max(1, quantifier.min); // STILL UGLY ! else for (int i = std::max(1, quantifier.min); // STILL UGLY !
@ -658,7 +658,7 @@ private:
} }
for (auto offset : goto_end_offsets) for (auto offset : goto_end_offsets)
get_offset(offset) = m_program.bytecode.size(); set_offset(offset, m_program.bytecode.size());
return pos; return pos;
} }
@ -670,9 +670,9 @@ private:
return pos; return pos;
} }
Offset& get_offset(Offset pos) void set_offset(Offset pos, Offset value)
{ {
return *reinterpret_cast<Offset*>(&m_program.bytecode[pos]); memcpy(&m_program.bytecode[pos], &value, sizeof(Offset));
} }
void push_op(CompiledRegex::Op op) void push_op(CompiledRegex::Op op)

View File

@ -8,6 +8,8 @@
#include "flags.hh" #include "flags.hh"
#include "ref_ptr.hh" #include "ref_ptr.hh"
#include <string.h>
namespace Kakoune namespace Kakoune
{ {
@ -199,12 +201,12 @@ private:
case CompiledRegex::AnyChar: case CompiledRegex::AnyChar:
return StepResult::Consumed; return StepResult::Consumed;
case CompiledRegex::Jump: case CompiledRegex::Jump:
thread.inst = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst); thread.inst = prog_start + get_offset(thread.inst);
break; break;
case CompiledRegex::Split_PrioritizeParent: case CompiledRegex::Split_PrioritizeParent:
{ {
auto parent = thread.inst + sizeof(CompiledRegex::Offset); auto parent = thread.inst + sizeof(CompiledRegex::Offset);
auto child = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst); auto child = prog_start + get_offset(thread.inst);
thread.inst = parent; thread.inst = parent;
if (thread.saves) if (thread.saves)
++thread.saves->refcount; ++thread.saves->refcount;
@ -214,7 +216,7 @@ private:
case CompiledRegex::Split_PrioritizeChild: case CompiledRegex::Split_PrioritizeChild:
{ {
auto parent = thread.inst + sizeof(CompiledRegex::Offset); auto parent = thread.inst + sizeof(CompiledRegex::Offset);
auto child = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst); auto child = prog_start + get_offset(thread.inst);
thread.inst = child; thread.inst = child;
if (thread.saves) if (thread.saves)
++thread.saves->refcount; ++thread.saves->refcount;
@ -373,6 +375,13 @@ private:
++start; ++start;
} }
// Reads the Offset value whose bytes start at ptr.
// The bytes are copied out with memcpy instead of being read through a
// cast pointer, because ptr is not guaranteed to be suitably aligned
// for CompiledRegex::Offset.
static CompiledRegex::Offset get_offset(const char* ptr)
{
    CompiledRegex::Offset offset;
    memcpy(&offset, ptr, sizeof(offset));
    return offset;
}
bool is_line_start(const Utf8It& pos) const bool is_line_start(const Utf8It& pos) const
{ {
return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or