Regex: Use memcpy to write/read offsets from bytecode

reinterpret_cast was undefined behaviour as we do not guarantee
that offsets are going to be stored properly aligned.
This commit is contained in:
Maxime Coste 2017-10-07 10:22:50 +08:00
parent b53227d62c
commit c375268c2d
2 changed files with 18 additions and 9 deletions

View File

@ -566,7 +566,7 @@ private:
goto_inner_end_offsets.push_back(alloc_offset());
auto right_pos = compile_node(children[1]);
get_offset(offset) = right_pos;
set_offset(offset, right_pos);
break;
}
@ -611,7 +611,7 @@ private:
}
for (auto& offset : goto_inner_end_offsets)
get_offset(offset) = m_program.bytecode.size();
set_offset(offset, m_program.bytecode.size());
if (capture != -1)
{
@ -645,7 +645,7 @@ private:
{
push_op(quantifier.greedy ? CompiledRegex::Split_PrioritizeChild
: CompiledRegex::Split_PrioritizeParent);
get_offset(alloc_offset()) = inner_pos;
set_offset(alloc_offset(), inner_pos);
}
// Write the node as an optional match for the min -> max counts
else for (int i = std::max(1, quantifier.min); // STILL UGLY !
@ -658,7 +658,7 @@ private:
}
for (auto offset : goto_end_offsets)
get_offset(offset) = m_program.bytecode.size();
set_offset(offset, m_program.bytecode.size());
return pos;
}
@ -670,9 +670,9 @@ private:
return pos;
}
Offset& get_offset(Offset pos)
void set_offset(Offset pos, Offset value)
{
return *reinterpret_cast<Offset*>(&m_program.bytecode[pos]);
memcpy(&m_program.bytecode[pos], &value, sizeof(Offset));
}
void push_op(CompiledRegex::Op op)

View File

@ -8,6 +8,8 @@
#include "flags.hh"
#include "ref_ptr.hh"
#include <string.h>
namespace Kakoune
{
@ -199,12 +201,12 @@ private:
case CompiledRegex::AnyChar:
return StepResult::Consumed;
case CompiledRegex::Jump:
thread.inst = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst);
thread.inst = prog_start + get_offset(thread.inst);
break;
case CompiledRegex::Split_PrioritizeParent:
{
auto parent = thread.inst + sizeof(CompiledRegex::Offset);
auto child = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst);
auto child = prog_start + get_offset(thread.inst);
thread.inst = parent;
if (thread.saves)
++thread.saves->refcount;
@ -214,7 +216,7 @@ private:
case CompiledRegex::Split_PrioritizeChild:
{
auto parent = thread.inst + sizeof(CompiledRegex::Offset);
auto child = prog_start + *reinterpret_cast<const CompiledRegex::Offset*>(thread.inst);
auto child = prog_start + get_offset(thread.inst);
thread.inst = child;
if (thread.saves)
++thread.saves->refcount;
@ -373,6 +375,13 @@ private:
++start;
}
// Reads an Offset stored in the bytecode at `ptr`.
// The bytes are copied out with memcpy rather than read through a
// reinterpret_cast'ed pointer, since offsets are not guaranteed to be
// stored properly aligned.
static CompiledRegex::Offset get_offset(const char* ptr)
{
    CompiledRegex::Offset offset;
    memcpy(&offset, ptr, sizeof offset);
    return offset;
}
bool is_line_start(const Utf8It& pos) const
{
return (pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or