Compute StartDesc with an offset to effective start
This means that for `.{2,4}foo`, only positions at most 4 characters before an `f` are now considered as start candidates, instead of every character.
This commit is contained in:
parent
ee364d911f
commit
ca7471c25d
|
@ -886,8 +886,8 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mutate start_desc with informations on which Codepoint could start a match.
|
// Mutate start_desc with informations on which Codepoint could start a match.
|
||||||
// Returns true if the node possibly does not consume the char, in which case
|
// Returns true if the subsequent nodes are still relevant for computing the
|
||||||
// the next node would still be relevant for the parent node start chars computation.
|
// start desc
|
||||||
template<RegexMode direction>
|
template<RegexMode direction>
|
||||||
bool compute_start_desc(ParsedRegex::NodeIndex index,
|
bool compute_start_desc(ParsedRegex::NodeIndex index,
|
||||||
CompiledRegex::StartDesc& start_desc) const
|
CompiledRegex::StartDesc& start_desc) const
|
||||||
|
@ -916,10 +916,20 @@ private:
|
||||||
add_multi_byte_utf8();
|
add_multi_byte_utf8();
|
||||||
return node.quantifier.allows_none();
|
return node.quantifier.allows_none();
|
||||||
case ParsedRegex::AnyChar:
|
case ParsedRegex::AnyChar:
|
||||||
|
if (start_desc.offset + node.quantifier.max <= CompiledRegex::StartDesc::OffsetLimits::max())
|
||||||
|
{
|
||||||
|
start_desc.offset += node.quantifier.max;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
for (auto& b : start_desc.map)
|
for (auto& b : start_desc.map)
|
||||||
b = true;
|
b = true;
|
||||||
return node.quantifier.allows_none();
|
return node.quantifier.allows_none();
|
||||||
case ParsedRegex::AnyCharExceptNewLine:
|
case ParsedRegex::AnyCharExceptNewLine:
|
||||||
|
if (start_desc.offset + node.quantifier.max <= CompiledRegex::StartDesc::OffsetLimits::max())
|
||||||
|
{
|
||||||
|
start_desc.offset += node.quantifier.max;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
for (Codepoint cp = 0; cp < single_byte_limit; ++cp)
|
for (Codepoint cp = 0; cp < single_byte_limit; ++cp)
|
||||||
{
|
{
|
||||||
if (cp != '\n')
|
if (cp != '\n')
|
||||||
|
@ -1138,7 +1148,7 @@ String dump_regex(const CompiledRegex& program)
|
||||||
res += (char)c;
|
res += (char)c;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
res += "]\n";
|
res += format("]+{}\n", static_cast<int>(desc.offset));
|
||||||
};
|
};
|
||||||
if (program.forward_start_desc)
|
if (program.forward_start_desc)
|
||||||
dump_start_desc(*program.forward_start_desc, "forward");
|
dump_start_desc(*program.forward_start_desc, "forward");
|
||||||
|
@ -1556,6 +1566,17 @@ auto test_regex = UnitTest{[]{
|
||||||
kak_assert(vm.exec("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", RegexExecFlags::None));
|
kak_assert(vm.exec("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", RegexExecFlags::None));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
TestVM<RegexMode::Forward | RegexMode::Search> vm{"(.{3,4}|f)oo"};
|
||||||
|
kak_assert(vm.forward_start_desc and vm.forward_start_desc->offset == 4);
|
||||||
|
for (int c = 0; c < CompiledRegex::StartDesc::count; ++c)
|
||||||
|
kak_assert(vm.forward_start_desc->map[c] == (c == 'f' or c == 'o'));
|
||||||
|
|
||||||
|
kak_assert(vm.exec("xxxoo", RegexExecFlags::None));
|
||||||
|
kak_assert(vm.exec("xfoo", RegexExecFlags::None));
|
||||||
|
kak_assert(not vm.exec("😄xoo", RegexExecFlags::None));
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
auto eq = [](const CompiledRegex::NamedCapture& lhs,
|
auto eq = [](const CompiledRegex::NamedCapture& lhs,
|
||||||
const CompiledRegex::NamedCapture& rhs) {
|
const CompiledRegex::NamedCapture& rhs) {
|
||||||
|
|
|
@ -153,6 +153,8 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
||||||
struct StartDesc : UseMemoryDomain<MemoryDomain::Regex>
|
struct StartDesc : UseMemoryDomain<MemoryDomain::Regex>
|
||||||
{
|
{
|
||||||
static constexpr Codepoint count = 256;
|
static constexpr Codepoint count = 256;
|
||||||
|
using OffsetLimits = std::numeric_limits<uint8_t>;
|
||||||
|
uint8_t offset = 0;
|
||||||
bool map[count];
|
bool map[count];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -531,15 +533,16 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static Iterator find_next_start(Iterator pos, const ExecConfig& config, const StartDesc& start_desc)
|
static Iterator find_next_start(Iterator start, const ExecConfig& config, const StartDesc& start_desc)
|
||||||
{
|
{
|
||||||
|
auto pos = start;
|
||||||
while (pos != config.end)
|
while (pos != config.end)
|
||||||
{
|
{
|
||||||
static_assert(StartDesc::count <= 256, "start desc should be ascii only");
|
static_assert(StartDesc::count <= 256, "start desc should be ascii only");
|
||||||
if constexpr (forward)
|
if constexpr (forward)
|
||||||
{
|
{
|
||||||
if (start_desc.map[static_cast<unsigned char>(*pos)])
|
if (start_desc.map[static_cast<unsigned char>(*pos)])
|
||||||
return pos;
|
return utf8::advance(pos, start, -CharCount(start_desc.offset));
|
||||||
++pos;
|
++pos;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
Loading…
Reference in New Issue
Block a user