Regex: Add support for subject begin/end assertions (\` and \')

This commit is contained in:
Maxime Coste 2017-09-18 12:47:10 +09:00
parent 9c5d539616
commit d04c60b911

View File

@ -22,6 +22,8 @@ enum Op : char
LineEnd, LineEnd,
WordBoundary, WordBoundary,
NotWordBoundary, NotWordBoundary,
SubjectBegin,
SubjectEnd,
}; };
using Offset = size_t; using Offset = size_t;
@ -47,6 +49,8 @@ enum class Op
LineEnd, LineEnd,
WordBoundary, WordBoundary,
NotWordBoundary, NotWordBoundary,
SubjectBegin,
SubjectEnd,
}; };
struct AstNode struct AstNode
@ -119,6 +123,8 @@ private:
{ {
case 'b': pos += 2; return make_ast_node(Op::WordBoundary); case 'b': pos += 2; return make_ast_node(Op::WordBoundary);
case 'B': pos += 2; return make_ast_node(Op::NotWordBoundary); case 'B': pos += 2; return make_ast_node(Op::NotWordBoundary);
case '`': pos += 2; return make_ast_node(Op::SubjectBegin);
case '\'': pos += 2; return make_ast_node(Op::SubjectEnd);
} }
break; break;
/* TODO: \`, \', look ahead, look behind */ /* TODO: look ahead, look behind */
@ -236,6 +242,12 @@ RegexProgram::Offset compile_node(Vector<char>& program, const AstNodePtr& node)
case Op::NotWordBoundary: case Op::NotWordBoundary:
program.push_back(RegexProgram::NotWordBoundary); program.push_back(RegexProgram::NotWordBoundary);
break; break;
case Op::SubjectBegin:
program.push_back(RegexProgram::SubjectBegin);
break;
case Op::SubjectEnd:
program.push_back(RegexProgram::SubjectEnd);
break;
} }
for (auto& offset : goto_end_offsets) for (auto& offset : goto_end_offsets)
@ -305,6 +317,12 @@ void dump(ConstArrayView<char> program)
case RegexProgram::NotWordBoundary: case RegexProgram::NotWordBoundary:
printf("not word boundary\n"); printf("not word boundary\n");
break; break;
case RegexProgram::SubjectBegin:
printf("subject begin\n");
break;
case RegexProgram::SubjectEnd:
printf("subject end\n");
break;
case RegexProgram::Match: case RegexProgram::Match:
printf("match\n"); printf("match\n");
} }
@ -363,6 +381,14 @@ struct ThreadedExecutor
if (is_word_boundary()) if (is_word_boundary())
return { StepResult::Failed }; return { StepResult::Failed };
break; break;
case RegexProgram::SubjectBegin:
if (m_pos != m_subject.begin())
return { StepResult::Failed };
break;
case RegexProgram::SubjectEnd:
if (m_pos != m_subject.end())
return { StepResult::Failed };
break;
case RegexProgram::Match: case RegexProgram::Match:
return { StepResult::Matched }; return { StepResult::Matched };
} }
@ -478,6 +504,15 @@ auto test_regex = UnitTest{[]{
kak_assert(exec.match(program, "bar")); kak_assert(exec.match(program, "bar"));
kak_assert(not exec.match(program, "foobar")); kak_assert(not exec.match(program, "foobar"));
} }
{
StringView re = R"(\`(foo|bar)\')";
auto program = RegexCompiler::compile(re.begin(), re.end());
RegexProgram::dump(program);
Exec exec{program};
kak_assert(exec.match(program, "foo"));
kak_assert(exec.match(program, "bar"));
kak_assert(not exec.match(program, "foobar"));
}
}}; }};
} }