Regex: introduce RegexExecFlags to control various behaviours

This commit is contained in:
Maxime Coste 2017-10-02 22:34:57 +08:00
parent 73b14b11be
commit f007794d9c
3 changed files with 81 additions and 23 deletions

View File

@ -130,6 +130,28 @@ void check_captures(const Regex& re, const MatchResults<It>& res, const Vector<I
} }
} }
// Translates the match flags given to the boost::regex_search based functions
// into the RegexExecFlags taken by the CompiledRegex based functions, so that
// both can be run with equivalent options.
// Only the flags listed below are translated; any other bit is dropped.
inline RegexExecFlags convert_flags(RegexConstant::match_flag_type flags)
{
    RegexExecFlags converted = RegexExecFlags::None;

    // Sets `to` in the result when `from` is present in the input flags.
    const auto translate = [&](RegexConstant::match_flag_type from, RegexExecFlags to) {
        if (flags & from)
            converted |= to;
    };

    translate(RegexConstant::match_not_bol,          RegexExecFlags::NotBeginOfLine);
    translate(RegexConstant::match_not_eol,          RegexExecFlags::NotEndOfLine);
    translate(RegexConstant::match_not_bow,          RegexExecFlags::NotBeginOfWord);
    translate(RegexConstant::match_not_eow,          RegexExecFlags::NotEndOfWord);
    translate(RegexConstant::match_not_bob,          RegexExecFlags::NotBeginOfSubject);
    translate(RegexConstant::match_not_initial_null, RegexExecFlags::NotInitialNull);
    translate(RegexConstant::match_any,              RegexExecFlags::AnyMatch);

    return converted;
}
template<typename It> template<typename It>
bool regex_match(It begin, It end, const Regex& re) bool regex_match(It begin, It end, const Regex& re)
{ {
@ -172,7 +194,7 @@ bool regex_search(It begin, It end, const Regex& re,
try try
{ {
bool matched = boost::regex_search<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end}, re, flags); bool matched = boost::regex_search<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end}, re, flags);
if (re.impl() and matched != regex_search(begin, end, re.impl())) if (re.impl() and matched != regex_search(begin, end, re.impl(), convert_flags(flags)))
regex_mismatch(re); regex_mismatch(re);
return matched; return matched;
} }
@ -190,7 +212,7 @@ bool regex_search(It begin, It end, MatchResults<It>& res, const Regex& re,
{ {
bool matched = boost::regex_search<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end}, res, re, flags); bool matched = boost::regex_search<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end}, res, re, flags);
Vector<It> captures; Vector<It> captures;
if (re.impl() and matched != regex_search(begin, end, captures, re.impl())) if (re.impl() and matched != regex_search(begin, end, captures, re.impl(), convert_flags(flags)))
regex_mismatch(re); regex_mismatch(re);
if (re.impl() and matched) if (re.impl() and matched)
check_captures(re, res, captures); check_captures(re, res, captures);

View File

@ -821,7 +821,13 @@ auto test_regex = UnitTest{[]{
// Test helper running the VM on the [re.begin(), re.end()) range.
// Passing `match` == false adds RegexExecFlags::Search and passing
// `longest` == false adds RegexExecFlags::AnyMatch, so the default
// arguments execute with AnyMatch only.
bool exec(StringView re, bool match = true, bool longest = false)
{
    const RegexExecFlags mode = match ? RegexExecFlags::None : RegexExecFlags::Search;
    const RegexExecFlags stop = longest ? RegexExecFlags::None : RegexExecFlags::AnyMatch;
    return ThreadedRegexVM::exec(re.begin(), re.end(), mode | stop);
}
}; };

View File

@ -5,6 +5,7 @@
#include "utf8.hh" #include "utf8.hh"
#include "utf8_iterator.hh" #include "utf8_iterator.hh"
#include "vector.hh" #include "vector.hh"
#include "flags.hh"
namespace Kakoune namespace Kakoune
{ {
@ -46,6 +47,21 @@ struct CompiledRegex
CompiledRegex compile_regex(StringView re); CompiledRegex compile_regex(StringView re);
// Options controlling a ThreadedRegexVM execution. Each value is a distinct
// bit so that several options can be combined.
enum class RegexExecFlags
{
    None              = 0x00,
    // Look for a match inside the subject instead of requiring the whole
    // subject to match.
    Search            = 0x01,
    // The start of the subject is not considered a line start.
    NotBeginOfLine    = 0x02,
    // The end of the subject is not considered a line end.
    NotEndOfLine      = 0x04,
    // The start of the subject is not considered a word boundary.
    NotBeginOfWord    = 0x08,
    // The end of the subject is not considered a word boundary.
    NotEndOfWord      = 0x10,
    // The subject begin assertion always fails.
    NotBeginOfSubject = 0x20,
    // Reject matches found while still at the start of the subject, and
    // never match an empty subject.
    NotInitialNull    = 0x40,
    // Stop at the first match found instead of continuing execution.
    AnyMatch          = 0x80
};
// Overload for RegexExecFlags that always returns true. Presumably this is the
// opt-in that enables the bitwise operators from "flags.hh" for this enum —
// TODO confirm against flags.hh.
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
template<typename Iterator> template<typename Iterator>
struct ThreadedRegexVM struct ThreadedRegexVM
{ {
@ -133,7 +149,7 @@ struct ThreadedRegexVM
return StepResult::Failed; return StepResult::Failed;
break; break;
case CompiledRegex::SubjectBegin: case CompiledRegex::SubjectBegin:
if (m_pos != m_begin) if (m_pos != m_begin or m_flags & RegexExecFlags::NotBeginOfSubject)
return StepResult::Failed; return StepResult::Failed;
break; break;
case CompiledRegex::SubjectEnd: case CompiledRegex::SubjectEnd:
@ -173,16 +189,20 @@ struct ThreadedRegexVM
return StepResult::Failed; return StepResult::Failed;
} }
bool exec(Iterator begin, Iterator end, bool match = true, bool longest = false) bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
{ {
bool found_match = false; bool found_match = false;
m_threads.clear(); m_threads.clear();
const auto start_offset = (match ? CompiledRegex::search_prefix_size : 0); const auto start_offset = (flags & RegexExecFlags::Search) ? 0 : CompiledRegex::search_prefix_size;
add_thread(0, m_program.bytecode.data() + start_offset, add_thread(0, m_program.bytecode.data() + start_offset,
Vector<Iterator>(m_program.save_count, Iterator{})); Vector<Iterator>(m_program.save_count, Iterator{}));
m_begin = begin; m_begin = begin;
m_end = end; m_end = end;
m_flags = flags;
if (flags & RegexExecFlags::NotInitialNull and m_begin == m_end)
return false;
for (m_pos = Utf8It{m_begin, m_begin, m_end}; m_pos != m_end; ++m_pos) for (m_pos = Utf8It{m_begin, m_begin, m_end}; m_pos != m_end; ++m_pos)
{ {
@ -191,17 +211,19 @@ struct ThreadedRegexVM
const auto res = step(i); const auto res = step(i);
if (res == StepResult::Matched) if (res == StepResult::Matched)
{ {
if (match) if (not (flags & RegexExecFlags::Search) or // We are not at end, this is not a full match
(flags & RegexExecFlags::NotInitialNull and m_pos == m_begin))
{ {
m_threads.erase(m_threads.begin() + i); m_threads.erase(m_threads.begin() + i);
continue; // We are not at end, this is not a full match continue;
} }
m_captures = std::move(m_threads[i].saves); m_captures = std::move(m_threads[i].saves);
if (flags & RegexExecFlags::AnyMatch)
return true;
found_match = true; found_match = true;
m_threads.resize(i); // remove this and lower priority threads m_threads.resize(i); // remove this and lower priority threads
if (not longest)
return true;
} }
else if (res == StepResult::Failed) else if (res == StepResult::Failed)
m_threads.erase(m_threads.begin() + i); m_threads.erase(m_threads.begin() + i);
@ -227,10 +249,11 @@ struct ThreadedRegexVM
if (step(i) == StepResult::Matched) if (step(i) == StepResult::Matched)
{ {
m_captures = std::move(m_threads[i].saves); m_captures = std::move(m_threads[i].saves);
if (flags & RegexExecFlags::AnyMatch)
return true;
found_match = true; found_match = true;
m_threads.resize(i); // remove this and lower priority threads m_threads.resize(i); // remove this and lower priority threads
if (not longest)
return true;
} }
} }
return found_match; return found_match;
@ -246,17 +269,20 @@ struct ThreadedRegexVM
bool is_line_start() const bool is_line_start() const
{ {
return m_pos == m_begin or *(m_pos-1) == '\n'; return (m_pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfLine)) or
*(m_pos-1) == '\n';
} }
bool is_line_end() const bool is_line_end() const
{ {
return m_pos == m_end or *m_pos == '\n'; return (m_pos == m_end and not (m_flags & RegexExecFlags::NotEndOfLine)) or
*m_pos == '\n';
} }
bool is_word_boundary() const bool is_word_boundary() const
{ {
return m_pos == m_begin or m_pos == m_end or return (m_pos == m_begin and not (m_flags & RegexExecFlags::NotBeginOfWord)) or
(m_pos == m_end and not (m_flags & RegexExecFlags::NotEndOfWord)) or
is_word(*(m_pos-1)) != is_word(*m_pos); is_word(*(m_pos-1)) != is_word(*m_pos);
} }
@ -268,22 +294,24 @@ struct ThreadedRegexVM
Iterator m_begin; Iterator m_begin;
Iterator m_end; Iterator m_end;
Utf8It m_pos; Utf8It m_pos;
RegexExecFlags m_flags;
Vector<Iterator> m_captures; Vector<Iterator> m_captures;
}; };
template<typename It> template<typename It>
bool regex_match(It begin, It end, const CompiledRegex& re) bool regex_match(It begin, It end, const CompiledRegex& re, RegexExecFlags flags = RegexExecFlags::None)
{ {
ThreadedRegexVM<It> vm{re}; ThreadedRegexVM<It> vm{re};
return vm.exec(begin, end, true, false); return vm.exec(begin, end, (RegexExecFlags)(flags & ~(RegexExecFlags::Search)) | RegexExecFlags::AnyMatch);
} }
template<typename It> template<typename It>
bool regex_match(It begin, It end, Vector<It>& captures, const CompiledRegex& re) bool regex_match(It begin, It end, Vector<It>& captures, const CompiledRegex& re,
RegexExecFlags flags = RegexExecFlags::None)
{ {
ThreadedRegexVM<It> vm{re}; ThreadedRegexVM<It> vm{re};
if (vm.exec(begin, end, true, true)) if (vm.exec(begin, end, flags & ~(RegexExecFlags::Search)))
{ {
captures = std::move(vm.m_captures); captures = std::move(vm.m_captures);
return true; return true;
@ -292,17 +320,19 @@ bool regex_match(It begin, It end, Vector<It>& captures, const CompiledRegex& re
} }
template<typename It> template<typename It>
bool regex_search(It begin, It end, const CompiledRegex& re) bool regex_search(It begin, It end, const CompiledRegex& re,
RegexExecFlags flags = RegexExecFlags::None)
{ {
ThreadedRegexVM<It> vm{re}; ThreadedRegexVM<It> vm{re};
return vm.exec(begin, end, false, false); return vm.exec(begin, end, flags | RegexExecFlags::Search | RegexExecFlags::AnyMatch);
} }
template<typename It> template<typename It>
bool regex_search(It begin, It end, Vector<It>& captures, const CompiledRegex& re) bool regex_search(It begin, It end, Vector<It>& captures, const CompiledRegex& re,
RegexExecFlags flags = RegexExecFlags::None)
{ {
ThreadedRegexVM<It> vm{re}; ThreadedRegexVM<It> vm{re};
if (vm.exec(begin, end, false, true)) if (vm.exec(begin, end, flags | RegexExecFlags::Search))
{ {
captures = std::move(vm.m_captures); captures = std::move(vm.m_captures);
return true; return true;