Split compile time regex flags from runtime ones

This commit is contained in:
Maxime Coste 2018-12-27 11:29:33 +11:00
parent b76287ff99
commit fd043435e5
7 changed files with 164 additions and 148 deletions

View File

@ -1646,7 +1646,7 @@ void insert_matches(const Buffer& buffer, RegexMatchList& matches, const Regex&
{
size_t pivot = matches.size();
capture = capture and regex.mark_count() > 0;
ThreadedRegexVM<const char*, MatchDirection::Forward> vm{*regex.impl()};
ThreadedRegexVM<const char*, RegexMode::Forward | RegexMode::Search> vm{*regex.impl()};
for (auto line = range.begin; line < range.end; ++line)
{
const StringView l = buffer[line];

View File

@ -723,15 +723,16 @@ void paste_all(Context& context, NormalParams params)
selections = std::move(result);
}
// Maps a matching direction to the flags used when compiling the regex:
// forward needs no extra flag, backward asks for the backward program
// (Backward) and skips compiling the forward one (NoForward).
// NOTE(review): this text is a diff rendering without +/- markers; the
// MatchDirection lines look like the pre-change version of the RegexMode
// lines that follow them -- confirm against the commit.
constexpr RegexCompileFlags direction_flags(MatchDirection direction)
constexpr RegexCompileFlags direction_flags(RegexMode mode)
{
return (direction == MatchDirection::Forward) ?
return (mode & RegexMode::Forward) ?
RegexCompileFlags::None : RegexCompileFlags::Backward | RegexCompileFlags::NoForward;
}
template<MatchDirection direction = MatchDirection::Forward, typename T>
template<RegexMode mode = RegexMode::Forward, typename T>
void regex_prompt(Context& context, String prompt, String default_regex, T func)
{
static_assert(is_direction(mode));
DisplayCoord position = context.has_window() ? context.window().position() : DisplayCoord{};
SelectionList selections = context.selections();
context.input_handler().prompt(
@ -785,7 +786,7 @@ void regex_prompt(Context& context, String prompt, String default_regex, T func)
context.push_jump();
if (not str.empty() or event == PromptEvent::Validate)
func(Regex{str.empty() ? default_regex : str, direction_flags(direction)}, event, context);
func(Regex{str.empty() ? default_regex : str, direction_flags(mode)}, event, context);
}
catch (regex_error& err)
{
@ -805,19 +806,19 @@ void regex_prompt(Context& context, String prompt, String default_regex, T func)
});
}
// Replaces every selection of the context with the result of
// find_next_match for that selection, passed through keep_direction
// together with the original selection, then sorts the list and merges
// overlapping selections. The whole pass runs at least once and is
// repeated while --count stays above zero.
// NOTE(review): diff rendering without +/- markers; each
// MatchDirection/direction line appears to be the pre-change twin of the
// RegexMode/mode line right after it -- confirm against the commit.
template<MatchDirection direction>
template<RegexMode mode>
void select_next_matches(Context& context, const Regex& regex, int count)
{
auto& selections = context.selections();
do {
// Written by find_next_match; its value is not read in this function.
bool wrapped = false;
for (auto& sel : selections)
sel = keep_direction(find_next_match<direction>(context, sel, regex, wrapped), sel);
sel = keep_direction(find_next_match<mode>(context, sel, regex, wrapped), sel);
selections.sort_and_merge_overlapping();
} while (--count > 0);
}
template<MatchDirection direction>
template<RegexMode mode>
void extend_to_next_matches(Context& context, const Regex& regex, int count)
{
Vector<Selection> new_sels;
@ -827,7 +828,7 @@ void extend_to_next_matches(Context& context, const Regex& regex, int count)
size_t main_index = selections.main_index();
for (auto& sel : selections)
{
auto new_sel = find_next_match<direction>(context, sel, regex, wrapped);
auto new_sel = find_next_match<mode>(context, sel, regex, wrapped);
if (not wrapped)
{
new_sels.push_back(sel);
@ -844,12 +845,13 @@ void extend_to_next_matches(Context& context, const Regex& regex, int count)
} while (--count > 0);
}
template<SelectMode mode, MatchDirection direction>
template<SelectMode mode, RegexMode regex_mode>
void search(Context& context, NormalParams params)
{
static_assert(is_direction(regex_mode));
constexpr StringView prompt = mode == SelectMode::Extend ?
(direction == MatchDirection::Forward ? "search (extend):" : "reverse search (extend):")
: (direction == MatchDirection::Forward ? "search:" : "reverse search:");
(regex_mode & RegexMode::Forward ? "search (extend):" : "reverse search (extend):")
: (regex_mode & RegexMode::Forward ? "search:" : "reverse search:");
const char reg = to_lower(params.reg ? params.reg : '/');
const int count = params.count;
@ -858,7 +860,7 @@ void search(Context& context, NormalParams params)
Vector<String> saved_reg{reg_content.begin(), reg_content.end()};
const int main_index = std::min(context.selections().main_index(), saved_reg.size()-1);
regex_prompt<direction>(context, prompt.str(), saved_reg[main_index],
regex_prompt<regex_mode>(context, prompt.str(), saved_reg[main_index],
[reg, count, saved_reg]
(const Regex& regex, PromptEvent event, Context& context) {
if (event == PromptEvent::Abort)
@ -872,20 +874,20 @@ void search(Context& context, NormalParams params)
return;
if (mode == SelectMode::Extend)
extend_to_next_matches<direction>(context, regex, count);
extend_to_next_matches<regex_mode>(context, regex, count);
else
select_next_matches<direction>(context, regex, count);
select_next_matches<regex_mode>(context, regex, count);
});
}
template<SelectMode mode, MatchDirection direction>
template<SelectMode mode, RegexMode regex_mode>
void search_next(Context& context, NormalParams params)
{
const char reg = to_lower(params.reg ? params.reg : '/');
StringView str = context.main_sel_register_value(reg);
if (not str.empty())
{
Regex regex{str, direction_flags(direction)};
Regex regex{str, direction_flags(regex_mode)};
auto& selections = context.selections();
bool main_wrapped = false;
do {
@ -893,12 +895,12 @@ void search_next(Context& context, NormalParams params)
if (mode == SelectMode::Replace)
{
auto& sel = selections.main();
sel = keep_direction(find_next_match<direction>(context, sel, regex, wrapped), sel);
sel = keep_direction(find_next_match<regex_mode>(context, sel, regex, wrapped), sel);
}
else if (mode == SelectMode::Append)
{
auto sel = keep_direction(
find_next_match<direction>(context, selections.main(), regex, wrapped),
find_next_match<regex_mode>(context, selections.main(), regex, wrapped),
selections.main());
selections.push_back(std::move(sel));
selections.set_main_index(selections.size() - 1);
@ -1101,8 +1103,7 @@ void keep(Context& context, NormalParams params)
// give more intuitive behaviours in keep use cases.
const auto flags = match_flags(is_bol(begin.coord()), false,
is_bow(buffer, begin.coord()),
is_eow(buffer, end.coord())) |
RegexExecFlags::AnyMatch;
is_eow(buffer, end.coord()));
if (regex_search(begin, end, begin, end, regex, flags) == matching)
keep.push_back(sel);
}
@ -2235,14 +2236,14 @@ static const HashMap<Key, NormalCmd, MemoryDomain::Undefined, KeymapBackend> key
{ {'M'}, {"extend to matching character", select<SelectMode::Extend, select_matching<true>>} },
{ {alt('M')}, {"backward extend to matching character", select<SelectMode::Extend, select_matching<false>>} },
{ {'/'}, {"select next given regex match", search<SelectMode::Replace, MatchDirection::Forward>} },
{ {'?'}, {"extend with next given regex match", search<SelectMode::Extend, MatchDirection::Forward>} },
{ {alt('/')}, {"select previous given regex match", search<SelectMode::Replace, MatchDirection::Backward>} },
{ {alt('?')}, {"extend with previous given regex match", search<SelectMode::Extend, MatchDirection::Backward>} },
{ {'n'}, {"select next current search pattern match", search_next<SelectMode::Replace, MatchDirection::Forward>} },
{ {'N'}, {"extend with next current search pattern match", search_next<SelectMode::Append, MatchDirection::Forward>} },
{ {alt('n')}, {"select previous current search pattern match", search_next<SelectMode::Replace, MatchDirection::Backward>} },
{ {alt('N')}, {"extend with previous current search pattern match", search_next<SelectMode::Append, MatchDirection::Backward>} },
{ {'/'}, {"select next given regex match", search<SelectMode::Replace, RegexMode::Forward>} },
{ {'?'}, {"extend with next given regex match", search<SelectMode::Extend, RegexMode::Forward>} },
{ {alt('/')}, {"select previous given regex match", search<SelectMode::Replace, RegexMode::Backward>} },
{ {alt('?')}, {"extend with previous given regex match", search<SelectMode::Extend, RegexMode::Backward>} },
{ {'n'}, {"select next current search pattern match", search_next<SelectMode::Replace, RegexMode::Forward>} },
{ {'N'}, {"extend with next current search pattern match", search_next<SelectMode::Append, RegexMode::Forward>} },
{ {alt('n')}, {"select previous current search pattern match", search_next<SelectMode::Replace, RegexMode::Backward>} },
{ {alt('N')}, {"extend with previous current search pattern match", search_next<SelectMode::Append, RegexMode::Backward>} },
{ {'*'}, {"set search pattern to main selection content", use_selection_as_search_pattern<true>} },
{ {alt('*')}, {"set search pattern to main selection content, do not detect words", use_selection_as_search_pattern<false>} },

View File

@ -112,15 +112,15 @@ inline RegexExecFlags match_flags(bool bol, bool eol, bool bow, bool eow)
// Runs the regex VM in forward, non-search mode over [begin, end) and
// returns whether it matched. No capture results are exposed by this
// overload, hence AnyMatch and NoSaves.
// NOTE(review): diff rendering without +/- markers; the first vm/exec pair
// passes AnyMatch | NoSaves as runtime RegexExecFlags, the second pair
// passes them as compile-time RegexMode bits -- presumably old vs new
// version of the same two statements; confirm against the commit.
template<typename It>
bool regex_match(It begin, It end, const Regex& re)
{
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
return vm.exec(begin, end, begin, end, RegexExecFlags::AnyMatch | RegexExecFlags::NoSaves);
ThreadedRegexVM<It, RegexMode::Forward | RegexMode::AnyMatch | RegexMode::NoSaves> vm{*re.impl()};
return vm.exec(begin, end, begin, end, RegexExecFlags::None);
}
template<typename It>
bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
{
res.values().clear();
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
ThreadedRegexVM<It, RegexMode::Forward> vm{*re.impl()};
if (vm.exec(begin, end, begin, end, RegexExecFlags::None))
{
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
@ -133,19 +133,18 @@ template<typename It>
bool regex_search(It begin, It end, It subject_begin, It subject_end, const Regex& re,
RegexExecFlags flags = RegexExecFlags::None)
{
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
return vm.exec(begin, end, subject_begin, subject_end,
flags | RegexExecFlags::Search | RegexExecFlags::AnyMatch | RegexExecFlags::NoSaves);
ThreadedRegexVM<It, RegexMode::Forward | RegexMode::Search | RegexMode::AnyMatch | RegexMode::NoSaves> vm{*re.impl()};
return vm.exec(begin, end, subject_begin, subject_end, flags);
}
template<typename It, MatchDirection direction = MatchDirection::Forward>
template<typename It, RegexMode mode = RegexMode::Forward>
bool regex_search(It begin, It end, It subject_begin, It subject_end,
MatchResults<It>& res, const Regex& re,
RegexExecFlags flags = RegexExecFlags::None)
{
res.values().clear();
ThreadedRegexVM<It, direction> vm{*re.impl()};
if (vm.exec(begin, end, subject_begin, subject_end, flags | RegexExecFlags::Search))
ThreadedRegexVM<It, mode | RegexMode::Search> vm{*re.impl()};
if (vm.exec(begin, end, subject_begin, subject_end, flags))
{
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
return true;
@ -158,16 +157,18 @@ bool backward_regex_search(It begin, It end, It subject_begin, It subject_end,
MatchResults<It>& res, const Regex& re,
RegexExecFlags flags = RegexExecFlags::None)
{
return regex_search<It, MatchDirection::Backward>(begin, end, subject_begin, subject_end, res, re, flags);
return regex_search<It, RegexMode::Backward>(begin, end, subject_begin, subject_end, res, re, flags);
}
String option_to_string(const Regex& re);
Regex option_from_string(Meta::Type<Regex>, StringView str);
template<typename Iterator, MatchDirection direction = MatchDirection::Forward,
template<typename Iterator, RegexMode mode = RegexMode::Forward,
typename VmArg = const Regex>
struct RegexIterator
{
static_assert(has_direction(mode));
static constexpr bool forward = mode & RegexMode::Forward;
using ValueType = MatchResults<Iterator>;
struct Sentinel{};
struct It
@ -188,7 +189,7 @@ struct RegexIterator
RegexIterator(Iterator begin, Iterator end,
Iterator subject_begin, Iterator subject_end,
VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None)
: m_vm{make_vm(vm_arg)}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
: m_vm{make_vm(vm_arg)}, m_next_pos{forward ? begin : end},
m_begin{std::move(begin)}, m_end{std::move(end)},
m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
m_flags{flags} {}
@ -203,23 +204,21 @@ struct RegexIterator
private:
// Advances to the following match. Returns false when the VM finds none;
// otherwise moves the VM captures into m_results and updates m_next_pos.
// NOTE(review): diff rendering without +/- markers; lines using
// `direction == MatchDirection::Forward` or RegexExecFlags::Search appear
// to be the pre-change twins of the neighbouring `forward` / None lines --
// confirm against the commit.
bool next()
{
auto additional_flags = RegexExecFlags::Search;
auto additional_flags = RegexExecFlags::None;
// The previous match was empty: ask the VM to reject another empty
// match at its starting position.
if (m_results.size() and m_results[0].first == m_results[0].second)
additional_flags |= RegexExecFlags::NotInitialNull;
constexpr bool forward = direction == MatchDirection::Forward;
// Forward iteration scans [m_next_pos, m_end), backward iteration scans
// [m_begin, m_next_pos); the subject bounds are passed unchanged.
if (not m_vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos,
m_subject_begin, m_subject_end, m_flags | additional_flags))
return false;
m_results.values().clear();
std::move(m_vm.captures().begin(), m_vm.captures().end(), std::back_inserter(m_results.values()));
// Resume from the end of the match when going forward, from its start
// when going backward.
m_next_pos = (direction == MatchDirection::Forward) ? m_results[0].second : m_results[0].first;
m_next_pos = forward ? m_results[0].second : m_results[0].first;
return true;
}
using RegexVM = ThreadedRegexVM<Iterator, direction>;
using RegexVM = ThreadedRegexVM<Iterator, mode | RegexMode::Search>;
static RegexVM& make_vm(RegexVM& vm) { return vm; }
static RegexVM make_vm(const Regex& regex) { return {*regex.impl()}; }

View File

@ -92,14 +92,15 @@ struct ParsedRegex
namespace
{
template<MatchDirection direction = MatchDirection::Forward>
template<RegexMode mode = RegexMode::Forward>
struct Children
{
static_assert(has_direction(mode));
using Index = ParsedRegex::NodeIndex;
struct Sentinel {};
struct Iterator
{
static constexpr bool forward = direction == MatchDirection::Forward;
static constexpr bool forward = mode & RegexMode::Forward;
Iterator(ArrayView<const ParsedRegex::Node> nodes, Index index)
: m_nodes{nodes},
m_pos(forward ? index+1 : find_prev(index, nodes[index].children_end)),
@ -681,18 +682,18 @@ struct RegexCompiler
if (not (flags & RegexCompileFlags::NoForward))
{
m_program.forward_start_desc = compute_start_desc<MatchDirection::Forward>();
m_program.forward_start_desc = compute_start_desc<RegexMode::Forward>();
write_search_prefix();
compile_node<MatchDirection::Forward>(0);
compile_node<RegexMode::Forward>(0);
push_inst(CompiledRegex::Match);
}
if (flags & RegexCompileFlags::Backward)
{
m_program.first_backward_inst = m_program.instructions.size();
m_program.backward_start_desc = compute_start_desc<MatchDirection::Backward>();
m_program.backward_start_desc = compute_start_desc<RegexMode::Backward>();
write_search_prefix();
compile_node<MatchDirection::Backward>(0);
compile_node<RegexMode::Backward>(0);
push_inst(CompiledRegex::Match);
}
else
@ -707,7 +708,7 @@ struct RegexCompiler
private:
template<MatchDirection direction>
template<RegexMode direction>
uint32_t compile_node_inner(ParsedRegex::NodeIndex index)
{
auto& node = get_node(index);
@ -717,7 +718,7 @@ private:
const bool save = (node.op == ParsedRegex::Alternation or node.op == ParsedRegex::Sequence) and
(node.value == 0 or (node.value != -1 and not (m_flags & RegexCompileFlags::NoSubs)));
constexpr bool forward = direction == MatchDirection::Forward;
constexpr bool forward = direction == RegexMode::Forward;
if (save)
push_inst(CompiledRegex::Save, node.value * 2 + (forward ? 0 : 1));
@ -774,22 +775,22 @@ private:
case ParsedRegex::LookAhead:
push_inst(ignore_case ? CompiledRegex::LookAhead_IgnoreCase
: CompiledRegex::LookAhead,
push_lookaround<MatchDirection::Forward>(index, ignore_case));
push_lookaround<RegexMode::Forward>(index, ignore_case));
break;
case ParsedRegex::NegativeLookAhead:
push_inst(ignore_case ? CompiledRegex::NegativeLookAhead_IgnoreCase
: CompiledRegex::NegativeLookAhead,
push_lookaround<MatchDirection::Forward>(index, ignore_case));
push_lookaround<RegexMode::Forward>(index, ignore_case));
break;
case ParsedRegex::LookBehind:
push_inst(ignore_case ? CompiledRegex::LookBehind_IgnoreCase
: CompiledRegex::LookBehind,
push_lookaround<MatchDirection::Backward>(index, ignore_case));
push_lookaround<RegexMode::Backward>(index, ignore_case));
break;
case ParsedRegex::NegativeLookBehind:
push_inst(ignore_case ? CompiledRegex::NegativeLookBehind_IgnoreCase
: CompiledRegex::NegativeLookBehind,
push_lookaround<MatchDirection::Backward>(index, ignore_case));
push_lookaround<RegexMode::Backward>(index, ignore_case));
break;
case ParsedRegex::LineStart:
push_inst(CompiledRegex::LineStart);
@ -823,7 +824,7 @@ private:
return start_pos;
}
template<MatchDirection direction>
template<RegexMode direction>
uint32_t compile_node(ParsedRegex::NodeIndex index)
{
auto& node = get_node(index);
@ -885,7 +886,7 @@ private:
return res;
}
template<MatchDirection direction>
template<RegexMode direction>
uint32_t push_lookaround(ParsedRegex::NodeIndex index, bool ignore_case)
{
using Lookaround = CompiledRegex::Lookaround;
@ -915,7 +916,7 @@ private:
// Mutate start_desc with informations on which Codepoint could start a match.
// Returns true if the node possibly does not consume the char, in which case
// the next node would still be relevant for the parent node start chars computation.
template<MatchDirection direction>
template<RegexMode direction>
bool compute_start_desc(ParsedRegex::NodeIndex index,
CompiledRegex::StartDesc& start_desc) const
{
@ -1018,7 +1019,7 @@ private:
return false;
}
template<MatchDirection direction>
template<RegexMode direction>
[[gnu::noinline]]
std::unique_ptr<CompiledRegex::StartDesc> compute_start_desc() const
{
@ -1199,18 +1200,18 @@ bool is_ctype(CharacterType ctype, Codepoint cp)
namespace
{
template<MatchDirection dir = MatchDirection::Forward>
struct TestVM : CompiledRegex, ThreadedRegexVM<const char*, dir>
template<RegexMode mode = RegexMode::Forward>
struct TestVM : CompiledRegex, ThreadedRegexVM<const char*, mode>
{
using VMType = ThreadedRegexVM<const char*, dir>;
using VMType = ThreadedRegexVM<const char*, mode>;
TestVM(StringView re, bool dump = false)
: CompiledRegex{compile_regex(re, dir == MatchDirection::Forward ?
: CompiledRegex{compile_regex(re, mode & RegexMode::Forward ?
RegexCompileFlags::None : RegexCompileFlags::Backward)},
VMType{(const CompiledRegex&)*this}
{ if (dump) puts(dump_regex(*this).c_str()); }
bool exec(StringView re, RegexExecFlags flags = RegexExecFlags::AnyMatch)
bool exec(StringView re, RegexExecFlags flags = RegexExecFlags::None)
{
return VMType::exec(re.begin(), re.end(), re.begin(), re.end(), flags);
}
@ -1294,11 +1295,11 @@ auto test_regex = UnitTest{[]{
}
{
TestVM<> vm{R"(f.*a(.*o))"};
kak_assert(vm.exec("blahfoobarfoobaz", RegexExecFlags::Search));
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"(f.*a(.*o))"};
kak_assert(vm.exec("blahfoobarfoobaz"));
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foobarfoo");
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "rfoo");
kak_assert(vm.exec("mais que fais la police", RegexExecFlags::Search));
kak_assert(vm.exec("mais que fais la police"));
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "fais la po");
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == " po");
}
@ -1358,21 +1359,21 @@ auto test_regex = UnitTest{[]{
}
{
TestVM<> vm{R"(foo\Kbar)"};
kak_assert(vm.exec("foobar", RegexExecFlags::None));
TestVM<RegexMode::Forward> vm{R"(foo\Kbar)"};
kak_assert(vm.exec("foobar"));
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "bar");
kak_assert(not vm.exec("bar", RegexExecFlags::None));
kak_assert(not vm.exec("bar"));
}
{
TestVM<> vm{R"((fo+?).*)"};
kak_assert(vm.exec("foooo", RegexExecFlags::None));
TestVM<RegexMode::Forward> vm{R"((fo+?).*)"};
kak_assert(vm.exec("foooo"));
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "fo");
}
{
TestVM<> vm{R"((?=fo[\w]).)"};
kak_assert(vm.exec("barfoo", RegexExecFlags::Search));
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"((?=fo[\w]).)"};
kak_assert(vm.exec("barfoo"));
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "f");
}
@ -1423,66 +1424,66 @@ auto test_regex = UnitTest{[]{
}
{
TestVM<> vm{R"((?<!\\)(?:\\\\)*")"};
kak_assert(vm.exec("foo\"", RegexExecFlags::Search));
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"((?<!\\)(?:\\\\)*")"};
kak_assert(vm.exec("foo\""));
}
{
TestVM<> vm{R"($)"};
kak_assert(vm.exec("foo\n", RegexExecFlags::Search));
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"($)"};
kak_assert(vm.exec("foo\n"));
kak_assert(*vm.captures()[0] == '\n');
}
{
TestVM<MatchDirection::Backward> vm{R"(fo{1,})"};
kak_assert(vm.exec("foo1fooo2", RegexExecFlags::Search));
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"(fo{1,})"};
kak_assert(vm.exec("foo1fooo2"));
kak_assert(*vm.captures()[1] == '2');
}
{
TestVM<MatchDirection::Backward> vm{R"((?<=f)oo(b[ae]r)?(?=baz))"};
kak_assert(vm.exec("foobarbazfoobazfooberbaz", RegexExecFlags::Search));
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"((?<=f)oo(b[ae]r)?(?=baz))"};
kak_assert(vm.exec("foobarbazfoobazfooberbaz"));
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "oober");
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "ber");
}
{
TestVM<MatchDirection::Backward> vm{R"((baz|boz|foo|qux)(?<!baz)(?<!o))"};
kak_assert(vm.exec("quxbozfoobaz", RegexExecFlags::Search));
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"((baz|boz|foo|qux)(?<!baz)(?<!o))"};
kak_assert(vm.exec("quxbozfoobaz"));
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "boz");
}
{
TestVM<MatchDirection::Backward> vm{R"(foo)"};
kak_assert(vm.exec("foofoo", RegexExecFlags::Search));
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"(foo)"};
kak_assert(vm.exec("foofoo"));
kak_assert(*vm.captures()[1] == 0);
}
{
TestVM<MatchDirection::Backward> vm{R"($)"};
kak_assert(vm.exec("foo\nbar\nbaz\nqux", RegexExecFlags::Search | RegexExecFlags::NotEndOfLine));
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"($)"};
kak_assert(vm.exec("foo\nbar\nbaz\nqux", RegexExecFlags::NotEndOfLine));
kak_assert(StringView{vm.captures()[0]} == "\nqux");
kak_assert(vm.exec("foo\nbar\nbaz\nqux", RegexExecFlags::Search));
kak_assert(vm.exec("foo\nbar\nbaz\nqux", RegexExecFlags::None));
kak_assert(StringView{vm.captures()[0]} == "");
}
{
TestVM<MatchDirection::Backward> vm{R"(^)"};
kak_assert(not vm.exec("foo", RegexExecFlags::Search | RegexExecFlags::NotBeginOfLine));
kak_assert(vm.exec("foo", RegexExecFlags::Search));
kak_assert(vm.exec("foo\nbar", RegexExecFlags::Search));
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"(^)"};
kak_assert(not vm.exec("foo", RegexExecFlags::NotBeginOfLine));
kak_assert(vm.exec("foo", RegexExecFlags::None));
kak_assert(vm.exec("foo\nbar", RegexExecFlags::None));
kak_assert(StringView{vm.captures()[0]} == "bar");
}
{
TestVM<MatchDirection::Backward> vm{R"(\A\w+)"};
kak_assert(vm.exec("foo\nbar\nbaz", RegexExecFlags::Search));
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"(\A\w+)"};
kak_assert(vm.exec("foo\nbar\nbaz", RegexExecFlags::None));
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foo");
}
{
TestVM<MatchDirection::Backward> vm{R"(\b\w+\z)"};
kak_assert(vm.exec("foo\nbar\nbaz", RegexExecFlags::Search));
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"(\b\w+\z)"};
kak_assert(vm.exec("foo\nbar\nbaz", RegexExecFlags::None));
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "baz");
}
@ -1492,8 +1493,8 @@ auto test_regex = UnitTest{[]{
}
{
TestVM<> vm{R"(\b(?<!-)(a|b|)(?!-)\b)"};
kak_assert(vm.exec("# foo bar", RegexExecFlags::Search));
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"(\b(?<!-)(a|b|)(?!-)\b)"};
kak_assert(vm.exec("# foo bar", RegexExecFlags::None));
kak_assert(*vm.captures()[0] == '#');
}
@ -1503,19 +1504,19 @@ auto test_regex = UnitTest{[]{
}
{
TestVM<> vm{R"((?i)FOO)"};
kak_assert(vm.exec("foo", RegexExecFlags::Search));
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"((?i)FOO)"};
kak_assert(vm.exec("foo", RegexExecFlags::None));
}
{
TestVM<> vm{R"(.?(?=foo))"};
kak_assert(vm.exec("afoo", RegexExecFlags::Search));
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"(.?(?=foo))"};
kak_assert(vm.exec("afoo", RegexExecFlags::None));
kak_assert(*vm.captures()[0] == 'a');
}
{
TestVM<> vm{R"((?i)(?=Foo))"};
kak_assert(vm.exec("fOO", RegexExecFlags::Search));
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"((?i)(?=Foo))"};
kak_assert(vm.exec("fOO", RegexExecFlags::None));
kak_assert(*vm.captures()[0] == 'f');
}
@ -1530,8 +1531,8 @@ auto test_regex = UnitTest{[]{
}
{
TestVM<> vm{R"(д)"};
kak_assert(vm.exec("д", RegexExecFlags::Search));
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"(д)"};
kak_assert(vm.exec("д", RegexExecFlags::None));
}
{

View File

@ -17,12 +17,6 @@ struct regex_error : runtime_error
using runtime_error::runtime_error;
};
// Direction in which a regex is matched against its subject.
// NOTE(review): the enclosing hunk header (-17,12 +17,6) and the
// RegexMode::Forward / RegexMode::Backward replacements throughout this
// diff suggest this enum is deleted by the commit -- confirm.
enum class MatchDirection
{
Forward,
Backward
};
enum class CharacterType : unsigned char
{
None = 0,
@ -148,25 +142,43 @@ CompiledRegex compile_regex(StringView re, RegexCompileFlags flags);
// Flags given to ThreadedRegexVM::exec on each call (runtime options).
// NOTE(review): diff rendering without +/- markers. Search, AnyMatch and
// NoSaves are also declared in RegexMode, and every use of them in this
// diff moves from RegexExecFlags to RegexMode, so those three enumerators
// are presumably the lines removed by the commit -- confirm.
enum class RegexExecFlags
{
None = 0,
Search = 1 << 0,
NotBeginOfLine = 1 << 1,
NotEndOfLine = 1 << 2,
NotBeginOfWord = 1 << 3,
NotEndOfWord = 1 << 4,
// Makes exec fail on an empty range and rejects a match ending at the
// configured begin position.
NotInitialNull = 1 << 5,
AnyMatch = 1 << 6,
NoSaves = 1 << 7,
};
// Presumably opts this enum into the project's generic bitwise flag
// operators (|, &, ~ are used on it in this diff) -- confirm in the header
// that declares with_bit_ops.
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
// Matching mode fixed at compile time: it is the non-type template
// parameter of ThreadedRegexVM, so these choices are constants inside the VM
// rather than per-call flags.
enum class RegexMode
{
// Direction bits; has_direction() requires exactly one of the two.
Forward = 1 << 0,
Backward = 1 << 1,
// Without this bit a Match instruction is rejected unless the position
// reached is the end of the searched range.
Search = 1 << 2,
// Read by the VM as the constexpr `any_match` switch of its main loop.
AnyMatch = 1 << 3,
// Save instructions are skipped, so no capture positions are recorded.
NoSaves = 1 << 4,
};
// Presumably opts this enum into the project's generic bitwise flag
// operators (|, &, ~ are used on it in this diff) -- confirm in the header
// that declares with_bit_ops.
constexpr bool with_bit_ops(Meta::Type<RegexMode>) { return true; }
// True when mode carries exactly one of the two direction bits
// (Forward or Backward); neither bit or both bits yields false.
constexpr bool has_direction(RegexMode mode)
{
    const bool wants_forward = static_cast<bool>(mode & RegexMode::Forward);
    const bool wants_backward = static_cast<bool>(mode & RegexMode::Backward);
    return wants_forward != wants_backward;
}
// True when mode is a bare direction: exactly one of Forward / Backward
// and no other RegexMode bit set.
constexpr bool is_direction(RegexMode mode)
{
    if (not has_direction(mode))
        return false;

    const auto direction_bits = RegexMode::Forward | RegexMode::Backward;
    return (mode & ~direction_bits) == RegexMode{0};
}
// Trait giving the end-marker type that goes with an iterator type.
// Primary template: the iterator type itself is used as its sentinel.
template<typename It, typename=void>
struct SentinelType { using Type = It; };
// Specialization selected when It declares a nested `Sentinel` type
// (detected through void_t): that nested type is used instead.
template<typename It>
struct SentinelType<It, void_t<typename It::Sentinel>> { using Type = typename It::Sentinel; };
template<typename Iterator, MatchDirection direction>
template<typename Iterator, RegexMode mode>
class ThreadedRegexVM
{
public:
@ -174,7 +186,7 @@ public:
: m_program{program}
{
kak_assert((forward and program.first_backward_inst != 0) or
(direction == MatchDirection::Backward and program.first_backward_inst != -1));
(not forward and program.first_backward_inst != -1));
}
ThreadedRegexVM(const ThreadedRegexVM&) = delete;
@ -198,7 +210,7 @@ public:
if (flags & RegexExecFlags::NotInitialNull and begin == end)
return false;
const bool search = (flags & RegexExecFlags::Search);
constexpr bool search = (mode & RegexMode::Search);
ConstArrayView<CompiledRegex::Instruction> instructions{m_program.instructions};
if (forward)
@ -370,7 +382,7 @@ private:
}
case CompiledRegex::Save:
{
if (config.flags & RegexExecFlags::NoSaves)
if (mode & RegexMode::NoSaves)
break;
if (thread.saves < 0)
thread.saves = new_saves<false>(nullptr);
@ -418,25 +430,25 @@ private:
break;
case CompiledRegex::LookAhead:
case CompiledRegex::NegativeLookAhead:
if (lookaround<MatchDirection::Forward, false>(inst.param, pos, config) !=
if (lookaround<true, false>(inst.param, pos, config) !=
(inst.op == CompiledRegex::LookAhead))
return failed();
break;
case CompiledRegex::LookAhead_IgnoreCase:
case CompiledRegex::NegativeLookAhead_IgnoreCase:
if (lookaround<MatchDirection::Forward, true>(inst.param, pos, config) !=
if (lookaround<true, true>(inst.param, pos, config) !=
(inst.op == CompiledRegex::LookAhead_IgnoreCase))
return failed();
break;
case CompiledRegex::LookBehind:
case CompiledRegex::NegativeLookBehind:
if (lookaround<MatchDirection::Backward, false>(inst.param, pos, config) !=
if (lookaround<false, false>(inst.param, pos, config) !=
(inst.op == CompiledRegex::LookBehind))
return failed();
break;
case CompiledRegex::LookBehind_IgnoreCase:
case CompiledRegex::NegativeLookBehind_IgnoreCase:
if (lookaround<MatchDirection::Backward, true>(inst.param, pos, config) !=
if (lookaround<false, true>(inst.param, pos, config) !=
(inst.op == CompiledRegex::LookBehind_IgnoreCase))
return failed();
break;
@ -449,7 +461,7 @@ private:
m_find_next_start = true;
return;
case CompiledRegex::Match:
if ((pos != config.end and not (config.flags & RegexExecFlags::Search)) or
if ((pos != config.end and not (mode & RegexMode::Search)) or
(config.flags & RegexExecFlags::NotInitialNull and pos == config.begin))
return failed();
@ -476,7 +488,7 @@ private:
const auto& start_desc = forward ? m_program.forward_start_desc : m_program.backward_start_desc;
const bool any_match = config.flags & RegexExecFlags::AnyMatch;
constexpr bool any_match = mode & RegexMode::AnyMatch;
uint16_t current_step = -1;
m_found_match = false;
while (true) // Iterate on all codepoints and once at the end
@ -527,12 +539,12 @@ private:
}
}
template<MatchDirection look_direction, bool ignore_case>
template<bool look_forward, bool ignore_case>
bool lookaround(uint32_t index, Iterator pos, const ExecConfig& config) const
{
using Lookaround = CompiledRegex::Lookaround;
if (look_direction == MatchDirection::Backward)
if (not look_forward)
{
if (pos == config.subject_begin)
return m_program.lookarounds[index] == Lookaround::EndOfLookaround;
@ -541,7 +553,7 @@ private:
for (auto it = m_program.lookarounds.begin() + index; *it != Lookaround::EndOfLookaround; ++it)
{
if (look_direction == MatchDirection::Forward and pos == config.subject_end)
if (look_forward and pos == config.subject_end)
return false;
Codepoint cp = utf8::codepoint(pos, config.subject_end);
@ -571,10 +583,10 @@ private:
else if (static_cast<Codepoint>(op) != cp)
return false;
if (look_direction == MatchDirection::Backward and pos == config.subject_begin)
if (not look_forward and pos == config.subject_begin)
return *++it == Lookaround::EndOfLookaround;
(look_direction == MatchDirection::Forward) ? utf8::to_next(pos, config.subject_end)
look_forward ? utf8::to_next(pos, config.subject_end)
: utf8::to_previous(pos, config.subject_begin);
}
return true;
@ -671,7 +683,8 @@ private:
int32_t m_next = 0;
};
static constexpr bool forward = direction == MatchDirection::Forward;
static_assert(has_direction(mode));
static constexpr bool forward = mode & RegexMode::Forward;
DualThreadStack m_threads;
Vector<Saves*, MemoryDomain::Regex> m_saves;

View File

@ -304,7 +304,7 @@ find_opening(Iterator pos, const Container& container,
res[0].second == pos)
pos = res[0].first;
using RegexIt = RegexIterator<Iterator, MatchDirection::Backward>;
using RegexIt = RegexIterator<Iterator, RegexMode::Backward>;
for (auto&& match : RegexIt{container.begin(), pos, container.begin(), container.end(), opening})
{
if (nestable)
@ -332,7 +332,7 @@ find_closing(Iterator pos, const Container& container,
res, opening) and res[0].first == pos)
pos = res[0].second;
using RegexIt = RegexIterator<Iterator, MatchDirection::Forward>;
using RegexIt = RegexIterator<Iterator, RegexMode::Forward>;
for (auto match : RegexIt{pos, container.end(), container.begin(), container.end(), closing})
{
if (nestable)
@ -895,14 +895,16 @@ static bool find_prev(const Buffer& buffer, const BufferIterator& pos,
RegexExecFlags::NotInitialNull);
}
template<MatchDirection direction>
template<RegexMode mode>
Selection find_next_match(const Context& context, const Selection& sel, const Regex& regex, bool& wrapped)
{
static_assert(is_direction(mode));
constexpr bool forward = mode & RegexMode::Forward;
auto& buffer = context.buffer();
MatchResults<BufferIterator> matches;
auto pos = buffer.iterator_at(direction == MatchDirection::Backward ? sel.min() : sel.max());
auto pos = buffer.iterator_at(forward ? sel.max() : sel.min());
wrapped = false;
const bool found = (direction == MatchDirection::Forward) ?
const bool found = forward ?
find_next(buffer, utf8::next(pos, buffer.end()), matches, regex, wrapped)
: find_prev(buffer, pos, matches, regex, wrapped);
@ -915,13 +917,13 @@ Selection find_next_match(const Context& context, const Selection& sel, const Re
auto begin = matches[0].first, end = matches[0].second;
end = (begin == end) ? end : utf8::previous(end, begin);
if (direction == MatchDirection::Backward)
if (not forward)
std::swap(begin, end);
return {begin.coord(), end.coord(), std::move(captures)};
}
template Selection find_next_match<MatchDirection::Forward>(const Context&, const Selection&, const Regex&, bool&);
template Selection find_next_match<MatchDirection::Backward>(const Context&, const Selection&, const Regex&, bool&);
template Selection find_next_match<RegexMode::Forward>(const Context&, const Selection&, const Regex&, bool&);
template Selection find_next_match<RegexMode::Backward>(const Context&, const Selection&, const Regex&, bool&);
void select_all_matches(SelectionList& selections, const Regex& regex, int capture)
{
@ -931,7 +933,7 @@ void select_all_matches(SelectionList& selections, const Regex& regex, int captu
Vector<Selection> result;
auto& buffer = selections.buffer();
ThreadedRegexVM<BufferIterator, MatchDirection::Forward> vm{*regex.impl()};
ThreadedRegexVM<BufferIterator, RegexMode::Forward | RegexMode::Search> vm{*regex.impl()};
for (auto& sel : selections)
{
auto sel_beg = buffer.iterator_at(sel.min());
@ -973,7 +975,7 @@ void split_selections(SelectionList& selections, const Regex& regex, int capture
auto& buffer = selections.buffer();
auto buf_end = buffer.end();
auto buf_begin = buffer.begin();
ThreadedRegexVM<BufferIterator, MatchDirection::Forward> vm{*regex.impl()};
ThreadedRegexVM<BufferIterator, RegexMode::Forward | RegexMode::Search> vm{*regex.impl()};
for (auto& sel : selections)
{
auto begin = buffer.iterator_at(sel.min());

View File

@ -99,9 +99,9 @@ trim_partial_lines(const Context& context, const Selection& selection);
void select_buffer(SelectionList& selections);
enum class MatchDirection;
enum class RegexMode;
template<MatchDirection direction>
template<RegexMode mode>
Selection find_next_match(const Context& context, const Selection& sel,
const Regex& regex, bool& wrapped);