Split compile time regex flags from runtime ones
This commit is contained in:
parent
b76287ff99
commit
fd043435e5
|
@ -1646,7 +1646,7 @@ void insert_matches(const Buffer& buffer, RegexMatchList& matches, const Regex&
|
||||||
{
|
{
|
||||||
size_t pivot = matches.size();
|
size_t pivot = matches.size();
|
||||||
capture = capture and regex.mark_count() > 0;
|
capture = capture and regex.mark_count() > 0;
|
||||||
ThreadedRegexVM<const char*, MatchDirection::Forward> vm{*regex.impl()};
|
ThreadedRegexVM<const char*, RegexMode::Forward | RegexMode::Search> vm{*regex.impl()};
|
||||||
for (auto line = range.begin; line < range.end; ++line)
|
for (auto line = range.begin; line < range.end; ++line)
|
||||||
{
|
{
|
||||||
const StringView l = buffer[line];
|
const StringView l = buffer[line];
|
||||||
|
|
|
@ -723,15 +723,16 @@ void paste_all(Context& context, NormalParams params)
|
||||||
selections = std::move(result);
|
selections = std::move(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Side-by-side diff of direction_flags: in each pair the old line comes first, the new line second.
// Both revisions translate a search direction into regex compile flags:
//   - forward  -> RegexCompileFlags::None
//   - otherwise -> RegexCompileFlags::Backward | RegexCompileFlags::NoForward
// The old revision compares against MatchDirection::Forward for equality; the new revision
// tests whether the RegexMode::Forward bit is set in `mode`.
constexpr RegexCompileFlags direction_flags(MatchDirection direction)
|
constexpr RegexCompileFlags direction_flags(RegexMode mode)
|
||||||
{
|
{
|
||||||
return (direction == MatchDirection::Forward) ?
|
return (mode & RegexMode::Forward) ?
|
||||||
RegexCompileFlags::None : RegexCompileFlags::Backward | RegexCompileFlags::NoForward;
|
RegexCompileFlags::None : RegexCompileFlags::Backward | RegexCompileFlags::NoForward;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<MatchDirection direction = MatchDirection::Forward, typename T>
|
template<RegexMode mode = RegexMode::Forward, typename T>
|
||||||
void regex_prompt(Context& context, String prompt, String default_regex, T func)
|
void regex_prompt(Context& context, String prompt, String default_regex, T func)
|
||||||
{
|
{
|
||||||
|
static_assert(is_direction(mode));
|
||||||
DisplayCoord position = context.has_window() ? context.window().position() : DisplayCoord{};
|
DisplayCoord position = context.has_window() ? context.window().position() : DisplayCoord{};
|
||||||
SelectionList selections = context.selections();
|
SelectionList selections = context.selections();
|
||||||
context.input_handler().prompt(
|
context.input_handler().prompt(
|
||||||
|
@ -785,7 +786,7 @@ void regex_prompt(Context& context, String prompt, String default_regex, T func)
|
||||||
context.push_jump();
|
context.push_jump();
|
||||||
|
|
||||||
if (not str.empty() or event == PromptEvent::Validate)
|
if (not str.empty() or event == PromptEvent::Validate)
|
||||||
func(Regex{str.empty() ? default_regex : str, direction_flags(direction)}, event, context);
|
func(Regex{str.empty() ? default_regex : str, direction_flags(mode)}, event, context);
|
||||||
}
|
}
|
||||||
catch (regex_error& err)
|
catch (regex_error& err)
|
||||||
{
|
{
|
||||||
|
@ -805,19 +806,19 @@ void regex_prompt(Context& context, String prompt, String default_regex, T func)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Side-by-side diff of select_next_matches: in each pair the old line comes first, the new line second.
// For every selection in the context, replaces it with the result of find_next_match
// (passed through keep_direction together with the previous selection), then sorts the
// selection list and merges overlapping entries. The do/while runs the pass once and
// repeats while the pre-decremented `count` stays above zero, so a count of 0 or 1 still
// performs exactly one pass.
// `wrapped` is reset each pass and handed to find_next_match; it is not read in this function.
// The only change between revisions is the template parameter: MatchDirection `direction`
// becomes RegexMode `mode`, and it is forwarded unchanged to find_next_match.
template<MatchDirection direction>
|
template<RegexMode mode>
|
||||||
void select_next_matches(Context& context, const Regex& regex, int count)
|
void select_next_matches(Context& context, const Regex& regex, int count)
|
||||||
{
|
{
|
||||||
auto& selections = context.selections();
|
auto& selections = context.selections();
|
||||||
do {
|
do {
|
||||||
bool wrapped = false;
|
bool wrapped = false;
|
||||||
for (auto& sel : selections)
|
for (auto& sel : selections)
|
||||||
sel = keep_direction(find_next_match<direction>(context, sel, regex, wrapped), sel);
|
sel = keep_direction(find_next_match<mode>(context, sel, regex, wrapped), sel);
|
||||||
selections.sort_and_merge_overlapping();
|
selections.sort_and_merge_overlapping();
|
||||||
} while (--count > 0);
|
} while (--count > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<MatchDirection direction>
|
template<RegexMode mode>
|
||||||
void extend_to_next_matches(Context& context, const Regex& regex, int count)
|
void extend_to_next_matches(Context& context, const Regex& regex, int count)
|
||||||
{
|
{
|
||||||
Vector<Selection> new_sels;
|
Vector<Selection> new_sels;
|
||||||
|
@ -827,7 +828,7 @@ void extend_to_next_matches(Context& context, const Regex& regex, int count)
|
||||||
size_t main_index = selections.main_index();
|
size_t main_index = selections.main_index();
|
||||||
for (auto& sel : selections)
|
for (auto& sel : selections)
|
||||||
{
|
{
|
||||||
auto new_sel = find_next_match<direction>(context, sel, regex, wrapped);
|
auto new_sel = find_next_match<mode>(context, sel, regex, wrapped);
|
||||||
if (not wrapped)
|
if (not wrapped)
|
||||||
{
|
{
|
||||||
new_sels.push_back(sel);
|
new_sels.push_back(sel);
|
||||||
|
@ -844,12 +845,13 @@ void extend_to_next_matches(Context& context, const Regex& regex, int count)
|
||||||
} while (--count > 0);
|
} while (--count > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<SelectMode mode, MatchDirection direction>
|
template<SelectMode mode, RegexMode regex_mode>
|
||||||
void search(Context& context, NormalParams params)
|
void search(Context& context, NormalParams params)
|
||||||
{
|
{
|
||||||
|
static_assert(is_direction(regex_mode));
|
||||||
constexpr StringView prompt = mode == SelectMode::Extend ?
|
constexpr StringView prompt = mode == SelectMode::Extend ?
|
||||||
(direction == MatchDirection::Forward ? "search (extend):" : "reverse search (extend):")
|
(regex_mode & RegexMode::Forward ? "search (extend):" : "reverse search (extend):")
|
||||||
: (direction == MatchDirection::Forward ? "search:" : "reverse search:");
|
: (regex_mode & RegexMode::Forward ? "search:" : "reverse search:");
|
||||||
|
|
||||||
const char reg = to_lower(params.reg ? params.reg : '/');
|
const char reg = to_lower(params.reg ? params.reg : '/');
|
||||||
const int count = params.count;
|
const int count = params.count;
|
||||||
|
@ -858,7 +860,7 @@ void search(Context& context, NormalParams params)
|
||||||
Vector<String> saved_reg{reg_content.begin(), reg_content.end()};
|
Vector<String> saved_reg{reg_content.begin(), reg_content.end()};
|
||||||
const int main_index = std::min(context.selections().main_index(), saved_reg.size()-1);
|
const int main_index = std::min(context.selections().main_index(), saved_reg.size()-1);
|
||||||
|
|
||||||
regex_prompt<direction>(context, prompt.str(), saved_reg[main_index],
|
regex_prompt<regex_mode>(context, prompt.str(), saved_reg[main_index],
|
||||||
[reg, count, saved_reg]
|
[reg, count, saved_reg]
|
||||||
(const Regex& regex, PromptEvent event, Context& context) {
|
(const Regex& regex, PromptEvent event, Context& context) {
|
||||||
if (event == PromptEvent::Abort)
|
if (event == PromptEvent::Abort)
|
||||||
|
@ -872,20 +874,20 @@ void search(Context& context, NormalParams params)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (mode == SelectMode::Extend)
|
if (mode == SelectMode::Extend)
|
||||||
extend_to_next_matches<direction>(context, regex, count);
|
extend_to_next_matches<regex_mode>(context, regex, count);
|
||||||
else
|
else
|
||||||
select_next_matches<direction>(context, regex, count);
|
select_next_matches<regex_mode>(context, regex, count);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template<SelectMode mode, MatchDirection direction>
|
template<SelectMode mode, RegexMode regex_mode>
|
||||||
void search_next(Context& context, NormalParams params)
|
void search_next(Context& context, NormalParams params)
|
||||||
{
|
{
|
||||||
const char reg = to_lower(params.reg ? params.reg : '/');
|
const char reg = to_lower(params.reg ? params.reg : '/');
|
||||||
StringView str = context.main_sel_register_value(reg);
|
StringView str = context.main_sel_register_value(reg);
|
||||||
if (not str.empty())
|
if (not str.empty())
|
||||||
{
|
{
|
||||||
Regex regex{str, direction_flags(direction)};
|
Regex regex{str, direction_flags(regex_mode)};
|
||||||
auto& selections = context.selections();
|
auto& selections = context.selections();
|
||||||
bool main_wrapped = false;
|
bool main_wrapped = false;
|
||||||
do {
|
do {
|
||||||
|
@ -893,12 +895,12 @@ void search_next(Context& context, NormalParams params)
|
||||||
if (mode == SelectMode::Replace)
|
if (mode == SelectMode::Replace)
|
||||||
{
|
{
|
||||||
auto& sel = selections.main();
|
auto& sel = selections.main();
|
||||||
sel = keep_direction(find_next_match<direction>(context, sel, regex, wrapped), sel);
|
sel = keep_direction(find_next_match<regex_mode>(context, sel, regex, wrapped), sel);
|
||||||
}
|
}
|
||||||
else if (mode == SelectMode::Append)
|
else if (mode == SelectMode::Append)
|
||||||
{
|
{
|
||||||
auto sel = keep_direction(
|
auto sel = keep_direction(
|
||||||
find_next_match<direction>(context, selections.main(), regex, wrapped),
|
find_next_match<regex_mode>(context, selections.main(), regex, wrapped),
|
||||||
selections.main());
|
selections.main());
|
||||||
selections.push_back(std::move(sel));
|
selections.push_back(std::move(sel));
|
||||||
selections.set_main_index(selections.size() - 1);
|
selections.set_main_index(selections.size() - 1);
|
||||||
|
@ -1101,8 +1103,7 @@ void keep(Context& context, NormalParams params)
|
||||||
// give more intuitive behaviours in keep use cases.
|
// give more intuitive behaviours in keep use cases.
|
||||||
const auto flags = match_flags(is_bol(begin.coord()), false,
|
const auto flags = match_flags(is_bol(begin.coord()), false,
|
||||||
is_bow(buffer, begin.coord()),
|
is_bow(buffer, begin.coord()),
|
||||||
is_eow(buffer, end.coord())) |
|
is_eow(buffer, end.coord()));
|
||||||
RegexExecFlags::AnyMatch;
|
|
||||||
if (regex_search(begin, end, begin, end, regex, flags) == matching)
|
if (regex_search(begin, end, begin, end, regex, flags) == matching)
|
||||||
keep.push_back(sel);
|
keep.push_back(sel);
|
||||||
}
|
}
|
||||||
|
@ -2235,14 +2236,14 @@ static const HashMap<Key, NormalCmd, MemoryDomain::Undefined, KeymapBackend> key
|
||||||
{ {'M'}, {"extend to matching character", select<SelectMode::Extend, select_matching<true>>} },
|
{ {'M'}, {"extend to matching character", select<SelectMode::Extend, select_matching<true>>} },
|
||||||
{ {alt('M')}, {"backward extend to matching character", select<SelectMode::Extend, select_matching<false>>} },
|
{ {alt('M')}, {"backward extend to matching character", select<SelectMode::Extend, select_matching<false>>} },
|
||||||
|
|
||||||
{ {'/'}, {"select next given regex match", search<SelectMode::Replace, MatchDirection::Forward>} },
|
{ {'/'}, {"select next given regex match", search<SelectMode::Replace, RegexMode::Forward>} },
|
||||||
{ {'?'}, {"extend with next given regex match", search<SelectMode::Extend, MatchDirection::Forward>} },
|
{ {'?'}, {"extend with next given regex match", search<SelectMode::Extend, RegexMode::Forward>} },
|
||||||
{ {alt('/')}, {"select previous given regex match", search<SelectMode::Replace, MatchDirection::Backward>} },
|
{ {alt('/')}, {"select previous given regex match", search<SelectMode::Replace, RegexMode::Backward>} },
|
||||||
{ {alt('?')}, {"extend with previous given regex match", search<SelectMode::Extend, MatchDirection::Backward>} },
|
{ {alt('?')}, {"extend with previous given regex match", search<SelectMode::Extend, RegexMode::Backward>} },
|
||||||
{ {'n'}, {"select next current search pattern match", search_next<SelectMode::Replace, MatchDirection::Forward>} },
|
{ {'n'}, {"select next current search pattern match", search_next<SelectMode::Replace, RegexMode::Forward>} },
|
||||||
{ {'N'}, {"extend with next current search pattern match", search_next<SelectMode::Append, MatchDirection::Forward>} },
|
{ {'N'}, {"extend with next current search pattern match", search_next<SelectMode::Append, RegexMode::Forward>} },
|
||||||
{ {alt('n')}, {"select previous current search pattern match", search_next<SelectMode::Replace, MatchDirection::Backward>} },
|
{ {alt('n')}, {"select previous current search pattern match", search_next<SelectMode::Replace, RegexMode::Backward>} },
|
||||||
{ {alt('N')}, {"extend with previous current search pattern match", search_next<SelectMode::Append, MatchDirection::Backward>} },
|
{ {alt('N')}, {"extend with previous current search pattern match", search_next<SelectMode::Append, RegexMode::Backward>} },
|
||||||
{ {'*'}, {"set search pattern to main selection content", use_selection_as_search_pattern<true>} },
|
{ {'*'}, {"set search pattern to main selection content", use_selection_as_search_pattern<true>} },
|
||||||
{ {alt('*')}, {"set search pattern to main selection content, do not detect words", use_selection_as_search_pattern<false>} },
|
{ {alt('*')}, {"set search pattern to main selection content, do not detect words", use_selection_as_search_pattern<false>} },
|
||||||
|
|
||||||
|
|
33
src/regex.hh
33
src/regex.hh
|
@ -112,15 +112,15 @@ inline RegexExecFlags match_flags(bool bol, bool eol, bool bow, bool eow)
|
||||||
// Side-by-side diff of the bool-returning regex_match: in each pair the old line comes first,
// the new line second.
// Builds a forward ThreadedRegexVM from the regex's compiled implementation and returns the
// result of exec, passing begin/end for both iterator pairs that exec takes.
// What changes between revisions is where AnyMatch and NoSaves are specified: the old revision
// passes them as runtime RegexExecFlags, while the new revision encodes them in the VM's
// RegexMode template argument and passes RegexExecFlags::None at runtime.
template<typename It>
|
template<typename It>
|
||||||
bool regex_match(It begin, It end, const Regex& re)
|
bool regex_match(It begin, It end, const Regex& re)
|
||||||
{
|
{
|
||||||
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
|
ThreadedRegexVM<It, RegexMode::Forward | RegexMode::AnyMatch | RegexMode::NoSaves> vm{*re.impl()};
|
||||||
return vm.exec(begin, end, begin, end, RegexExecFlags::AnyMatch | RegexExecFlags::NoSaves);
|
return vm.exec(begin, end, begin, end, RegexExecFlags::None);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename It>
|
template<typename It>
|
||||||
bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
|
bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
|
||||||
{
|
{
|
||||||
res.values().clear();
|
res.values().clear();
|
||||||
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
|
ThreadedRegexVM<It, RegexMode::Forward> vm{*re.impl()};
|
||||||
if (vm.exec(begin, end, begin, end, RegexExecFlags::None))
|
if (vm.exec(begin, end, begin, end, RegexExecFlags::None))
|
||||||
{
|
{
|
||||||
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
|
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
|
||||||
|
@ -133,19 +133,18 @@ template<typename It>
|
||||||
bool regex_search(It begin, It end, It subject_begin, It subject_end, const Regex& re,
|
bool regex_search(It begin, It end, It subject_begin, It subject_end, const Regex& re,
|
||||||
RegexExecFlags flags = RegexExecFlags::None)
|
RegexExecFlags flags = RegexExecFlags::None)
|
||||||
{
|
{
|
||||||
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
|
ThreadedRegexVM<It, RegexMode::Forward | RegexMode::Search | RegexMode::AnyMatch | RegexMode::NoSaves> vm{*re.impl()};
|
||||||
return vm.exec(begin, end, subject_begin, subject_end,
|
return vm.exec(begin, end, subject_begin, subject_end, flags);
|
||||||
flags | RegexExecFlags::Search | RegexExecFlags::AnyMatch | RegexExecFlags::NoSaves);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename It, MatchDirection direction = MatchDirection::Forward>
|
template<typename It, RegexMode mode = RegexMode::Forward>
|
||||||
bool regex_search(It begin, It end, It subject_begin, It subject_end,
|
bool regex_search(It begin, It end, It subject_begin, It subject_end,
|
||||||
MatchResults<It>& res, const Regex& re,
|
MatchResults<It>& res, const Regex& re,
|
||||||
RegexExecFlags flags = RegexExecFlags::None)
|
RegexExecFlags flags = RegexExecFlags::None)
|
||||||
{
|
{
|
||||||
res.values().clear();
|
res.values().clear();
|
||||||
ThreadedRegexVM<It, direction> vm{*re.impl()};
|
ThreadedRegexVM<It, mode | RegexMode::Search> vm{*re.impl()};
|
||||||
if (vm.exec(begin, end, subject_begin, subject_end, flags | RegexExecFlags::Search))
|
if (vm.exec(begin, end, subject_begin, subject_end, flags))
|
||||||
{
|
{
|
||||||
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
|
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
|
||||||
return true;
|
return true;
|
||||||
|
@ -158,16 +157,18 @@ bool backward_regex_search(It begin, It end, It subject_begin, It subject_end,
|
||||||
MatchResults<It>& res, const Regex& re,
|
MatchResults<It>& res, const Regex& re,
|
||||||
RegexExecFlags flags = RegexExecFlags::None)
|
RegexExecFlags flags = RegexExecFlags::None)
|
||||||
{
|
{
|
||||||
return regex_search<It, MatchDirection::Backward>(begin, end, subject_begin, subject_end, res, re, flags);
|
return regex_search<It, RegexMode::Backward>(begin, end, subject_begin, subject_end, res, re, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
String option_to_string(const Regex& re);
|
String option_to_string(const Regex& re);
|
||||||
Regex option_from_string(Meta::Type<Regex>, StringView str);
|
Regex option_from_string(Meta::Type<Regex>, StringView str);
|
||||||
|
|
||||||
template<typename Iterator, MatchDirection direction = MatchDirection::Forward,
|
template<typename Iterator, RegexMode mode = RegexMode::Forward,
|
||||||
typename VmArg = const Regex>
|
typename VmArg = const Regex>
|
||||||
struct RegexIterator
|
struct RegexIterator
|
||||||
{
|
{
|
||||||
|
static_assert(has_direction(mode));
|
||||||
|
static constexpr bool forward = mode & RegexMode::Forward;
|
||||||
using ValueType = MatchResults<Iterator>;
|
using ValueType = MatchResults<Iterator>;
|
||||||
struct Sentinel{};
|
struct Sentinel{};
|
||||||
struct It
|
struct It
|
||||||
|
@ -188,7 +189,7 @@ struct RegexIterator
|
||||||
RegexIterator(Iterator begin, Iterator end,
|
RegexIterator(Iterator begin, Iterator end,
|
||||||
Iterator subject_begin, Iterator subject_end,
|
Iterator subject_begin, Iterator subject_end,
|
||||||
VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None)
|
VmArg& vm_arg, RegexExecFlags flags = RegexExecFlags::None)
|
||||||
: m_vm{make_vm(vm_arg)}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
|
: m_vm{make_vm(vm_arg)}, m_next_pos{forward ? begin : end},
|
||||||
m_begin{std::move(begin)}, m_end{std::move(end)},
|
m_begin{std::move(begin)}, m_end{std::move(end)},
|
||||||
m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
|
m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
|
||||||
m_flags{flags} {}
|
m_flags{flags} {}
|
||||||
|
@ -203,23 +204,21 @@ struct RegexIterator
|
||||||
private:
|
private:
|
||||||
bool next()
|
bool next()
|
||||||
{
|
{
|
||||||
auto additional_flags = RegexExecFlags::Search;
|
auto additional_flags = RegexExecFlags::None;
|
||||||
if (m_results.size() and m_results[0].first == m_results[0].second)
|
if (m_results.size() and m_results[0].first == m_results[0].second)
|
||||||
additional_flags |= RegexExecFlags::NotInitialNull;
|
additional_flags |= RegexExecFlags::NotInitialNull;
|
||||||
|
|
||||||
constexpr bool forward = direction == MatchDirection::Forward;
|
|
||||||
|
|
||||||
if (not m_vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos,
|
if (not m_vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos,
|
||||||
m_subject_begin, m_subject_end, m_flags | additional_flags))
|
m_subject_begin, m_subject_end, m_flags | additional_flags))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
m_results.values().clear();
|
m_results.values().clear();
|
||||||
std::move(m_vm.captures().begin(), m_vm.captures().end(), std::back_inserter(m_results.values()));
|
std::move(m_vm.captures().begin(), m_vm.captures().end(), std::back_inserter(m_results.values()));
|
||||||
m_next_pos = (direction == MatchDirection::Forward) ? m_results[0].second : m_results[0].first;
|
m_next_pos = forward ? m_results[0].second : m_results[0].first;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
using RegexVM = ThreadedRegexVM<Iterator, direction>;
|
using RegexVM = ThreadedRegexVM<Iterator, mode | RegexMode::Search>;
|
||||||
static RegexVM& make_vm(RegexVM& vm) { return vm; }
|
static RegexVM& make_vm(RegexVM& vm) { return vm; }
|
||||||
static RegexVM make_vm(const Regex& regex) { return {*regex.impl()}; }
|
static RegexVM make_vm(const Regex& regex) { return {*regex.impl()}; }
|
||||||
|
|
||||||
|
|
|
@ -92,14 +92,15 @@ struct ParsedRegex
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
|
|
||||||
template<MatchDirection direction = MatchDirection::Forward>
|
template<RegexMode mode = RegexMode::Forward>
|
||||||
struct Children
|
struct Children
|
||||||
{
|
{
|
||||||
|
static_assert(has_direction(mode));
|
||||||
using Index = ParsedRegex::NodeIndex;
|
using Index = ParsedRegex::NodeIndex;
|
||||||
struct Sentinel {};
|
struct Sentinel {};
|
||||||
struct Iterator
|
struct Iterator
|
||||||
{
|
{
|
||||||
static constexpr bool forward = direction == MatchDirection::Forward;
|
static constexpr bool forward = mode & RegexMode::Forward;
|
||||||
Iterator(ArrayView<const ParsedRegex::Node> nodes, Index index)
|
Iterator(ArrayView<const ParsedRegex::Node> nodes, Index index)
|
||||||
: m_nodes{nodes},
|
: m_nodes{nodes},
|
||||||
m_pos(forward ? index+1 : find_prev(index, nodes[index].children_end)),
|
m_pos(forward ? index+1 : find_prev(index, nodes[index].children_end)),
|
||||||
|
@ -681,18 +682,18 @@ struct RegexCompiler
|
||||||
|
|
||||||
if (not (flags & RegexCompileFlags::NoForward))
|
if (not (flags & RegexCompileFlags::NoForward))
|
||||||
{
|
{
|
||||||
m_program.forward_start_desc = compute_start_desc<MatchDirection::Forward>();
|
m_program.forward_start_desc = compute_start_desc<RegexMode::Forward>();
|
||||||
write_search_prefix();
|
write_search_prefix();
|
||||||
compile_node<MatchDirection::Forward>(0);
|
compile_node<RegexMode::Forward>(0);
|
||||||
push_inst(CompiledRegex::Match);
|
push_inst(CompiledRegex::Match);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & RegexCompileFlags::Backward)
|
if (flags & RegexCompileFlags::Backward)
|
||||||
{
|
{
|
||||||
m_program.first_backward_inst = m_program.instructions.size();
|
m_program.first_backward_inst = m_program.instructions.size();
|
||||||
m_program.backward_start_desc = compute_start_desc<MatchDirection::Backward>();
|
m_program.backward_start_desc = compute_start_desc<RegexMode::Backward>();
|
||||||
write_search_prefix();
|
write_search_prefix();
|
||||||
compile_node<MatchDirection::Backward>(0);
|
compile_node<RegexMode::Backward>(0);
|
||||||
push_inst(CompiledRegex::Match);
|
push_inst(CompiledRegex::Match);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -707,7 +708,7 @@ struct RegexCompiler
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
template<MatchDirection direction>
|
template<RegexMode direction>
|
||||||
uint32_t compile_node_inner(ParsedRegex::NodeIndex index)
|
uint32_t compile_node_inner(ParsedRegex::NodeIndex index)
|
||||||
{
|
{
|
||||||
auto& node = get_node(index);
|
auto& node = get_node(index);
|
||||||
|
@ -717,7 +718,7 @@ private:
|
||||||
|
|
||||||
const bool save = (node.op == ParsedRegex::Alternation or node.op == ParsedRegex::Sequence) and
|
const bool save = (node.op == ParsedRegex::Alternation or node.op == ParsedRegex::Sequence) and
|
||||||
(node.value == 0 or (node.value != -1 and not (m_flags & RegexCompileFlags::NoSubs)));
|
(node.value == 0 or (node.value != -1 and not (m_flags & RegexCompileFlags::NoSubs)));
|
||||||
constexpr bool forward = direction == MatchDirection::Forward;
|
constexpr bool forward = direction == RegexMode::Forward;
|
||||||
if (save)
|
if (save)
|
||||||
push_inst(CompiledRegex::Save, node.value * 2 + (forward ? 0 : 1));
|
push_inst(CompiledRegex::Save, node.value * 2 + (forward ? 0 : 1));
|
||||||
|
|
||||||
|
@ -774,22 +775,22 @@ private:
|
||||||
case ParsedRegex::LookAhead:
|
case ParsedRegex::LookAhead:
|
||||||
push_inst(ignore_case ? CompiledRegex::LookAhead_IgnoreCase
|
push_inst(ignore_case ? CompiledRegex::LookAhead_IgnoreCase
|
||||||
: CompiledRegex::LookAhead,
|
: CompiledRegex::LookAhead,
|
||||||
push_lookaround<MatchDirection::Forward>(index, ignore_case));
|
push_lookaround<RegexMode::Forward>(index, ignore_case));
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::NegativeLookAhead:
|
case ParsedRegex::NegativeLookAhead:
|
||||||
push_inst(ignore_case ? CompiledRegex::NegativeLookAhead_IgnoreCase
|
push_inst(ignore_case ? CompiledRegex::NegativeLookAhead_IgnoreCase
|
||||||
: CompiledRegex::NegativeLookAhead,
|
: CompiledRegex::NegativeLookAhead,
|
||||||
push_lookaround<MatchDirection::Forward>(index, ignore_case));
|
push_lookaround<RegexMode::Forward>(index, ignore_case));
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::LookBehind:
|
case ParsedRegex::LookBehind:
|
||||||
push_inst(ignore_case ? CompiledRegex::LookBehind_IgnoreCase
|
push_inst(ignore_case ? CompiledRegex::LookBehind_IgnoreCase
|
||||||
: CompiledRegex::LookBehind,
|
: CompiledRegex::LookBehind,
|
||||||
push_lookaround<MatchDirection::Backward>(index, ignore_case));
|
push_lookaround<RegexMode::Backward>(index, ignore_case));
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::NegativeLookBehind:
|
case ParsedRegex::NegativeLookBehind:
|
||||||
push_inst(ignore_case ? CompiledRegex::NegativeLookBehind_IgnoreCase
|
push_inst(ignore_case ? CompiledRegex::NegativeLookBehind_IgnoreCase
|
||||||
: CompiledRegex::NegativeLookBehind,
|
: CompiledRegex::NegativeLookBehind,
|
||||||
push_lookaround<MatchDirection::Backward>(index, ignore_case));
|
push_lookaround<RegexMode::Backward>(index, ignore_case));
|
||||||
break;
|
break;
|
||||||
case ParsedRegex::LineStart:
|
case ParsedRegex::LineStart:
|
||||||
push_inst(CompiledRegex::LineStart);
|
push_inst(CompiledRegex::LineStart);
|
||||||
|
@ -823,7 +824,7 @@ private:
|
||||||
return start_pos;
|
return start_pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<MatchDirection direction>
|
template<RegexMode direction>
|
||||||
uint32_t compile_node(ParsedRegex::NodeIndex index)
|
uint32_t compile_node(ParsedRegex::NodeIndex index)
|
||||||
{
|
{
|
||||||
auto& node = get_node(index);
|
auto& node = get_node(index);
|
||||||
|
@ -885,7 +886,7 @@ private:
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<MatchDirection direction>
|
template<RegexMode direction>
|
||||||
uint32_t push_lookaround(ParsedRegex::NodeIndex index, bool ignore_case)
|
uint32_t push_lookaround(ParsedRegex::NodeIndex index, bool ignore_case)
|
||||||
{
|
{
|
||||||
using Lookaround = CompiledRegex::Lookaround;
|
using Lookaround = CompiledRegex::Lookaround;
|
||||||
|
@ -915,7 +916,7 @@ private:
|
||||||
// Mutate start_desc with informations on which Codepoint could start a match.
|
// Mutate start_desc with informations on which Codepoint could start a match.
|
||||||
// Returns true if the node possibly does not consume the char, in which case
|
// Returns true if the node possibly does not consume the char, in which case
|
||||||
// the next node would still be relevant for the parent node start chars computation.
|
// the next node would still be relevant for the parent node start chars computation.
|
||||||
template<MatchDirection direction>
|
template<RegexMode direction>
|
||||||
bool compute_start_desc(ParsedRegex::NodeIndex index,
|
bool compute_start_desc(ParsedRegex::NodeIndex index,
|
||||||
CompiledRegex::StartDesc& start_desc) const
|
CompiledRegex::StartDesc& start_desc) const
|
||||||
{
|
{
|
||||||
|
@ -1018,7 +1019,7 @@ private:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<MatchDirection direction>
|
template<RegexMode direction>
|
||||||
[[gnu::noinline]]
|
[[gnu::noinline]]
|
||||||
std::unique_ptr<CompiledRegex::StartDesc> compute_start_desc() const
|
std::unique_ptr<CompiledRegex::StartDesc> compute_start_desc() const
|
||||||
{
|
{
|
||||||
|
@ -1199,18 +1200,18 @@ bool is_ctype(CharacterType ctype, Codepoint cp)
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
template<MatchDirection dir = MatchDirection::Forward>
|
template<RegexMode mode = RegexMode::Forward>
|
||||||
struct TestVM : CompiledRegex, ThreadedRegexVM<const char*, dir>
|
struct TestVM : CompiledRegex, ThreadedRegexVM<const char*, mode>
|
||||||
{
|
{
|
||||||
using VMType = ThreadedRegexVM<const char*, dir>;
|
using VMType = ThreadedRegexVM<const char*, mode>;
|
||||||
|
|
||||||
TestVM(StringView re, bool dump = false)
|
TestVM(StringView re, bool dump = false)
|
||||||
: CompiledRegex{compile_regex(re, dir == MatchDirection::Forward ?
|
: CompiledRegex{compile_regex(re, mode & RegexMode::Forward ?
|
||||||
RegexCompileFlags::None : RegexCompileFlags::Backward)},
|
RegexCompileFlags::None : RegexCompileFlags::Backward)},
|
||||||
VMType{(const CompiledRegex&)*this}
|
VMType{(const CompiledRegex&)*this}
|
||||||
{ if (dump) puts(dump_regex(*this).c_str()); }
|
{ if (dump) puts(dump_regex(*this).c_str()); }
|
||||||
|
|
||||||
bool exec(StringView re, RegexExecFlags flags = RegexExecFlags::AnyMatch)
|
bool exec(StringView re, RegexExecFlags flags = RegexExecFlags::None)
|
||||||
{
|
{
|
||||||
return VMType::exec(re.begin(), re.end(), re.begin(), re.end(), flags);
|
return VMType::exec(re.begin(), re.end(), re.begin(), re.end(), flags);
|
||||||
}
|
}
|
||||||
|
@ -1294,11 +1295,11 @@ auto test_regex = UnitTest{[]{
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<> vm{R"(f.*a(.*o))"};
|
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"(f.*a(.*o))"};
|
||||||
kak_assert(vm.exec("blahfoobarfoobaz", RegexExecFlags::Search));
|
kak_assert(vm.exec("blahfoobarfoobaz"));
|
||||||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foobarfoo");
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foobarfoo");
|
||||||
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "rfoo");
|
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "rfoo");
|
||||||
kak_assert(vm.exec("mais que fais la police", RegexExecFlags::Search));
|
kak_assert(vm.exec("mais que fais la police"));
|
||||||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "fais la po");
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "fais la po");
|
||||||
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == " po");
|
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == " po");
|
||||||
}
|
}
|
||||||
|
@ -1358,21 +1359,21 @@ auto test_regex = UnitTest{[]{
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<> vm{R"(foo\Kbar)"};
|
TestVM<RegexMode::Forward> vm{R"(foo\Kbar)"};
|
||||||
kak_assert(vm.exec("foobar", RegexExecFlags::None));
|
kak_assert(vm.exec("foobar"));
|
||||||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "bar");
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "bar");
|
||||||
kak_assert(not vm.exec("bar", RegexExecFlags::None));
|
kak_assert(not vm.exec("bar"));
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<> vm{R"((fo+?).*)"};
|
TestVM<RegexMode::Forward> vm{R"((fo+?).*)"};
|
||||||
kak_assert(vm.exec("foooo", RegexExecFlags::None));
|
kak_assert(vm.exec("foooo"));
|
||||||
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "fo");
|
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "fo");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<> vm{R"((?=fo[\w]).)"};
|
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"((?=fo[\w]).)"};
|
||||||
kak_assert(vm.exec("barfoo", RegexExecFlags::Search));
|
kak_assert(vm.exec("barfoo"));
|
||||||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "f");
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "f");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1423,66 +1424,66 @@ auto test_regex = UnitTest{[]{
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<> vm{R"((?<!\\)(?:\\\\)*")"};
|
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"((?<!\\)(?:\\\\)*")"};
|
||||||
kak_assert(vm.exec("foo\"", RegexExecFlags::Search));
|
kak_assert(vm.exec("foo\""));
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<> vm{R"($)"};
|
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"($)"};
|
||||||
kak_assert(vm.exec("foo\n", RegexExecFlags::Search));
|
kak_assert(vm.exec("foo\n"));
|
||||||
kak_assert(*vm.captures()[0] == '\n');
|
kak_assert(*vm.captures()[0] == '\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<MatchDirection::Backward> vm{R"(fo{1,})"};
|
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"(fo{1,})"};
|
||||||
kak_assert(vm.exec("foo1fooo2", RegexExecFlags::Search));
|
kak_assert(vm.exec("foo1fooo2"));
|
||||||
kak_assert(*vm.captures()[1] == '2');
|
kak_assert(*vm.captures()[1] == '2');
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<MatchDirection::Backward> vm{R"((?<=f)oo(b[ae]r)?(?=baz))"};
|
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"((?<=f)oo(b[ae]r)?(?=baz))"};
|
||||||
kak_assert(vm.exec("foobarbazfoobazfooberbaz", RegexExecFlags::Search));
|
kak_assert(vm.exec("foobarbazfoobazfooberbaz"));
|
||||||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "oober");
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "oober");
|
||||||
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "ber");
|
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "ber");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<MatchDirection::Backward> vm{R"((baz|boz|foo|qux)(?<!baz)(?<!o))"};
|
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"((baz|boz|foo|qux)(?<!baz)(?<!o))"};
|
||||||
kak_assert(vm.exec("quxbozfoobaz", RegexExecFlags::Search));
|
kak_assert(vm.exec("quxbozfoobaz"));
|
||||||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "boz");
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "boz");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<MatchDirection::Backward> vm{R"(foo)"};
|
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"(foo)"};
|
||||||
kak_assert(vm.exec("foofoo", RegexExecFlags::Search));
|
kak_assert(vm.exec("foofoo"));
|
||||||
kak_assert(*vm.captures()[1] == 0);
|
kak_assert(*vm.captures()[1] == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<MatchDirection::Backward> vm{R"($)"};
|
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"($)"};
|
||||||
kak_assert(vm.exec("foo\nbar\nbaz\nqux", RegexExecFlags::Search | RegexExecFlags::NotEndOfLine));
|
kak_assert(vm.exec("foo\nbar\nbaz\nqux", RegexExecFlags::NotEndOfLine));
|
||||||
kak_assert(StringView{vm.captures()[0]} == "\nqux");
|
kak_assert(StringView{vm.captures()[0]} == "\nqux");
|
||||||
kak_assert(vm.exec("foo\nbar\nbaz\nqux", RegexExecFlags::Search));
|
kak_assert(vm.exec("foo\nbar\nbaz\nqux", RegexExecFlags::None));
|
||||||
kak_assert(StringView{vm.captures()[0]} == "");
|
kak_assert(StringView{vm.captures()[0]} == "");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<MatchDirection::Backward> vm{R"(^)"};
|
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"(^)"};
|
||||||
kak_assert(not vm.exec("foo", RegexExecFlags::Search | RegexExecFlags::NotBeginOfLine));
|
kak_assert(not vm.exec("foo", RegexExecFlags::NotBeginOfLine));
|
||||||
kak_assert(vm.exec("foo", RegexExecFlags::Search));
|
kak_assert(vm.exec("foo", RegexExecFlags::None));
|
||||||
kak_assert(vm.exec("foo\nbar", RegexExecFlags::Search));
|
kak_assert(vm.exec("foo\nbar", RegexExecFlags::None));
|
||||||
kak_assert(StringView{vm.captures()[0]} == "bar");
|
kak_assert(StringView{vm.captures()[0]} == "bar");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<MatchDirection::Backward> vm{R"(\A\w+)"};
|
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"(\A\w+)"};
|
||||||
kak_assert(vm.exec("foo\nbar\nbaz", RegexExecFlags::Search));
|
kak_assert(vm.exec("foo\nbar\nbaz", RegexExecFlags::None));
|
||||||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foo");
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "foo");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<MatchDirection::Backward> vm{R"(\b\w+\z)"};
|
TestVM<RegexMode::Backward | RegexMode::Search> vm{R"(\b\w+\z)"};
|
||||||
kak_assert(vm.exec("foo\nbar\nbaz", RegexExecFlags::Search));
|
kak_assert(vm.exec("foo\nbar\nbaz", RegexExecFlags::None));
|
||||||
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "baz");
|
kak_assert(StringView{vm.captures()[0], vm.captures()[1]} == "baz");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1492,8 +1493,8 @@ auto test_regex = UnitTest{[]{
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<> vm{R"(\b(?<!-)(a|b|)(?!-)\b)"};
|
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"(\b(?<!-)(a|b|)(?!-)\b)"};
|
||||||
kak_assert(vm.exec("# foo bar", RegexExecFlags::Search));
|
kak_assert(vm.exec("# foo bar", RegexExecFlags::None));
|
||||||
kak_assert(*vm.captures()[0] == '#');
|
kak_assert(*vm.captures()[0] == '#');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1503,19 +1504,19 @@ auto test_regex = UnitTest{[]{
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<> vm{R"((?i)FOO)"};
|
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"((?i)FOO)"};
|
||||||
kak_assert(vm.exec("foo", RegexExecFlags::Search));
|
kak_assert(vm.exec("foo", RegexExecFlags::None));
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<> vm{R"(.?(?=foo))"};
|
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"(.?(?=foo))"};
|
||||||
kak_assert(vm.exec("afoo", RegexExecFlags::Search));
|
kak_assert(vm.exec("afoo", RegexExecFlags::None));
|
||||||
kak_assert(*vm.captures()[0] == 'a');
|
kak_assert(*vm.captures()[0] == 'a');
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<> vm{R"((?i)(?=Foo))"};
|
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"((?i)(?=Foo))"};
|
||||||
kak_assert(vm.exec("fOO", RegexExecFlags::Search));
|
kak_assert(vm.exec("fOO", RegexExecFlags::None));
|
||||||
kak_assert(*vm.captures()[0] == 'f');
|
kak_assert(*vm.captures()[0] == 'f');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1530,8 +1531,8 @@ auto test_regex = UnitTest{[]{
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
TestVM<> vm{R"(д)"};
|
TestVM<RegexMode::Forward | RegexMode::Search> vm{R"(д)"};
|
||||||
kak_assert(vm.exec("д", RegexExecFlags::Search));
|
kak_assert(vm.exec("д", RegexExecFlags::None));
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|
|
@ -17,12 +17,6 @@ struct regex_error : runtime_error
|
||||||
using runtime_error::runtime_error;
|
using runtime_error::runtime_error;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class MatchDirection
|
|
||||||
{
|
|
||||||
Forward,
|
|
||||||
Backward
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class CharacterType : unsigned char
|
enum class CharacterType : unsigned char
|
||||||
{
|
{
|
||||||
None = 0,
|
None = 0,
|
||||||
|
@ -148,25 +142,43 @@ CompiledRegex compile_regex(StringView re, RegexCompileFlags flags);
|
||||||
enum class RegexExecFlags
|
enum class RegexExecFlags
|
||||||
{
|
{
|
||||||
None = 0,
|
None = 0,
|
||||||
Search = 1 << 0,
|
|
||||||
NotBeginOfLine = 1 << 1,
|
NotBeginOfLine = 1 << 1,
|
||||||
NotEndOfLine = 1 << 2,
|
NotEndOfLine = 1 << 2,
|
||||||
NotBeginOfWord = 1 << 3,
|
NotBeginOfWord = 1 << 3,
|
||||||
NotEndOfWord = 1 << 4,
|
NotEndOfWord = 1 << 4,
|
||||||
NotInitialNull = 1 << 5,
|
NotInitialNull = 1 << 5,
|
||||||
AnyMatch = 1 << 6,
|
|
||||||
NoSaves = 1 << 7,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
|
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
|
||||||
|
|
||||||
|
enum class RegexMode
|
||||||
|
{
|
||||||
|
Forward = 1 << 0,
|
||||||
|
Backward = 1 << 1,
|
||||||
|
Search = 1 << 2,
|
||||||
|
AnyMatch = 1 << 3,
|
||||||
|
NoSaves = 1 << 4,
|
||||||
|
};
|
||||||
|
constexpr bool with_bit_ops(Meta::Type<RegexMode>) { return true; }
|
||||||
|
constexpr bool has_direction(RegexMode mode)
|
||||||
|
{
|
||||||
|
return (bool)(mode & RegexMode::Forward) xor
|
||||||
|
(bool)(mode & RegexMode::Backward);
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr bool is_direction(RegexMode mode)
|
||||||
|
{
|
||||||
|
return has_direction(mode) and
|
||||||
|
(mode & ~(RegexMode::Forward | RegexMode::Backward)) == RegexMode{0};
|
||||||
|
}
|
||||||
|
|
||||||
template<typename It, typename=void>
|
template<typename It, typename=void>
|
||||||
struct SentinelType { using Type = It; };
|
struct SentinelType { using Type = It; };
|
||||||
|
|
||||||
template<typename It>
|
template<typename It>
|
||||||
struct SentinelType<It, void_t<typename It::Sentinel>> { using Type = typename It::Sentinel; };
|
struct SentinelType<It, void_t<typename It::Sentinel>> { using Type = typename It::Sentinel; };
|
||||||
|
|
||||||
template<typename Iterator, MatchDirection direction>
|
template<typename Iterator, RegexMode mode>
|
||||||
class ThreadedRegexVM
|
class ThreadedRegexVM
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -174,7 +186,7 @@ public:
|
||||||
: m_program{program}
|
: m_program{program}
|
||||||
{
|
{
|
||||||
kak_assert((forward and program.first_backward_inst != 0) or
|
kak_assert((forward and program.first_backward_inst != 0) or
|
||||||
(direction == MatchDirection::Backward and program.first_backward_inst != -1));
|
(not forward and program.first_backward_inst != -1));
|
||||||
}
|
}
|
||||||
|
|
||||||
ThreadedRegexVM(const ThreadedRegexVM&) = delete;
|
ThreadedRegexVM(const ThreadedRegexVM&) = delete;
|
||||||
|
@ -198,7 +210,7 @@ public:
|
||||||
if (flags & RegexExecFlags::NotInitialNull and begin == end)
|
if (flags & RegexExecFlags::NotInitialNull and begin == end)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
const bool search = (flags & RegexExecFlags::Search);
|
constexpr bool search = (mode & RegexMode::Search);
|
||||||
|
|
||||||
ConstArrayView<CompiledRegex::Instruction> instructions{m_program.instructions};
|
ConstArrayView<CompiledRegex::Instruction> instructions{m_program.instructions};
|
||||||
if (forward)
|
if (forward)
|
||||||
|
@ -370,7 +382,7 @@ private:
|
||||||
}
|
}
|
||||||
case CompiledRegex::Save:
|
case CompiledRegex::Save:
|
||||||
{
|
{
|
||||||
if (config.flags & RegexExecFlags::NoSaves)
|
if (mode & RegexMode::NoSaves)
|
||||||
break;
|
break;
|
||||||
if (thread.saves < 0)
|
if (thread.saves < 0)
|
||||||
thread.saves = new_saves<false>(nullptr);
|
thread.saves = new_saves<false>(nullptr);
|
||||||
|
@ -418,25 +430,25 @@ private:
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::LookAhead:
|
case CompiledRegex::LookAhead:
|
||||||
case CompiledRegex::NegativeLookAhead:
|
case CompiledRegex::NegativeLookAhead:
|
||||||
if (lookaround<MatchDirection::Forward, false>(inst.param, pos, config) !=
|
if (lookaround<true, false>(inst.param, pos, config) !=
|
||||||
(inst.op == CompiledRegex::LookAhead))
|
(inst.op == CompiledRegex::LookAhead))
|
||||||
return failed();
|
return failed();
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::LookAhead_IgnoreCase:
|
case CompiledRegex::LookAhead_IgnoreCase:
|
||||||
case CompiledRegex::NegativeLookAhead_IgnoreCase:
|
case CompiledRegex::NegativeLookAhead_IgnoreCase:
|
||||||
if (lookaround<MatchDirection::Forward, true>(inst.param, pos, config) !=
|
if (lookaround<true, true>(inst.param, pos, config) !=
|
||||||
(inst.op == CompiledRegex::LookAhead_IgnoreCase))
|
(inst.op == CompiledRegex::LookAhead_IgnoreCase))
|
||||||
return failed();
|
return failed();
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::LookBehind:
|
case CompiledRegex::LookBehind:
|
||||||
case CompiledRegex::NegativeLookBehind:
|
case CompiledRegex::NegativeLookBehind:
|
||||||
if (lookaround<MatchDirection::Backward, false>(inst.param, pos, config) !=
|
if (lookaround<false, false>(inst.param, pos, config) !=
|
||||||
(inst.op == CompiledRegex::LookBehind))
|
(inst.op == CompiledRegex::LookBehind))
|
||||||
return failed();
|
return failed();
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::LookBehind_IgnoreCase:
|
case CompiledRegex::LookBehind_IgnoreCase:
|
||||||
case CompiledRegex::NegativeLookBehind_IgnoreCase:
|
case CompiledRegex::NegativeLookBehind_IgnoreCase:
|
||||||
if (lookaround<MatchDirection::Backward, true>(inst.param, pos, config) !=
|
if (lookaround<false, true>(inst.param, pos, config) !=
|
||||||
(inst.op == CompiledRegex::LookBehind_IgnoreCase))
|
(inst.op == CompiledRegex::LookBehind_IgnoreCase))
|
||||||
return failed();
|
return failed();
|
||||||
break;
|
break;
|
||||||
|
@ -449,7 +461,7 @@ private:
|
||||||
m_find_next_start = true;
|
m_find_next_start = true;
|
||||||
return;
|
return;
|
||||||
case CompiledRegex::Match:
|
case CompiledRegex::Match:
|
||||||
if ((pos != config.end and not (config.flags & RegexExecFlags::Search)) or
|
if ((pos != config.end and not (mode & RegexMode::Search)) or
|
||||||
(config.flags & RegexExecFlags::NotInitialNull and pos == config.begin))
|
(config.flags & RegexExecFlags::NotInitialNull and pos == config.begin))
|
||||||
return failed();
|
return failed();
|
||||||
|
|
||||||
|
@ -476,7 +488,7 @@ private:
|
||||||
|
|
||||||
const auto& start_desc = forward ? m_program.forward_start_desc : m_program.backward_start_desc;
|
const auto& start_desc = forward ? m_program.forward_start_desc : m_program.backward_start_desc;
|
||||||
|
|
||||||
const bool any_match = config.flags & RegexExecFlags::AnyMatch;
|
constexpr bool any_match = mode & RegexMode::AnyMatch;
|
||||||
uint16_t current_step = -1;
|
uint16_t current_step = -1;
|
||||||
m_found_match = false;
|
m_found_match = false;
|
||||||
while (true) // Iterate on all codepoints and once at the end
|
while (true) // Iterate on all codepoints and once at the end
|
||||||
|
@ -527,12 +539,12 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<MatchDirection look_direction, bool ignore_case>
|
template<bool look_forward, bool ignore_case>
|
||||||
bool lookaround(uint32_t index, Iterator pos, const ExecConfig& config) const
|
bool lookaround(uint32_t index, Iterator pos, const ExecConfig& config) const
|
||||||
{
|
{
|
||||||
using Lookaround = CompiledRegex::Lookaround;
|
using Lookaround = CompiledRegex::Lookaround;
|
||||||
|
|
||||||
if (look_direction == MatchDirection::Backward)
|
if (not look_forward)
|
||||||
{
|
{
|
||||||
if (pos == config.subject_begin)
|
if (pos == config.subject_begin)
|
||||||
return m_program.lookarounds[index] == Lookaround::EndOfLookaround;
|
return m_program.lookarounds[index] == Lookaround::EndOfLookaround;
|
||||||
|
@ -541,7 +553,7 @@ private:
|
||||||
|
|
||||||
for (auto it = m_program.lookarounds.begin() + index; *it != Lookaround::EndOfLookaround; ++it)
|
for (auto it = m_program.lookarounds.begin() + index; *it != Lookaround::EndOfLookaround; ++it)
|
||||||
{
|
{
|
||||||
if (look_direction == MatchDirection::Forward and pos == config.subject_end)
|
if (look_forward and pos == config.subject_end)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
Codepoint cp = utf8::codepoint(pos, config.subject_end);
|
Codepoint cp = utf8::codepoint(pos, config.subject_end);
|
||||||
|
@ -571,10 +583,10 @@ private:
|
||||||
else if (static_cast<Codepoint>(op) != cp)
|
else if (static_cast<Codepoint>(op) != cp)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (look_direction == MatchDirection::Backward and pos == config.subject_begin)
|
if (not look_forward and pos == config.subject_begin)
|
||||||
return *++it == Lookaround::EndOfLookaround;
|
return *++it == Lookaround::EndOfLookaround;
|
||||||
|
|
||||||
(look_direction == MatchDirection::Forward) ? utf8::to_next(pos, config.subject_end)
|
look_forward ? utf8::to_next(pos, config.subject_end)
|
||||||
: utf8::to_previous(pos, config.subject_begin);
|
: utf8::to_previous(pos, config.subject_begin);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -671,7 +683,8 @@ private:
|
||||||
int32_t m_next = 0;
|
int32_t m_next = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr bool forward = direction == MatchDirection::Forward;
|
static_assert(has_direction(mode));
|
||||||
|
static constexpr bool forward = mode & RegexMode::Forward;
|
||||||
|
|
||||||
DualThreadStack m_threads;
|
DualThreadStack m_threads;
|
||||||
Vector<Saves*, MemoryDomain::Regex> m_saves;
|
Vector<Saves*, MemoryDomain::Regex> m_saves;
|
||||||
|
|
|
@ -304,7 +304,7 @@ find_opening(Iterator pos, const Container& container,
|
||||||
res[0].second == pos)
|
res[0].second == pos)
|
||||||
pos = res[0].first;
|
pos = res[0].first;
|
||||||
|
|
||||||
using RegexIt = RegexIterator<Iterator, MatchDirection::Backward>;
|
using RegexIt = RegexIterator<Iterator, RegexMode::Backward>;
|
||||||
for (auto&& match : RegexIt{container.begin(), pos, container.begin(), container.end(), opening})
|
for (auto&& match : RegexIt{container.begin(), pos, container.begin(), container.end(), opening})
|
||||||
{
|
{
|
||||||
if (nestable)
|
if (nestable)
|
||||||
|
@ -332,7 +332,7 @@ find_closing(Iterator pos, const Container& container,
|
||||||
res, opening) and res[0].first == pos)
|
res, opening) and res[0].first == pos)
|
||||||
pos = res[0].second;
|
pos = res[0].second;
|
||||||
|
|
||||||
using RegexIt = RegexIterator<Iterator, MatchDirection::Forward>;
|
using RegexIt = RegexIterator<Iterator, RegexMode::Forward>;
|
||||||
for (auto match : RegexIt{pos, container.end(), container.begin(), container.end(), closing})
|
for (auto match : RegexIt{pos, container.end(), container.begin(), container.end(), closing})
|
||||||
{
|
{
|
||||||
if (nestable)
|
if (nestable)
|
||||||
|
@ -895,14 +895,16 @@ static bool find_prev(const Buffer& buffer, const BufferIterator& pos,
|
||||||
RegexExecFlags::NotInitialNull);
|
RegexExecFlags::NotInitialNull);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<MatchDirection direction>
|
template<RegexMode mode>
|
||||||
Selection find_next_match(const Context& context, const Selection& sel, const Regex& regex, bool& wrapped)
|
Selection find_next_match(const Context& context, const Selection& sel, const Regex& regex, bool& wrapped)
|
||||||
{
|
{
|
||||||
|
static_assert(is_direction(mode));
|
||||||
|
constexpr bool forward = mode & RegexMode::Forward;
|
||||||
auto& buffer = context.buffer();
|
auto& buffer = context.buffer();
|
||||||
MatchResults<BufferIterator> matches;
|
MatchResults<BufferIterator> matches;
|
||||||
auto pos = buffer.iterator_at(direction == MatchDirection::Backward ? sel.min() : sel.max());
|
auto pos = buffer.iterator_at(forward ? sel.max() : sel.min());
|
||||||
wrapped = false;
|
wrapped = false;
|
||||||
const bool found = (direction == MatchDirection::Forward) ?
|
const bool found = forward ?
|
||||||
find_next(buffer, utf8::next(pos, buffer.end()), matches, regex, wrapped)
|
find_next(buffer, utf8::next(pos, buffer.end()), matches, regex, wrapped)
|
||||||
: find_prev(buffer, pos, matches, regex, wrapped);
|
: find_prev(buffer, pos, matches, regex, wrapped);
|
||||||
|
|
||||||
|
@ -915,13 +917,13 @@ Selection find_next_match(const Context& context, const Selection& sel, const Re
|
||||||
|
|
||||||
auto begin = matches[0].first, end = matches[0].second;
|
auto begin = matches[0].first, end = matches[0].second;
|
||||||
end = (begin == end) ? end : utf8::previous(end, begin);
|
end = (begin == end) ? end : utf8::previous(end, begin);
|
||||||
if (direction == MatchDirection::Backward)
|
if (not forward)
|
||||||
std::swap(begin, end);
|
std::swap(begin, end);
|
||||||
|
|
||||||
return {begin.coord(), end.coord(), std::move(captures)};
|
return {begin.coord(), end.coord(), std::move(captures)};
|
||||||
}
|
}
|
||||||
template Selection find_next_match<MatchDirection::Forward>(const Context&, const Selection&, const Regex&, bool&);
|
template Selection find_next_match<RegexMode::Forward>(const Context&, const Selection&, const Regex&, bool&);
|
||||||
template Selection find_next_match<MatchDirection::Backward>(const Context&, const Selection&, const Regex&, bool&);
|
template Selection find_next_match<RegexMode::Backward>(const Context&, const Selection&, const Regex&, bool&);
|
||||||
|
|
||||||
void select_all_matches(SelectionList& selections, const Regex& regex, int capture)
|
void select_all_matches(SelectionList& selections, const Regex& regex, int capture)
|
||||||
{
|
{
|
||||||
|
@ -931,7 +933,7 @@ void select_all_matches(SelectionList& selections, const Regex& regex, int captu
|
||||||
|
|
||||||
Vector<Selection> result;
|
Vector<Selection> result;
|
||||||
auto& buffer = selections.buffer();
|
auto& buffer = selections.buffer();
|
||||||
ThreadedRegexVM<BufferIterator, MatchDirection::Forward> vm{*regex.impl()};
|
ThreadedRegexVM<BufferIterator, RegexMode::Forward | RegexMode::Search> vm{*regex.impl()};
|
||||||
for (auto& sel : selections)
|
for (auto& sel : selections)
|
||||||
{
|
{
|
||||||
auto sel_beg = buffer.iterator_at(sel.min());
|
auto sel_beg = buffer.iterator_at(sel.min());
|
||||||
|
@ -973,7 +975,7 @@ void split_selections(SelectionList& selections, const Regex& regex, int capture
|
||||||
auto& buffer = selections.buffer();
|
auto& buffer = selections.buffer();
|
||||||
auto buf_end = buffer.end();
|
auto buf_end = buffer.end();
|
||||||
auto buf_begin = buffer.begin();
|
auto buf_begin = buffer.begin();
|
||||||
ThreadedRegexVM<BufferIterator, MatchDirection::Forward> vm{*regex.impl()};
|
ThreadedRegexVM<BufferIterator, RegexMode::Forward | RegexMode::Search> vm{*regex.impl()};
|
||||||
for (auto& sel : selections)
|
for (auto& sel : selections)
|
||||||
{
|
{
|
||||||
auto begin = buffer.iterator_at(sel.min());
|
auto begin = buffer.iterator_at(sel.min());
|
||||||
|
|
|
@ -99,9 +99,9 @@ trim_partial_lines(const Context& context, const Selection& selection);
|
||||||
|
|
||||||
void select_buffer(SelectionList& selections);
|
void select_buffer(SelectionList& selections);
|
||||||
|
|
||||||
enum class MatchDirection;
|
enum class RegexMode;
|
||||||
|
|
||||||
template<MatchDirection direction>
|
template<RegexMode mode>
|
||||||
Selection find_next_match(const Context& context, const Selection& sel,
|
Selection find_next_match(const Context& context, const Selection& sel,
|
||||||
const Regex& regex, bool& wrapped);
|
const Regex& regex, bool& wrapped);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user