Regex: switch to custom impl, use boost for checking
This commit is contained in:
parent
9305fa1369
commit
065bbc8f59
|
@ -789,7 +789,7 @@ const CommandDesc add_hook_cmd = {
|
|||
if (not contains(hooks, parser[1]))
|
||||
throw runtime_error{format("Unknown hook '{}'", parser[1])};
|
||||
|
||||
Regex regex{parser[2], Regex::optimize | Regex::ECMAScript};
|
||||
Regex regex{parser[2], RegexCompileFlags::Optimize};
|
||||
const String& command = parser[3];
|
||||
auto group = parser.get_switch("group").value_or(StringView{});
|
||||
get_scope(parser[0], context).hooks().add_hook(parser[1], group.str(), std::move(regex), command);
|
||||
|
|
|
@ -323,7 +323,7 @@ public:
|
|||
|
||||
String id = format("hlregex'{}'", params[0]);
|
||||
|
||||
Regex ex{params[0], Regex::optimize};
|
||||
Regex ex{params[0], RegexCompileFlags::Optimize};
|
||||
|
||||
return {id, std::make_unique<RegexHighlighter>(std::move(ex),
|
||||
std::move(faces))};
|
||||
|
@ -1823,8 +1823,8 @@ public:
|
|||
if (parser[i].empty() or parser[i+1].empty() or parser[i+2].empty())
|
||||
throw runtime_error("group id, begin and end must not be empty");
|
||||
|
||||
const Regex::flag_type flags = match_capture ?
|
||||
Regex::optimize : Regex::nosubs | Regex::optimize;
|
||||
const RegexCompileFlags flags = match_capture ?
|
||||
RegexCompileFlags::Optimize : RegexCompileFlags::NoSubs | RegexCompileFlags::Optimize;
|
||||
|
||||
regions.push_back({ parser[i],
|
||||
Regex{parser[i+1], flags}, Regex{parser[i+2], flags},
|
||||
|
|
|
@ -981,7 +981,7 @@ void keep(Context& context, NormalParams)
|
|||
const auto flags = match_flags(is_bol(begin.coord()), false,
|
||||
is_bow(buffer, begin.coord()),
|
||||
is_eow(buffer, end.coord())) |
|
||||
RegexConstant::match_any;
|
||||
RegexExecFlags::AnyMatch;
|
||||
if (regex_search(begin, end, ex, flags) == matching)
|
||||
keep.push_back(sel);
|
||||
}
|
||||
|
|
49
src/regex.cc
49
src/regex.cc
|
@ -1,6 +1,5 @@
|
|||
#include "regex.hh"
|
||||
|
||||
#include "exception.hh"
|
||||
#include "buffer_utils.hh"
|
||||
|
||||
namespace Kakoune
|
||||
|
@ -8,17 +7,45 @@ namespace Kakoune
|
|||
|
||||
using Utf8It = RegexUtf8It<const char*>;
|
||||
|
||||
Regex::Regex(StringView re, flag_type flags) try
|
||||
: RegexBase{Utf8It{re.begin(), re}, Utf8It{re.end(), re}, flags}, m_str{re.str()}
|
||||
boost::regbase::flag_type convert_flags(RegexCompileFlags flags)
|
||||
{
|
||||
boost::regbase::flag_type res = boost::regbase::ECMAScript;
|
||||
if (flags & RegexCompileFlags::NoSubs)
|
||||
res |= boost::regbase::nosubs;
|
||||
if (flags & RegexCompileFlags::Optimize)
|
||||
res |= boost::regbase::optimize;
|
||||
return res;
|
||||
}
|
||||
|
||||
boost::regex_constants::match_flag_type convert_flags(RegexExecFlags flags)
|
||||
{
|
||||
boost::regex_constants::match_flag_type res = boost::regex_constants::match_default;
|
||||
|
||||
if (flags & RegexExecFlags::NotBeginOfLine)
|
||||
res |= boost::regex_constants::match_not_bol;
|
||||
if (flags & RegexExecFlags::NotEndOfLine)
|
||||
res |= boost::regex_constants::match_not_eol;
|
||||
if (flags & RegexExecFlags::NotBeginOfWord)
|
||||
res |= boost::regex_constants::match_not_bow;
|
||||
if (flags & RegexExecFlags::NotEndOfWord)
|
||||
res |= boost::regex_constants::match_not_eow;
|
||||
if (flags & RegexExecFlags::NotBeginOfSubject)
|
||||
res |= boost::regex_constants::match_not_bob;
|
||||
if (flags & RegexExecFlags::NotInitialNull)
|
||||
res |= boost::regex_constants::match_not_initial_null;
|
||||
if (flags & RegexExecFlags::AnyMatch)
|
||||
res |= boost::regex_constants::match_any;
|
||||
if (flags & RegexExecFlags::PrevAvailable)
|
||||
res |= boost::regex_constants::match_prev_avail;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
Regex::Regex(StringView re, RegexCompileFlags flags) try
|
||||
: m_impl{new CompiledRegex{compile_regex(re, flags)}},
|
||||
m_str{re.str()},
|
||||
m_boost_impl{Utf8It{re.begin(), re}, Utf8It{re.end(), re}, convert_flags(flags)}
|
||||
{
|
||||
try
|
||||
{
|
||||
m_impl = new CompiledRegex{compile_regex(re)};
|
||||
}
|
||||
catch (runtime_error& err)
|
||||
{
|
||||
write_to_debug_buffer(err.what());
|
||||
}
|
||||
} catch (std::runtime_error& err) { throw regex_error(err.what()); }
|
||||
|
||||
String option_to_string(const Regex& re)
|
||||
|
|
195
src/regex.hh
195
src/regex.hh
|
@ -12,89 +12,116 @@
|
|||
namespace Kakoune
|
||||
{
|
||||
|
||||
struct regex_error : runtime_error
|
||||
{
|
||||
regex_error(StringView desc)
|
||||
: runtime_error{format("regex error: '{}'", desc)}
|
||||
{}
|
||||
};
|
||||
|
||||
using RegexBase = boost::basic_regex<wchar_t, boost::c_regex_traits<wchar_t>>;
|
||||
|
||||
// Regex that keeps track of its string representation
|
||||
class Regex : public RegexBase
|
||||
class Regex
|
||||
{
|
||||
public:
|
||||
Regex() = default;
|
||||
|
||||
explicit Regex(StringView re, flag_type flags = ECMAScript);
|
||||
explicit Regex(StringView re, RegexCompileFlags flags = RegexCompileFlags::None);
|
||||
bool empty() const { return m_str.empty(); }
|
||||
bool operator==(const Regex& other) const { return m_str == other.m_str; }
|
||||
bool operator!=(const Regex& other) const { return m_str != other.m_str; }
|
||||
|
||||
const String& str() const { return m_str; }
|
||||
|
||||
// Number of capture groups derived from the compiled program's save_count
// (presumably two save slots per capture, minus the whole-match capture —
// confirm against CompiledRegex). A default-constructed Regex has no
// compiled program, so report 0 instead of dereferencing a null m_impl.
size_t mark_count() const { return m_impl.get() ? m_impl->save_count / 2 - 1 : 0; }
|
||||
|
||||
static constexpr const char* option_type_name = "regex";
|
||||
|
||||
const CompiledRegex* impl() const { return m_impl.get(); }
|
||||
|
||||
using BoostImpl = boost::basic_regex<wchar_t, boost::c_regex_traits<wchar_t>>;
|
||||
const BoostImpl& boost_impl() const { return m_boost_impl; }
|
||||
|
||||
private:
|
||||
String m_str;
|
||||
RefPtr<CompiledRegex> m_impl;
|
||||
String m_str;
|
||||
BoostImpl m_boost_impl;
|
||||
};
|
||||
|
||||
template<typename It>
|
||||
using RegexUtf8It = utf8::iterator<It, wchar_t, ssize_t>;
|
||||
|
||||
template<typename It>
|
||||
using RegexIteratorBase = boost::regex_iterator<RegexUtf8It<It>, wchar_t,
|
||||
boost::c_regex_traits<wchar_t>>;
|
||||
|
||||
namespace RegexConstant = boost::regex_constants;
|
||||
|
||||
template<typename Iterator>
|
||||
struct MatchResults : boost::match_results<RegexUtf8It<Iterator>>
|
||||
struct MatchResults
|
||||
{
|
||||
using ParentType = boost::match_results<RegexUtf8It<Iterator>>;
|
||||
struct SubMatch : std::pair<Iterator, Iterator>
|
||||
{
|
||||
SubMatch() = default;
|
||||
SubMatch(const boost::sub_match<RegexUtf8It<Iterator>>& m)
|
||||
: std::pair<Iterator, Iterator>{m.first.base(), m.second.base()},
|
||||
matched{m.matched}
|
||||
SubMatch(Iterator begin, Iterator end)
|
||||
: std::pair<Iterator, Iterator>{begin, end}, matched{begin != Iterator{}}
|
||||
{}
|
||||
|
||||
bool matched = false;
|
||||
};
|
||||
|
||||
struct iterator : boost::match_results<RegexUtf8It<Iterator>>::iterator
|
||||
struct iterator : std::iterator<std::bidirectional_iterator_tag, SubMatch, size_t, SubMatch*, SubMatch>
|
||||
{
|
||||
using ParentType = typename boost::match_results<RegexUtf8It<Iterator>>::iterator;
|
||||
iterator(const ParentType& it) : ParentType(it) {}
|
||||
using It = typename Vector<Iterator>::const_iterator;
|
||||
|
||||
SubMatch operator*() const { return {ParentType::operator*()}; }
|
||||
iterator() = default;
|
||||
iterator(It it) : m_it{std::move(it)} {}
|
||||
|
||||
// Step back to the previous sub-match. Each sub-match is stored as two
// consecutive entries (begin, end) in the underlying vector, so moving one
// sub-match backwards means retreating the wrapped iterator by two.
iterator& operator--() { m_it -= 2; return *this; }
|
||||
iterator& operator++() { m_it += 2; return *this; }
|
||||
SubMatch operator*() const { return {*m_it, *(m_it+1)}; }
|
||||
|
||||
friend bool operator==(const iterator& lhs, const iterator& rhs) { return lhs.m_it == rhs.m_it; }
|
||||
friend bool operator!=(const iterator& lhs, const iterator& rhs) { return lhs.m_it != rhs.m_it; }
|
||||
private:
|
||||
|
||||
It m_it;
|
||||
};
|
||||
|
||||
iterator begin() const { return {ParentType::begin()}; }
|
||||
iterator cbegin() const { return {ParentType::cbegin()}; }
|
||||
iterator end() const { return {ParentType::end()}; }
|
||||
iterator cend() const { return {ParentType::cend()}; }
|
||||
MatchResults() = default;
|
||||
MatchResults(Vector<Iterator> values) : m_values{std::move(values)} {}
|
||||
|
||||
SubMatch operator[](size_t s) const { return {ParentType::operator[](s)}; }
|
||||
iterator begin() const { return iterator{m_values.begin()}; }
|
||||
iterator cbegin() const { return iterator{m_values.cbegin()}; }
|
||||
iterator end() const { return iterator{m_values.end()}; }
|
||||
iterator cend() const { return iterator{m_values.cend()}; }
|
||||
|
||||
size_t size() const { return m_values.size() / 2; }
|
||||
bool empty() const { return m_values.empty(); }
|
||||
|
||||
SubMatch operator[](size_t i) const
|
||||
{
|
||||
return i * 2 < m_values.size() ?
|
||||
SubMatch{m_values[i*2], m_values[i*2+1]} : SubMatch{};
|
||||
}
|
||||
|
||||
friend bool operator==(const MatchResults& lhs, const MatchResults& rhs)
|
||||
{
|
||||
return lhs.m_values == rhs.m_values;
|
||||
}
|
||||
|
||||
friend bool operator!=(const MatchResults& lhs, const MatchResults& rhs)
|
||||
{
|
||||
return not (lhs == rhs);
|
||||
}
|
||||
|
||||
void swap(MatchResults& other)
|
||||
{
|
||||
m_values.swap(other.m_values);
|
||||
}
|
||||
|
||||
private:
|
||||
Vector<Iterator> m_values;
|
||||
};
|
||||
|
||||
inline RegexConstant::match_flag_type match_flags(bool bol, bool eol, bool bow, bool eow)
|
||||
inline RegexExecFlags match_flags(bool bol, bool eol, bool bow, bool eow)
|
||||
{
|
||||
return (bol ? RegexConstant::match_default : RegexConstant::match_not_bol) |
|
||||
(eol ? RegexConstant::match_default : RegexConstant::match_not_eol) |
|
||||
(bow ? RegexConstant::match_default : RegexConstant::match_not_bow) |
|
||||
(eow ? RegexConstant::match_default : RegexConstant::match_not_eow);
|
||||
return (bol ? RegexExecFlags::None : RegexExecFlags::NotBeginOfLine) |
|
||||
(eol ? RegexExecFlags::None : RegexExecFlags::NotEndOfLine) |
|
||||
(bow ? RegexExecFlags::None : RegexExecFlags::NotBeginOfWord) |
|
||||
(eow ? RegexExecFlags::None : RegexExecFlags::NotEndOfWord);
|
||||
}
|
||||
|
||||
void regex_mismatch(const Regex& re);
|
||||
|
||||
template<typename It>
|
||||
void check_captures(const Regex& re, const MatchResults<It>& res, const Vector<It>& captures)
|
||||
using RegexUtf8It = utf8::iterator<It, wchar_t, ssize_t>;
|
||||
|
||||
template<typename It>
|
||||
void check_captures(const Regex& re, const boost::match_results<RegexUtf8It<It>>& res, const Vector<It>& captures)
|
||||
{
|
||||
if (res.size() > captures.size() * 2)
|
||||
return regex_mismatch(re);
|
||||
|
@ -115,37 +142,18 @@ void check_captures(const Regex& re, const MatchResults<It>& res, const Vector<I
|
|||
}
|
||||
}
|
||||
|
||||
inline RegexExecFlags convert_flags(RegexConstant::match_flag_type flags)
|
||||
{
|
||||
auto res = RegexExecFlags::None;
|
||||
|
||||
if (flags & RegexConstant::match_not_bol)
|
||||
res |= RegexExecFlags::NotBeginOfLine;
|
||||
if (flags & RegexConstant::match_not_eol)
|
||||
res |= RegexExecFlags::NotEndOfLine;
|
||||
if (flags & RegexConstant::match_not_bow)
|
||||
res |= RegexExecFlags::NotBeginOfWord;
|
||||
if (flags & RegexConstant::match_not_eow)
|
||||
res |= RegexExecFlags::NotEndOfWord;
|
||||
if (flags & RegexConstant::match_not_bob)
|
||||
res |= RegexExecFlags::NotBeginOfSubject;
|
||||
if (flags & RegexConstant::match_not_initial_null)
|
||||
res |= RegexExecFlags::NotInitialNull;
|
||||
if (flags & RegexConstant::match_any)
|
||||
res |= RegexExecFlags::AnyMatch;
|
||||
if (flags & RegexConstant::match_prev_avail)
|
||||
res |= RegexExecFlags::PrevAvailable;
|
||||
|
||||
return res;
|
||||
}
|
||||
boost::regbase::flag_type convert_flags(RegexCompileFlags flags);
|
||||
boost::regex_constants::match_flag_type convert_flags(RegexExecFlags flags);
|
||||
|
||||
template<typename It>
|
||||
bool regex_match(It begin, It end, const Regex& re)
|
||||
{
|
||||
try
|
||||
{
|
||||
bool matched = boost::regex_match<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end}, re);
|
||||
if (re.impl() and matched != regex_match(begin, end, *re.impl()))
|
||||
const bool matched = regex_match(begin, end, *re.impl());
|
||||
if (not re.boost_impl().empty() and
|
||||
matched != boost::regex_match<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end},
|
||||
re.boost_impl()))
|
||||
regex_mismatch(re);
|
||||
return matched;
|
||||
}
|
||||
|
@ -160,12 +168,18 @@ bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
|
|||
{
|
||||
try
|
||||
{
|
||||
bool matched = boost::regex_match<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end}, res, re);
|
||||
Vector<It> captures;
|
||||
if (re.impl() and matched != regex_match(begin, end, captures, *re.impl()))
|
||||
const bool matched = regex_match(begin, end, captures, *re.impl());
|
||||
|
||||
boost::match_results<RegexUtf8It<It>> boost_res;
|
||||
if (not re.boost_impl().empty() and
|
||||
matched != boost::regex_match<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end},
|
||||
boost_res, re.boost_impl()))
|
||||
regex_mismatch(re);
|
||||
if (re.impl() and matched)
|
||||
check_captures(re, res, captures);
|
||||
if (not re.boost_impl().empty() and matched)
|
||||
check_captures(re, boost_res, captures);
|
||||
|
||||
res = matched ? MatchResults<It>{std::move(captures)} : MatchResults<It>{};
|
||||
return matched;
|
||||
}
|
||||
catch (std::runtime_error& err)
|
||||
|
@ -176,13 +190,16 @@ bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
|
|||
|
||||
template<typename It>
|
||||
bool regex_search(It begin, It end, const Regex& re,
|
||||
RegexConstant::match_flag_type flags = RegexConstant::match_default)
|
||||
RegexExecFlags flags = RegexExecFlags::None)
|
||||
{
|
||||
try
|
||||
{
|
||||
auto first = (flags & RegexConstant::match_prev_avail) ? begin-1 : begin;
|
||||
bool matched = boost::regex_search<RegexUtf8It<It>>({begin, first, end}, {end, first, end}, re, flags);
|
||||
if (re.impl() and matched != regex_search(begin, end, *re.impl(), convert_flags(flags)))
|
||||
const bool matched = regex_search(begin, end, *re.impl(), flags);
|
||||
|
||||
auto first = (flags & RegexExecFlags::PrevAvailable) ? begin-1 : begin;
|
||||
if (not re.boost_impl().empty() and
|
||||
matched != boost::regex_search<RegexUtf8It<It>>({begin, first, end}, {end, first, end},
|
||||
re.boost_impl(), convert_flags(flags)))
|
||||
regex_mismatch(re);
|
||||
return matched;
|
||||
}
|
||||
|
@ -194,17 +211,23 @@ bool regex_search(It begin, It end, const Regex& re,
|
|||
|
||||
template<typename It>
|
||||
bool regex_search(It begin, It end, MatchResults<It>& res, const Regex& re,
|
||||
RegexConstant::match_flag_type flags = RegexConstant::match_default)
|
||||
RegexExecFlags flags = RegexExecFlags::None)
|
||||
{
|
||||
try
|
||||
{
|
||||
auto first = (flags & RegexConstant::match_prev_avail) ? begin-1 : begin;
|
||||
bool matched = boost::regex_search<RegexUtf8It<It>>({begin, first, end}, {end, first, end}, res, re, flags);
|
||||
Vector<It> captures;
|
||||
if (re.impl() and matched != regex_search(begin, end, captures, *re.impl(), convert_flags(flags)))
|
||||
const bool matched = regex_search(begin, end, captures, *re.impl(), flags);
|
||||
|
||||
auto first = (flags & RegexExecFlags::PrevAvailable) ? begin-1 : begin;
|
||||
boost::match_results<RegexUtf8It<It>> boost_res;
|
||||
if (not re.boost_impl().empty() and
|
||||
matched != boost::regex_search<RegexUtf8It<It>>({begin, first, end}, {end, first, end},
|
||||
boost_res, re.boost_impl(), convert_flags(flags)))
|
||||
regex_mismatch(re);
|
||||
if (re.impl() and matched)
|
||||
check_captures(re, res, captures);
|
||||
if (not re.boost_impl().empty() and matched)
|
||||
check_captures(re, boost_res, captures);
|
||||
|
||||
res = matched ? MatchResults<It>{std::move(captures)} : MatchResults<It>{};
|
||||
return matched;
|
||||
}
|
||||
catch (std::runtime_error& err)
|
||||
|
@ -219,12 +242,11 @@ void option_from_string(StringView str, Regex& re);
|
|||
template<typename Iterator>
|
||||
struct RegexIterator
|
||||
{
|
||||
using Utf8It = RegexUtf8It<Iterator>;
|
||||
using ValueType = MatchResults<Iterator>;
|
||||
|
||||
RegexIterator() = default;
|
||||
RegexIterator(Iterator begin, Iterator end, const Regex& re,
|
||||
RegexConstant::match_flag_type flags = RegexConstant::match_default)
|
||||
RegexExecFlags flags = RegexExecFlags::None)
|
||||
: m_regex{&re}, m_next_begin{begin}, m_begin{begin}, m_end{end}, m_flags{flags}
|
||||
{
|
||||
next();
|
||||
|
@ -261,11 +283,11 @@ private:
|
|||
{
|
||||
kak_assert(m_regex);
|
||||
|
||||
RegexConstant::match_flag_type additional_flags{};
|
||||
RegexExecFlags additional_flags{};
|
||||
if (m_results.size() and m_results[0].first == m_results[0].second)
|
||||
additional_flags |= RegexConstant::match_not_initial_null;
|
||||
additional_flags |= RegexExecFlags::NotInitialNull;
|
||||
if (m_begin != m_next_begin)
|
||||
additional_flags |= RegexConstant::match_not_bob | RegexConstant::match_prev_avail;
|
||||
additional_flags |= RegexExecFlags::NotBeginOfSubject | RegexExecFlags::PrevAvailable;
|
||||
|
||||
if (not regex_search(m_next_begin, m_end, m_results, *m_regex,
|
||||
m_flags | additional_flags))
|
||||
|
@ -279,10 +301,9 @@ private:
|
|||
Iterator m_next_begin{};
|
||||
const Iterator m_begin{};
|
||||
const Iterator m_end{};
|
||||
const RegexConstant::match_flag_type m_flags = RegexConstant::match_default;
|
||||
const RegexExecFlags m_flags = RegexExecFlags::None;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif // regex_hh_INCLUDED
|
||||
|
|
|
@ -96,7 +96,7 @@ struct RegexParser
|
|||
private:
|
||||
struct InvalidPolicy
|
||||
{
|
||||
Codepoint operator()(Codepoint cp) { throw runtime_error{"Invalid utf8 in regex"}; }
|
||||
Codepoint operator()(Codepoint cp) { throw regex_error{"Invalid utf8 in regex"}; }
|
||||
};
|
||||
|
||||
using Iterator = utf8::iterator<const char*, Codepoint, int, InvalidPolicy>;
|
||||
|
@ -466,9 +466,9 @@ private:
|
|||
[[gnu::noreturn]]
|
||||
void parse_error(StringView error)
|
||||
{
|
||||
throw runtime_error(format("regex parse error: {} at '{}<<<HERE>>>{}'", error,
|
||||
StringView{m_regex.begin(), m_pos.base()},
|
||||
StringView{m_pos.base(), m_regex.end()}));
|
||||
throw regex_error(format("regex parse error: {} at '{}<<<HERE>>>{}'", error,
|
||||
StringView{m_regex.begin(), m_pos.base()},
|
||||
StringView{m_pos.base(), m_regex.end()}));
|
||||
}
|
||||
|
||||
void validate_lookaround(const AstNodePtr& node)
|
||||
|
@ -515,8 +515,8 @@ const RegexParser::ControlEscape RegexParser::control_escapes[5] = {
|
|||
|
||||
struct RegexCompiler
|
||||
{
|
||||
RegexCompiler(const ParsedRegex& parsed_regex, MatchDirection direction)
|
||||
: m_parsed_regex{parsed_regex}, m_forward{direction == MatchDirection::Forward}
|
||||
RegexCompiler(const ParsedRegex& parsed_regex, RegexCompileFlags flags, MatchDirection direction)
|
||||
: m_parsed_regex{parsed_regex}, m_flags(flags), m_forward{direction == MatchDirection::Forward}
|
||||
{
|
||||
compile_node(m_parsed_regex.ast);
|
||||
push_inst(CompiledRegex::Match);
|
||||
|
@ -535,7 +535,7 @@ private:
|
|||
const auto start_pos = m_program.instructions.size();
|
||||
|
||||
const Codepoint capture = (node->op == ParsedRegex::Alternation or node->op == ParsedRegex::Sequence) ? node->value : -1;
|
||||
if (capture != -1)
|
||||
if (capture != -1 and (capture == 0 or not (m_flags & RegexCompileFlags::NoSubs)))
|
||||
push_inst(CompiledRegex::Save, capture * 2 + (m_forward ? 0 : 1));
|
||||
|
||||
Vector<uint32_t> goto_inner_end_offsets;
|
||||
|
@ -629,7 +629,7 @@ private:
|
|||
for (auto& offset : goto_inner_end_offsets)
|
||||
m_program.instructions[offset].param = m_program.instructions.size();
|
||||
|
||||
if (capture != -1)
|
||||
if (capture != -1 and (capture == 0 or not (m_flags & RegexCompileFlags::NoSubs)))
|
||||
push_inst(CompiledRegex::Save, capture * 2 + (m_forward ? 1 : 0));
|
||||
|
||||
return start_pos;
|
||||
|
@ -797,6 +797,7 @@ private:
|
|||
}
|
||||
|
||||
CompiledRegex m_program;
|
||||
RegexCompileFlags m_flags;
|
||||
const ParsedRegex& m_parsed_regex;
|
||||
const bool m_forward;
|
||||
};
|
||||
|
@ -878,9 +879,9 @@ void dump_regex(const CompiledRegex& program)
|
|||
}
|
||||
}
|
||||
|
||||
CompiledRegex compile_regex(StringView re, MatchDirection direction)
|
||||
CompiledRegex compile_regex(StringView re, RegexCompileFlags flags, MatchDirection direction)
|
||||
{
|
||||
return RegexCompiler{RegexParser::parse(re), direction}.get_compiled_regex();
|
||||
return RegexCompiler{RegexParser::parse(re), flags, direction}.get_compiled_regex();
|
||||
}
|
||||
|
||||
namespace
|
||||
|
@ -891,7 +892,7 @@ struct TestVM : CompiledRegex, ThreadedRegexVM<const char*, dir>
|
|||
using VMType = ThreadedRegexVM<const char*, dir>;
|
||||
|
||||
TestVM(StringView re, bool dump = false)
|
||||
: CompiledRegex{compile_regex(re, dir)},
|
||||
: CompiledRegex{compile_regex(re, RegexCompileFlags::None, dir)},
|
||||
VMType{(const CompiledRegex&)*this}
|
||||
{ if (dump) dump_regex(*this); }
|
||||
|
||||
|
|
|
@ -14,6 +14,11 @@
|
|||
namespace Kakoune
|
||||
{
|
||||
|
||||
struct regex_error : runtime_error
|
||||
{
|
||||
using runtime_error::runtime_error;
|
||||
};
|
||||
|
||||
enum class MatchDirection
|
||||
{
|
||||
Forward,
|
||||
|
@ -66,7 +71,15 @@ struct CompiledRegex : RefCountable
|
|||
std::unique_ptr<StartChars> start_chars;
|
||||
};
|
||||
|
||||
CompiledRegex compile_regex(StringView re, MatchDirection direction = MatchDirection::Forward);
|
||||
enum RegexCompileFlags
|
||||
{
|
||||
None = 0,
|
||||
NoSubs = 1 << 0,
|
||||
Optimize = 1 << 1
|
||||
};
|
||||
constexpr bool with_bit_ops(Meta::Type<RegexCompileFlags>) { return true; }
|
||||
|
||||
CompiledRegex compile_regex(StringView re, RegexCompileFlags flags, MatchDirection direction = MatchDirection::Forward);
|
||||
|
||||
enum class RegexExecFlags
|
||||
{
|
||||
|
@ -475,7 +488,7 @@ bool regex_search(It begin, It end, Vector<It>& captures, const CompiledRegex& r
|
|||
ThreadedRegexVM<It, direction> vm{re};
|
||||
if (vm.exec(begin, end, flags | RegexExecFlags::Search))
|
||||
{
|
||||
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures));
|
||||
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
|
|
@ -837,7 +837,7 @@ void select_buffer(SelectionList& selections)
|
|||
selections = SelectionList{ buffer, target_eol({{0,0}, buffer.back_coord()}) };
|
||||
}
|
||||
|
||||
static RegexConstant::match_flag_type
|
||||
static RegexExecFlags
|
||||
match_flags(const Buffer& buf, const BufferIterator& begin, const BufferIterator& end)
|
||||
{
|
||||
return match_flags(is_bol(begin.coord()), is_eol(buf, end.coord()),
|
||||
|
|
Loading…
Reference in New Issue
Block a user