Regex: switch to custom impl, use boost for checking
This commit is contained in:
parent
9305fa1369
commit
065bbc8f59
|
@ -789,7 +789,7 @@ const CommandDesc add_hook_cmd = {
|
||||||
if (not contains(hooks, parser[1]))
|
if (not contains(hooks, parser[1]))
|
||||||
throw runtime_error{format("Unknown hook '{}'", parser[1])};
|
throw runtime_error{format("Unknown hook '{}'", parser[1])};
|
||||||
|
|
||||||
Regex regex{parser[2], Regex::optimize | Regex::ECMAScript};
|
Regex regex{parser[2], RegexCompileFlags::Optimize};
|
||||||
const String& command = parser[3];
|
const String& command = parser[3];
|
||||||
auto group = parser.get_switch("group").value_or(StringView{});
|
auto group = parser.get_switch("group").value_or(StringView{});
|
||||||
get_scope(parser[0], context).hooks().add_hook(parser[1], group.str(), std::move(regex), command);
|
get_scope(parser[0], context).hooks().add_hook(parser[1], group.str(), std::move(regex), command);
|
||||||
|
|
|
@ -323,7 +323,7 @@ public:
|
||||||
|
|
||||||
String id = format("hlregex'{}'", params[0]);
|
String id = format("hlregex'{}'", params[0]);
|
||||||
|
|
||||||
Regex ex{params[0], Regex::optimize};
|
Regex ex{params[0], RegexCompileFlags::Optimize};
|
||||||
|
|
||||||
return {id, std::make_unique<RegexHighlighter>(std::move(ex),
|
return {id, std::make_unique<RegexHighlighter>(std::move(ex),
|
||||||
std::move(faces))};
|
std::move(faces))};
|
||||||
|
@ -1823,8 +1823,8 @@ public:
|
||||||
if (parser[i].empty() or parser[i+1].empty() or parser[i+2].empty())
|
if (parser[i].empty() or parser[i+1].empty() or parser[i+2].empty())
|
||||||
throw runtime_error("group id, begin and end must not be empty");
|
throw runtime_error("group id, begin and end must not be empty");
|
||||||
|
|
||||||
const Regex::flag_type flags = match_capture ?
|
const RegexCompileFlags flags = match_capture ?
|
||||||
Regex::optimize : Regex::nosubs | Regex::optimize;
|
RegexCompileFlags::Optimize : RegexCompileFlags::NoSubs | RegexCompileFlags::Optimize;
|
||||||
|
|
||||||
regions.push_back({ parser[i],
|
regions.push_back({ parser[i],
|
||||||
Regex{parser[i+1], flags}, Regex{parser[i+2], flags},
|
Regex{parser[i+1], flags}, Regex{parser[i+2], flags},
|
||||||
|
|
|
@ -981,7 +981,7 @@ void keep(Context& context, NormalParams)
|
||||||
const auto flags = match_flags(is_bol(begin.coord()), false,
|
const auto flags = match_flags(is_bol(begin.coord()), false,
|
||||||
is_bow(buffer, begin.coord()),
|
is_bow(buffer, begin.coord()),
|
||||||
is_eow(buffer, end.coord())) |
|
is_eow(buffer, end.coord())) |
|
||||||
RegexConstant::match_any;
|
RegexExecFlags::AnyMatch;
|
||||||
if (regex_search(begin, end, ex, flags) == matching)
|
if (regex_search(begin, end, ex, flags) == matching)
|
||||||
keep.push_back(sel);
|
keep.push_back(sel);
|
||||||
}
|
}
|
||||||
|
|
49
src/regex.cc
49
src/regex.cc
|
@ -1,6 +1,5 @@
|
||||||
#include "regex.hh"
|
#include "regex.hh"
|
||||||
|
|
||||||
#include "exception.hh"
|
|
||||||
#include "buffer_utils.hh"
|
#include "buffer_utils.hh"
|
||||||
|
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
|
@ -8,17 +7,45 @@ namespace Kakoune
|
||||||
|
|
||||||
using Utf8It = RegexUtf8It<const char*>;
|
using Utf8It = RegexUtf8It<const char*>;
|
||||||
|
|
||||||
Regex::Regex(StringView re, flag_type flags) try
|
boost::regbase::flag_type convert_flags(RegexCompileFlags flags)
|
||||||
: RegexBase{Utf8It{re.begin(), re}, Utf8It{re.end(), re}, flags}, m_str{re.str()}
|
{
|
||||||
|
boost::regbase::flag_type res = boost::regbase::ECMAScript;
|
||||||
|
if (flags & RegexCompileFlags::NoSubs)
|
||||||
|
res |= boost::regbase::nosubs;
|
||||||
|
if (flags & RegexCompileFlags::Optimize)
|
||||||
|
res |= boost::regbase::optimize;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
boost::regex_constants::match_flag_type convert_flags(RegexExecFlags flags)
|
||||||
|
{
|
||||||
|
boost::regex_constants::match_flag_type res = boost::regex_constants::match_default;
|
||||||
|
|
||||||
|
if (flags & RegexExecFlags::NotBeginOfLine)
|
||||||
|
res |= boost::regex_constants::match_not_bol;
|
||||||
|
if (flags & RegexExecFlags::NotEndOfLine)
|
||||||
|
res |= boost::regex_constants::match_not_eol;
|
||||||
|
if (flags & RegexExecFlags::NotBeginOfWord)
|
||||||
|
res |= boost::regex_constants::match_not_bow;
|
||||||
|
if (flags & RegexExecFlags::NotEndOfWord)
|
||||||
|
res |= boost::regex_constants::match_not_eow;
|
||||||
|
if (flags & RegexExecFlags::NotBeginOfSubject)
|
||||||
|
res |= boost::regex_constants::match_not_bob;
|
||||||
|
if (flags & RegexExecFlags::NotInitialNull)
|
||||||
|
res |= boost::regex_constants::match_not_initial_null;
|
||||||
|
if (flags & RegexExecFlags::AnyMatch)
|
||||||
|
res |= boost::regex_constants::match_any;
|
||||||
|
if (flags & RegexExecFlags::PrevAvailable)
|
||||||
|
res |= boost::regex_constants::match_prev_avail;
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
Regex::Regex(StringView re, RegexCompileFlags flags) try
|
||||||
|
: m_impl{new CompiledRegex{compile_regex(re, flags)}},
|
||||||
|
m_str{re.str()},
|
||||||
|
m_boost_impl{Utf8It{re.begin(), re}, Utf8It{re.end(), re}, convert_flags(flags)}
|
||||||
{
|
{
|
||||||
try
|
|
||||||
{
|
|
||||||
m_impl = new CompiledRegex{compile_regex(re)};
|
|
||||||
}
|
|
||||||
catch (runtime_error& err)
|
|
||||||
{
|
|
||||||
write_to_debug_buffer(err.what());
|
|
||||||
}
|
|
||||||
} catch (std::runtime_error& err) { throw regex_error(err.what()); }
|
} catch (std::runtime_error& err) { throw regex_error(err.what()); }
|
||||||
|
|
||||||
String option_to_string(const Regex& re)
|
String option_to_string(const Regex& re)
|
||||||
|
|
195
src/regex.hh
195
src/regex.hh
|
@ -12,89 +12,116 @@
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
|
||||||
struct regex_error : runtime_error
|
|
||||||
{
|
|
||||||
regex_error(StringView desc)
|
|
||||||
: runtime_error{format("regex error: '{}'", desc)}
|
|
||||||
{}
|
|
||||||
};
|
|
||||||
|
|
||||||
using RegexBase = boost::basic_regex<wchar_t, boost::c_regex_traits<wchar_t>>;
|
|
||||||
|
|
||||||
// Regex that keeps track of its string representation
|
// Regex that keeps track of its string representation
|
||||||
class Regex : public RegexBase
|
class Regex
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
Regex() = default;
|
Regex() = default;
|
||||||
|
|
||||||
explicit Regex(StringView re, flag_type flags = ECMAScript);
|
explicit Regex(StringView re, RegexCompileFlags flags = RegexCompileFlags::None);
|
||||||
bool empty() const { return m_str.empty(); }
|
bool empty() const { return m_str.empty(); }
|
||||||
bool operator==(const Regex& other) const { return m_str == other.m_str; }
|
bool operator==(const Regex& other) const { return m_str == other.m_str; }
|
||||||
bool operator!=(const Regex& other) const { return m_str != other.m_str; }
|
bool operator!=(const Regex& other) const { return m_str != other.m_str; }
|
||||||
|
|
||||||
const String& str() const { return m_str; }
|
const String& str() const { return m_str; }
|
||||||
|
|
||||||
|
size_t mark_count() const { return m_impl->save_count / 2 - 1; }
|
||||||
|
|
||||||
static constexpr const char* option_type_name = "regex";
|
static constexpr const char* option_type_name = "regex";
|
||||||
|
|
||||||
const CompiledRegex* impl() const { return m_impl.get(); }
|
const CompiledRegex* impl() const { return m_impl.get(); }
|
||||||
|
|
||||||
|
using BoostImpl = boost::basic_regex<wchar_t, boost::c_regex_traits<wchar_t>>;
|
||||||
|
const BoostImpl& boost_impl() const { return m_boost_impl; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
String m_str;
|
|
||||||
RefPtr<CompiledRegex> m_impl;
|
RefPtr<CompiledRegex> m_impl;
|
||||||
|
String m_str;
|
||||||
|
BoostImpl m_boost_impl;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename It>
|
|
||||||
using RegexUtf8It = utf8::iterator<It, wchar_t, ssize_t>;
|
|
||||||
|
|
||||||
template<typename It>
|
|
||||||
using RegexIteratorBase = boost::regex_iterator<RegexUtf8It<It>, wchar_t,
|
|
||||||
boost::c_regex_traits<wchar_t>>;
|
|
||||||
|
|
||||||
namespace RegexConstant = boost::regex_constants;
|
|
||||||
|
|
||||||
template<typename Iterator>
|
template<typename Iterator>
|
||||||
struct MatchResults : boost::match_results<RegexUtf8It<Iterator>>
|
struct MatchResults
|
||||||
{
|
{
|
||||||
using ParentType = boost::match_results<RegexUtf8It<Iterator>>;
|
|
||||||
struct SubMatch : std::pair<Iterator, Iterator>
|
struct SubMatch : std::pair<Iterator, Iterator>
|
||||||
{
|
{
|
||||||
SubMatch() = default;
|
SubMatch() = default;
|
||||||
SubMatch(const boost::sub_match<RegexUtf8It<Iterator>>& m)
|
SubMatch(Iterator begin, Iterator end)
|
||||||
: std::pair<Iterator, Iterator>{m.first.base(), m.second.base()},
|
: std::pair<Iterator, Iterator>{begin, end}, matched{begin != Iterator{}}
|
||||||
matched{m.matched}
|
|
||||||
{}
|
{}
|
||||||
|
|
||||||
bool matched = false;
|
bool matched = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct iterator : boost::match_results<RegexUtf8It<Iterator>>::iterator
|
struct iterator : std::iterator<std::bidirectional_iterator_tag, SubMatch, size_t, SubMatch*, SubMatch>
|
||||||
{
|
{
|
||||||
using ParentType = typename boost::match_results<RegexUtf8It<Iterator>>::iterator;
|
using It = typename Vector<Iterator>::const_iterator;
|
||||||
iterator(const ParentType& it) : ParentType(it) {}
|
|
||||||
|
|
||||||
SubMatch operator*() const { return {ParentType::operator*()}; }
|
iterator() = default;
|
||||||
|
iterator(It it) : m_it{std::move(it)} {}
|
||||||
|
|
||||||
|
// Step back to the previous sub-match. Each sub-match occupies two consecutive
// entries of the underlying vector (operator* reads *m_it and *(m_it+1)), so
// one step backwards is two slots, mirroring operator++.
iterator& operator--() { m_it -= 2; return *this; }
|
||||||
|
iterator& operator++() { m_it += 2; return *this; }
|
||||||
|
SubMatch operator*() const { return {*m_it, *(m_it+1)}; }
|
||||||
|
|
||||||
|
friend bool operator==(const iterator& lhs, const iterator& rhs) { return lhs.m_it == rhs.m_it; }
|
||||||
|
friend bool operator!=(const iterator& lhs, const iterator& rhs) { return lhs.m_it != rhs.m_it; }
|
||||||
|
private:
|
||||||
|
|
||||||
|
It m_it;
|
||||||
};
|
};
|
||||||
|
|
||||||
iterator begin() const { return {ParentType::begin()}; }
|
MatchResults() = default;
|
||||||
iterator cbegin() const { return {ParentType::cbegin()}; }
|
MatchResults(Vector<Iterator> values) : m_values{std::move(values)} {}
|
||||||
iterator end() const { return {ParentType::end()}; }
|
|
||||||
iterator cend() const { return {ParentType::cend()}; }
|
|
||||||
|
|
||||||
SubMatch operator[](size_t s) const { return {ParentType::operator[](s)}; }
|
iterator begin() const { return iterator{m_values.begin()}; }
|
||||||
|
iterator cbegin() const { return iterator{m_values.cbegin()}; }
|
||||||
|
iterator end() const { return iterator{m_values.end()}; }
|
||||||
|
iterator cend() const { return iterator{m_values.cend()}; }
|
||||||
|
|
||||||
|
size_t size() const { return m_values.size() / 2; }
|
||||||
|
bool empty() const { return m_values.empty(); }
|
||||||
|
|
||||||
|
SubMatch operator[](size_t i) const
|
||||||
|
{
|
||||||
|
return i * 2 < m_values.size() ?
|
||||||
|
SubMatch{m_values[i*2], m_values[i*2+1]} : SubMatch{};
|
||||||
|
}
|
||||||
|
|
||||||
|
friend bool operator==(const MatchResults& lhs, const MatchResults& rhs)
|
||||||
|
{
|
||||||
|
return lhs.m_values == rhs.m_values;
|
||||||
|
}
|
||||||
|
|
||||||
|
friend bool operator!=(const MatchResults& lhs, const MatchResults& rhs)
|
||||||
|
{
|
||||||
|
return not (lhs == rhs);
|
||||||
|
}
|
||||||
|
|
||||||
|
void swap(MatchResults& other)
|
||||||
|
{
|
||||||
|
m_values.swap(other.m_values);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
Vector<Iterator> m_values;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline RegexConstant::match_flag_type match_flags(bool bol, bool eol, bool bow, bool eow)
|
inline RegexExecFlags match_flags(bool bol, bool eol, bool bow, bool eow)
|
||||||
{
|
{
|
||||||
return (bol ? RegexConstant::match_default : RegexConstant::match_not_bol) |
|
return (bol ? RegexExecFlags::None : RegexExecFlags::NotBeginOfLine) |
|
||||||
(eol ? RegexConstant::match_default : RegexConstant::match_not_eol) |
|
(eol ? RegexExecFlags::None : RegexExecFlags::NotEndOfLine) |
|
||||||
(bow ? RegexConstant::match_default : RegexConstant::match_not_bow) |
|
(bow ? RegexExecFlags::None : RegexExecFlags::NotBeginOfWord) |
|
||||||
(eow ? RegexConstant::match_default : RegexConstant::match_not_eow);
|
(eow ? RegexExecFlags::None : RegexExecFlags::NotEndOfWord);
|
||||||
}
|
}
|
||||||
|
|
||||||
void regex_mismatch(const Regex& re);
|
void regex_mismatch(const Regex& re);
|
||||||
|
|
||||||
template<typename It>
|
template<typename It>
|
||||||
void check_captures(const Regex& re, const MatchResults<It>& res, const Vector<It>& captures)
|
using RegexUtf8It = utf8::iterator<It, wchar_t, ssize_t>;
|
||||||
|
|
||||||
|
template<typename It>
|
||||||
|
void check_captures(const Regex& re, const boost::match_results<RegexUtf8It<It>>& res, const Vector<It>& captures)
|
||||||
{
|
{
|
||||||
if (res.size() > captures.size() * 2)
|
if (res.size() > captures.size() * 2)
|
||||||
return regex_mismatch(re);
|
return regex_mismatch(re);
|
||||||
|
@ -115,37 +142,18 @@ void check_captures(const Regex& re, const MatchResults<It>& res, const Vector<I
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline RegexExecFlags convert_flags(RegexConstant::match_flag_type flags)
|
boost::regbase::flag_type convert_flags(RegexCompileFlags flags);
|
||||||
{
|
boost::regex_constants::match_flag_type convert_flags(RegexExecFlags flags);
|
||||||
auto res = RegexExecFlags::None;
|
|
||||||
|
|
||||||
if (flags & RegexConstant::match_not_bol)
|
|
||||||
res |= RegexExecFlags::NotBeginOfLine;
|
|
||||||
if (flags & RegexConstant::match_not_eol)
|
|
||||||
res |= RegexExecFlags::NotEndOfLine;
|
|
||||||
if (flags & RegexConstant::match_not_bow)
|
|
||||||
res |= RegexExecFlags::NotBeginOfWord;
|
|
||||||
if (flags & RegexConstant::match_not_eow)
|
|
||||||
res |= RegexExecFlags::NotEndOfWord;
|
|
||||||
if (flags & RegexConstant::match_not_bob)
|
|
||||||
res |= RegexExecFlags::NotBeginOfSubject;
|
|
||||||
if (flags & RegexConstant::match_not_initial_null)
|
|
||||||
res |= RegexExecFlags::NotInitialNull;
|
|
||||||
if (flags & RegexConstant::match_any)
|
|
||||||
res |= RegexExecFlags::AnyMatch;
|
|
||||||
if (flags & RegexConstant::match_prev_avail)
|
|
||||||
res |= RegexExecFlags::PrevAvailable;
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename It>
|
template<typename It>
|
||||||
bool regex_match(It begin, It end, const Regex& re)
|
bool regex_match(It begin, It end, const Regex& re)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
bool matched = boost::regex_match<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end}, re);
|
const bool matched = regex_match(begin, end, *re.impl());
|
||||||
if (re.impl() and matched != regex_match(begin, end, *re.impl()))
|
if (not re.boost_impl().empty() and
|
||||||
|
matched != boost::regex_match<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end},
|
||||||
|
re.boost_impl()))
|
||||||
regex_mismatch(re);
|
regex_mismatch(re);
|
||||||
return matched;
|
return matched;
|
||||||
}
|
}
|
||||||
|
@ -160,12 +168,18 @@ bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
bool matched = boost::regex_match<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end}, res, re);
|
|
||||||
Vector<It> captures;
|
Vector<It> captures;
|
||||||
if (re.impl() and matched != regex_match(begin, end, captures, *re.impl()))
|
const bool matched = regex_match(begin, end, captures, *re.impl());
|
||||||
|
|
||||||
|
boost::match_results<RegexUtf8It<It>> boost_res;
|
||||||
|
if (not re.boost_impl().empty() and
|
||||||
|
matched != boost::regex_match<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end},
|
||||||
|
boost_res, re.boost_impl()))
|
||||||
regex_mismatch(re);
|
regex_mismatch(re);
|
||||||
if (re.impl() and matched)
|
if (not re.boost_impl().empty() and matched)
|
||||||
check_captures(re, res, captures);
|
check_captures(re, boost_res, captures);
|
||||||
|
|
||||||
|
res = matched ? MatchResults<It>{std::move(captures)} : MatchResults<It>{};
|
||||||
return matched;
|
return matched;
|
||||||
}
|
}
|
||||||
catch (std::runtime_error& err)
|
catch (std::runtime_error& err)
|
||||||
|
@ -176,13 +190,16 @@ bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
|
||||||
|
|
||||||
template<typename It>
|
template<typename It>
|
||||||
bool regex_search(It begin, It end, const Regex& re,
|
bool regex_search(It begin, It end, const Regex& re,
|
||||||
RegexConstant::match_flag_type flags = RegexConstant::match_default)
|
RegexExecFlags flags = RegexExecFlags::None)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
auto first = (flags & RegexConstant::match_prev_avail) ? begin-1 : begin;
|
const bool matched = regex_search(begin, end, *re.impl(), flags);
|
||||||
bool matched = boost::regex_search<RegexUtf8It<It>>({begin, first, end}, {end, first, end}, re, flags);
|
|
||||||
if (re.impl() and matched != regex_search(begin, end, *re.impl(), convert_flags(flags)))
|
auto first = (flags & RegexExecFlags::PrevAvailable) ? begin-1 : begin;
|
||||||
|
if (not re.boost_impl().empty() and
|
||||||
|
matched != boost::regex_search<RegexUtf8It<It>>({begin, first, end}, {end, first, end},
|
||||||
|
re.boost_impl(), convert_flags(flags)))
|
||||||
regex_mismatch(re);
|
regex_mismatch(re);
|
||||||
return matched;
|
return matched;
|
||||||
}
|
}
|
||||||
|
@ -194,17 +211,23 @@ bool regex_search(It begin, It end, const Regex& re,
|
||||||
|
|
||||||
template<typename It>
|
template<typename It>
|
||||||
bool regex_search(It begin, It end, MatchResults<It>& res, const Regex& re,
|
bool regex_search(It begin, It end, MatchResults<It>& res, const Regex& re,
|
||||||
RegexConstant::match_flag_type flags = RegexConstant::match_default)
|
RegexExecFlags flags = RegexExecFlags::None)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
auto first = (flags & RegexConstant::match_prev_avail) ? begin-1 : begin;
|
|
||||||
bool matched = boost::regex_search<RegexUtf8It<It>>({begin, first, end}, {end, first, end}, res, re, flags);
|
|
||||||
Vector<It> captures;
|
Vector<It> captures;
|
||||||
if (re.impl() and matched != regex_search(begin, end, captures, *re.impl(), convert_flags(flags)))
|
const bool matched = regex_search(begin, end, captures, *re.impl(), flags);
|
||||||
|
|
||||||
|
auto first = (flags & RegexExecFlags::PrevAvailable) ? begin-1 : begin;
|
||||||
|
boost::match_results<RegexUtf8It<It>> boost_res;
|
||||||
|
if (not re.boost_impl().empty() and
|
||||||
|
matched != boost::regex_search<RegexUtf8It<It>>({begin, first, end}, {end, first, end},
|
||||||
|
boost_res, re.boost_impl(), convert_flags(flags)))
|
||||||
regex_mismatch(re);
|
regex_mismatch(re);
|
||||||
if (re.impl() and matched)
|
if (not re.boost_impl().empty() and matched)
|
||||||
check_captures(re, res, captures);
|
check_captures(re, boost_res, captures);
|
||||||
|
|
||||||
|
res = matched ? MatchResults<It>{std::move(captures)} : MatchResults<It>{};
|
||||||
return matched;
|
return matched;
|
||||||
}
|
}
|
||||||
catch (std::runtime_error& err)
|
catch (std::runtime_error& err)
|
||||||
|
@ -219,12 +242,11 @@ void option_from_string(StringView str, Regex& re);
|
||||||
template<typename Iterator>
|
template<typename Iterator>
|
||||||
struct RegexIterator
|
struct RegexIterator
|
||||||
{
|
{
|
||||||
using Utf8It = RegexUtf8It<Iterator>;
|
|
||||||
using ValueType = MatchResults<Iterator>;
|
using ValueType = MatchResults<Iterator>;
|
||||||
|
|
||||||
RegexIterator() = default;
|
RegexIterator() = default;
|
||||||
RegexIterator(Iterator begin, Iterator end, const Regex& re,
|
RegexIterator(Iterator begin, Iterator end, const Regex& re,
|
||||||
RegexConstant::match_flag_type flags = RegexConstant::match_default)
|
RegexExecFlags flags = RegexExecFlags::None)
|
||||||
: m_regex{&re}, m_next_begin{begin}, m_begin{begin}, m_end{end}, m_flags{flags}
|
: m_regex{&re}, m_next_begin{begin}, m_begin{begin}, m_end{end}, m_flags{flags}
|
||||||
{
|
{
|
||||||
next();
|
next();
|
||||||
|
@ -261,11 +283,11 @@ private:
|
||||||
{
|
{
|
||||||
kak_assert(m_regex);
|
kak_assert(m_regex);
|
||||||
|
|
||||||
RegexConstant::match_flag_type additional_flags{};
|
RegexExecFlags additional_flags{};
|
||||||
if (m_results.size() and m_results[0].first == m_results[0].second)
|
if (m_results.size() and m_results[0].first == m_results[0].second)
|
||||||
additional_flags |= RegexConstant::match_not_initial_null;
|
additional_flags |= RegexExecFlags::NotInitialNull;
|
||||||
if (m_begin != m_next_begin)
|
if (m_begin != m_next_begin)
|
||||||
additional_flags |= RegexConstant::match_not_bob | RegexConstant::match_prev_avail;
|
additional_flags |= RegexExecFlags::NotBeginOfSubject | RegexExecFlags::PrevAvailable;
|
||||||
|
|
||||||
if (not regex_search(m_next_begin, m_end, m_results, *m_regex,
|
if (not regex_search(m_next_begin, m_end, m_results, *m_regex,
|
||||||
m_flags | additional_flags))
|
m_flags | additional_flags))
|
||||||
|
@ -279,10 +301,9 @@ private:
|
||||||
Iterator m_next_begin{};
|
Iterator m_next_begin{};
|
||||||
const Iterator m_begin{};
|
const Iterator m_begin{};
|
||||||
const Iterator m_end{};
|
const Iterator m_end{};
|
||||||
const RegexConstant::match_flag_type m_flags = RegexConstant::match_default;
|
const RegexExecFlags m_flags = RegexExecFlags::None;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // regex_hh_INCLUDED
|
#endif // regex_hh_INCLUDED
|
||||||
|
|
|
@ -96,7 +96,7 @@ struct RegexParser
|
||||||
private:
|
private:
|
||||||
struct InvalidPolicy
|
struct InvalidPolicy
|
||||||
{
|
{
|
||||||
Codepoint operator()(Codepoint cp) { throw runtime_error{"Invalid utf8 in regex"}; }
|
Codepoint operator()(Codepoint cp) { throw regex_error{"Invalid utf8 in regex"}; }
|
||||||
};
|
};
|
||||||
|
|
||||||
using Iterator = utf8::iterator<const char*, Codepoint, int, InvalidPolicy>;
|
using Iterator = utf8::iterator<const char*, Codepoint, int, InvalidPolicy>;
|
||||||
|
@ -466,7 +466,7 @@ private:
|
||||||
[[gnu::noreturn]]
|
[[gnu::noreturn]]
|
||||||
void parse_error(StringView error)
|
void parse_error(StringView error)
|
||||||
{
|
{
|
||||||
throw runtime_error(format("regex parse error: {} at '{}<<<HERE>>>{}'", error,
|
throw regex_error(format("regex parse error: {} at '{}<<<HERE>>>{}'", error,
|
||||||
StringView{m_regex.begin(), m_pos.base()},
|
StringView{m_regex.begin(), m_pos.base()},
|
||||||
StringView{m_pos.base(), m_regex.end()}));
|
StringView{m_pos.base(), m_regex.end()}));
|
||||||
}
|
}
|
||||||
|
@ -515,8 +515,8 @@ const RegexParser::ControlEscape RegexParser::control_escapes[5] = {
|
||||||
|
|
||||||
struct RegexCompiler
|
struct RegexCompiler
|
||||||
{
|
{
|
||||||
RegexCompiler(const ParsedRegex& parsed_regex, MatchDirection direction)
|
RegexCompiler(const ParsedRegex& parsed_regex, RegexCompileFlags flags, MatchDirection direction)
|
||||||
: m_parsed_regex{parsed_regex}, m_forward{direction == MatchDirection::Forward}
|
: m_parsed_regex{parsed_regex}, m_flags(flags), m_forward{direction == MatchDirection::Forward}
|
||||||
{
|
{
|
||||||
compile_node(m_parsed_regex.ast);
|
compile_node(m_parsed_regex.ast);
|
||||||
push_inst(CompiledRegex::Match);
|
push_inst(CompiledRegex::Match);
|
||||||
|
@ -535,7 +535,7 @@ private:
|
||||||
const auto start_pos = m_program.instructions.size();
|
const auto start_pos = m_program.instructions.size();
|
||||||
|
|
||||||
const Codepoint capture = (node->op == ParsedRegex::Alternation or node->op == ParsedRegex::Sequence) ? node->value : -1;
|
const Codepoint capture = (node->op == ParsedRegex::Alternation or node->op == ParsedRegex::Sequence) ? node->value : -1;
|
||||||
if (capture != -1)
|
if (capture != -1 and (capture == 0 or not (m_flags & RegexCompileFlags::NoSubs)))
|
||||||
push_inst(CompiledRegex::Save, capture * 2 + (m_forward ? 0 : 1));
|
push_inst(CompiledRegex::Save, capture * 2 + (m_forward ? 0 : 1));
|
||||||
|
|
||||||
Vector<uint32_t> goto_inner_end_offsets;
|
Vector<uint32_t> goto_inner_end_offsets;
|
||||||
|
@ -629,7 +629,7 @@ private:
|
||||||
for (auto& offset : goto_inner_end_offsets)
|
for (auto& offset : goto_inner_end_offsets)
|
||||||
m_program.instructions[offset].param = m_program.instructions.size();
|
m_program.instructions[offset].param = m_program.instructions.size();
|
||||||
|
|
||||||
if (capture != -1)
|
if (capture != -1 and (capture == 0 or not (m_flags & RegexCompileFlags::NoSubs)))
|
||||||
push_inst(CompiledRegex::Save, capture * 2 + (m_forward ? 1 : 0));
|
push_inst(CompiledRegex::Save, capture * 2 + (m_forward ? 1 : 0));
|
||||||
|
|
||||||
return start_pos;
|
return start_pos;
|
||||||
|
@ -797,6 +797,7 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
CompiledRegex m_program;
|
CompiledRegex m_program;
|
||||||
|
RegexCompileFlags m_flags;
|
||||||
const ParsedRegex& m_parsed_regex;
|
const ParsedRegex& m_parsed_regex;
|
||||||
const bool m_forward;
|
const bool m_forward;
|
||||||
};
|
};
|
||||||
|
@ -878,9 +879,9 @@ void dump_regex(const CompiledRegex& program)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CompiledRegex compile_regex(StringView re, MatchDirection direction)
|
CompiledRegex compile_regex(StringView re, RegexCompileFlags flags, MatchDirection direction)
|
||||||
{
|
{
|
||||||
return RegexCompiler{RegexParser::parse(re), direction}.get_compiled_regex();
|
return RegexCompiler{RegexParser::parse(re), flags, direction}.get_compiled_regex();
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
|
@ -891,7 +892,7 @@ struct TestVM : CompiledRegex, ThreadedRegexVM<const char*, dir>
|
||||||
using VMType = ThreadedRegexVM<const char*, dir>;
|
using VMType = ThreadedRegexVM<const char*, dir>;
|
||||||
|
|
||||||
TestVM(StringView re, bool dump = false)
|
TestVM(StringView re, bool dump = false)
|
||||||
: CompiledRegex{compile_regex(re, dir)},
|
: CompiledRegex{compile_regex(re, RegexCompileFlags::None, dir)},
|
||||||
VMType{(const CompiledRegex&)*this}
|
VMType{(const CompiledRegex&)*this}
|
||||||
{ if (dump) dump_regex(*this); }
|
{ if (dump) dump_regex(*this); }
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,11 @@
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
|
||||||
|
struct regex_error : runtime_error
|
||||||
|
{
|
||||||
|
using runtime_error::runtime_error;
|
||||||
|
};
|
||||||
|
|
||||||
enum class MatchDirection
|
enum class MatchDirection
|
||||||
{
|
{
|
||||||
Forward,
|
Forward,
|
||||||
|
@ -66,7 +71,15 @@ struct CompiledRegex : RefCountable
|
||||||
std::unique_ptr<StartChars> start_chars;
|
std::unique_ptr<StartChars> start_chars;
|
||||||
};
|
};
|
||||||
|
|
||||||
CompiledRegex compile_regex(StringView re, MatchDirection direction = MatchDirection::Forward);
|
enum RegexCompileFlags
|
||||||
|
{
|
||||||
|
None = 0,
|
||||||
|
NoSubs = 1 << 0,
|
||||||
|
Optimize = 1 << 1
|
||||||
|
};
|
||||||
|
constexpr bool with_bit_ops(Meta::Type<RegexCompileFlags>) { return true; }
|
||||||
|
|
||||||
|
CompiledRegex compile_regex(StringView re, RegexCompileFlags flags, MatchDirection direction = MatchDirection::Forward);
|
||||||
|
|
||||||
enum class RegexExecFlags
|
enum class RegexExecFlags
|
||||||
{
|
{
|
||||||
|
@ -475,7 +488,7 @@ bool regex_search(It begin, It end, Vector<It>& captures, const CompiledRegex& r
|
||||||
ThreadedRegexVM<It, direction> vm{re};
|
ThreadedRegexVM<It, direction> vm{re};
|
||||||
if (vm.exec(begin, end, flags | RegexExecFlags::Search))
|
if (vm.exec(begin, end, flags | RegexExecFlags::Search))
|
||||||
{
|
{
|
||||||
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures));
|
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(captures));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -837,7 +837,7 @@ void select_buffer(SelectionList& selections)
|
||||||
selections = SelectionList{ buffer, target_eol({{0,0}, buffer.back_coord()}) };
|
selections = SelectionList{ buffer, target_eol({{0,0}, buffer.back_coord()}) };
|
||||||
}
|
}
|
||||||
|
|
||||||
static RegexConstant::match_flag_type
|
static RegexExecFlags
|
||||||
match_flags(const Buffer& buf, const BufferIterator& begin, const BufferIterator& end)
|
match_flags(const Buffer& buf, const BufferIterator& begin, const BufferIterator& end)
|
||||||
{
|
{
|
||||||
return match_flags(is_bol(begin.coord()), is_eol(buf, end.coord()),
|
return match_flags(is_bol(begin.coord()), is_eol(buf, end.coord()),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user