2014-10-13 14:12:33 +02:00
|
|
|
#ifndef regex_hh_INCLUDED
|
|
|
|
#define regex_hh_INCLUDED
|
|
|
|
|
|
|
|
#include "string.hh"
|
2017-10-09 16:12:42 +02:00
|
|
|
#include "string_utils.hh"
|
2015-07-14 22:06:41 +02:00
|
|
|
#include "exception.hh"
|
2016-05-10 10:12:30 +02:00
|
|
|
#include "utf8_iterator.hh"
|
2017-10-02 08:59:04 +02:00
|
|
|
#include "regex_impl.hh"
|
2014-10-13 14:12:33 +02:00
|
|
|
|
|
|
|
#include <boost/regex.hpp>
|
|
|
|
|
|
|
|
namespace Kakoune
|
|
|
|
{
|
|
|
|
|
2015-07-14 22:06:41 +02:00
|
|
|
struct regex_error : runtime_error
|
|
|
|
{
|
|
|
|
regex_error(StringView desc)
|
|
|
|
: runtime_error{format("regex error: '{}'", desc)}
|
|
|
|
{}
|
|
|
|
};
|
|
|
|
|
2016-05-19 22:45:23 +02:00
|
|
|
// Underlying boost regex type: wchar_t patterns using boost's c_regex_traits.
using RegexBase = boost::basic_regex<wchar_t, boost::c_regex_traits<wchar_t>>;
|
|
|
|
|
2014-10-13 20:40:27 +02:00
|
|
|
// Regex that keeps track of its string representation
|
2016-12-15 00:50:29 +01:00
|
|
|
class Regex : public RegexBase
|
2014-10-13 20:40:27 +02:00
|
|
|
{
|
2016-12-14 21:59:39 +01:00
|
|
|
public:
|
2014-10-13 20:40:27 +02:00
|
|
|
Regex() = default;
|
|
|
|
|
2016-05-10 10:12:30 +02:00
|
|
|
explicit Regex(StringView re, flag_type flags = ECMAScript);
|
2014-10-13 20:40:27 +02:00
|
|
|
bool empty() const { return m_str.empty(); }
|
2015-04-09 00:16:27 +02:00
|
|
|
bool operator==(const Regex& other) const { return m_str == other.m_str; }
|
|
|
|
bool operator!=(const Regex& other) const { return m_str != other.m_str; }
|
2014-10-13 20:40:27 +02:00
|
|
|
|
2015-03-10 20:33:46 +01:00
|
|
|
const String& str() const { return m_str; }
|
2014-10-13 20:40:27 +02:00
|
|
|
|
2016-08-18 23:42:31 +02:00
|
|
|
static constexpr const char* option_type_name = "regex";
|
2016-08-06 10:05:50 +02:00
|
|
|
|
2017-10-02 08:59:04 +02:00
|
|
|
const CompiledRegex& impl() const { return m_impl; }
|
|
|
|
|
2014-10-13 20:40:27 +02:00
|
|
|
private:
|
|
|
|
String m_str;
|
2017-10-02 08:59:04 +02:00
|
|
|
CompiledRegex m_impl;
|
2014-10-13 20:40:27 +02:00
|
|
|
};
|
2015-03-10 20:33:46 +01:00
|
|
|
|
2016-05-10 10:12:30 +02:00
|
|
|
template<typename It>
|
|
|
|
using RegexUtf8It = utf8::iterator<It, wchar_t, ssize_t>;
|
2015-03-10 20:33:46 +01:00
|
|
|
|
2016-05-19 22:45:23 +02:00
|
|
|
template<typename It>
|
|
|
|
using RegexIteratorBase = boost::regex_iterator<RegexUtf8It<It>, wchar_t,
|
|
|
|
boost::c_regex_traits<wchar_t>>;
|
|
|
|
|
2016-05-10 10:12:30 +02:00
|
|
|
// Shorthand for the boost namespace holding the regex flag constants.
namespace RegexConstant = boost::regex_constants;
|
2014-10-13 14:12:33 +02:00
|
|
|
|
|
|
|
template<typename Iterator>
|
2016-05-10 10:12:30 +02:00
|
|
|
struct MatchResults : boost::match_results<RegexUtf8It<Iterator>>
|
|
|
|
{
|
|
|
|
using ParentType = boost::match_results<RegexUtf8It<Iterator>>;
|
|
|
|
struct SubMatch : std::pair<Iterator, Iterator>
|
|
|
|
{
|
|
|
|
SubMatch() = default;
|
|
|
|
SubMatch(const boost::sub_match<RegexUtf8It<Iterator>>& m)
|
|
|
|
: std::pair<Iterator, Iterator>{m.first.base(), m.second.base()},
|
|
|
|
matched{m.matched}
|
|
|
|
{}
|
|
|
|
|
|
|
|
bool matched = false;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct iterator : boost::match_results<RegexUtf8It<Iterator>>::iterator
|
|
|
|
{
|
|
|
|
using ParentType = typename boost::match_results<RegexUtf8It<Iterator>>::iterator;
|
|
|
|
iterator(const ParentType& it) : ParentType(it) {}
|
|
|
|
|
|
|
|
SubMatch operator*() const { return {ParentType::operator*()}; }
|
|
|
|
};
|
|
|
|
|
|
|
|
iterator begin() const { return {ParentType::begin()}; }
|
|
|
|
iterator cbegin() const { return {ParentType::cbegin()}; }
|
|
|
|
iterator end() const { return {ParentType::end()}; }
|
|
|
|
iterator cend() const { return {ParentType::cend()}; }
|
|
|
|
|
|
|
|
SubMatch operator[](size_t s) const { return {ParentType::operator[](s)}; }
|
|
|
|
};
|
2014-10-13 14:12:33 +02:00
|
|
|
|
|
|
|
template<typename Iterator>
|
2016-05-19 22:45:23 +02:00
|
|
|
struct RegexIterator : RegexIteratorBase<Iterator>
|
2016-05-10 10:12:30 +02:00
|
|
|
{
|
|
|
|
using Utf8It = RegexUtf8It<Iterator>;
|
|
|
|
using ValueType = MatchResults<Iterator>;
|
|
|
|
|
|
|
|
RegexIterator() = default;
|
|
|
|
RegexIterator(Iterator begin, Iterator end, const Regex& re,
|
|
|
|
RegexConstant::match_flag_type flags = RegexConstant::match_default)
|
2016-05-19 22:45:23 +02:00
|
|
|
: RegexIteratorBase<Iterator>{Utf8It{begin, begin, end}, Utf8It{end, begin, end}, re, flags} {}
|
2014-10-13 14:12:33 +02:00
|
|
|
|
2016-05-19 22:45:23 +02:00
|
|
|
const ValueType& operator*() const { return *reinterpret_cast<const ValueType*>(&RegexIteratorBase<Iterator>::operator*()); }
|
|
|
|
const ValueType* operator->() const { return reinterpret_cast<const ValueType*>(RegexIteratorBase<Iterator>::operator->()); }
|
2016-05-10 10:12:30 +02:00
|
|
|
};
|
2015-12-23 22:43:07 +01:00
|
|
|
|
2016-05-10 10:12:30 +02:00
|
|
|
// Builds boost match flags from four switches. A true switch contributes
// match_default; a false one contributes the corresponding match_not_bol,
// match_not_eol, match_not_bow or match_not_eow flag.
inline RegexConstant::match_flag_type match_flags(bool bol, bool eol, bool bow, bool eow)
{
    const auto bol_flag = bol ? RegexConstant::match_default : RegexConstant::match_not_bol;
    const auto eol_flag = eol ? RegexConstant::match_default : RegexConstant::match_not_eol;
    const auto bow_flag = bow ? RegexConstant::match_default : RegexConstant::match_not_bow;
    const auto eow_flag = eow ? RegexConstant::match_default : RegexConstant::match_not_eow;

    return bol_flag | eol_flag | bow_flag | eow_flag;
}
|
|
|
|
|
2017-10-02 08:59:04 +02:00
|
|
|
// Called by the helpers in this header whenever the boost result and the
// re.impl() result disagree for re. Defined outside this header.
void regex_mismatch(const Regex& re);
|
|
|
|
|
|
|
|
template<typename It>
|
|
|
|
void check_captures(const Regex& re, const MatchResults<It>& res, const Vector<It>& captures)
|
|
|
|
{
|
|
|
|
if (res.size() > captures.size() * 2)
|
|
|
|
return regex_mismatch(re);
|
|
|
|
|
|
|
|
for (size_t i = 0; i < res.size(); ++i)
|
|
|
|
{
|
|
|
|
if (not res[i].matched)
|
|
|
|
{
|
|
|
|
if (captures[i*2] != It{} or captures[i*2+1] != It{})
|
|
|
|
regex_mismatch(re);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (res[i].first != captures[i*2])
|
|
|
|
regex_mismatch(re);
|
|
|
|
if (res[i].second != captures[i*2+1])
|
|
|
|
regex_mismatch(re);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-05-10 10:12:30 +02:00
|
|
|
template<typename It>
|
|
|
|
bool regex_match(It begin, It end, const Regex& re)
|
|
|
|
{
|
2016-09-06 14:55:14 +02:00
|
|
|
try
|
|
|
|
{
|
2017-10-02 08:59:04 +02:00
|
|
|
bool matched = boost::regex_match<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end}, re);
|
|
|
|
if (re.impl() and matched != regex_match(begin, end, re.impl()))
|
|
|
|
regex_mismatch(re);
|
|
|
|
return matched;
|
2016-09-06 14:55:14 +02:00
|
|
|
}
|
|
|
|
catch (std::runtime_error& err)
|
|
|
|
{
|
|
|
|
throw runtime_error{format("Regex matching error: {}", err.what())};
|
|
|
|
}
|
2016-05-10 10:12:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<typename It>
|
|
|
|
bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
|
|
|
|
{
|
2016-09-06 14:55:14 +02:00
|
|
|
try
|
|
|
|
{
|
2017-10-02 08:59:04 +02:00
|
|
|
bool matched = boost::regex_match<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end}, res, re);
|
|
|
|
Vector<It> captures;
|
|
|
|
if (re.impl() and matched != regex_match(begin, end, captures, re.impl()))
|
|
|
|
regex_mismatch(re);
|
|
|
|
if (re.impl() and matched)
|
|
|
|
check_captures(re, res, captures);
|
|
|
|
return matched;
|
2016-09-06 14:55:14 +02:00
|
|
|
}
|
|
|
|
catch (std::runtime_error& err)
|
|
|
|
{
|
|
|
|
throw runtime_error{format("Regex matching error: {}", err.what())};
|
|
|
|
}
|
2016-05-10 10:12:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<typename It>
|
|
|
|
bool regex_search(It begin, It end, const Regex& re,
|
|
|
|
RegexConstant::match_flag_type flags = RegexConstant::match_default)
|
|
|
|
{
|
2016-09-06 14:55:14 +02:00
|
|
|
try
|
|
|
|
{
|
2017-10-02 08:59:04 +02:00
|
|
|
bool matched = boost::regex_search<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end}, re, flags);
|
|
|
|
if (re.impl() and matched != regex_search(begin, end, re.impl()))
|
|
|
|
regex_mismatch(re);
|
|
|
|
return matched;
|
2016-09-06 14:55:14 +02:00
|
|
|
}
|
|
|
|
catch (std::runtime_error& err)
|
|
|
|
{
|
|
|
|
throw runtime_error{format("Regex searching error: {}", err.what())};
|
|
|
|
}
|
2016-05-10 10:12:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<typename It>
|
|
|
|
bool regex_search(It begin, It end, MatchResults<It>& res, const Regex& re,
|
|
|
|
RegexConstant::match_flag_type flags = RegexConstant::match_default)
|
|
|
|
{
|
2016-09-06 14:55:14 +02:00
|
|
|
try
|
|
|
|
{
|
2017-10-02 08:59:04 +02:00
|
|
|
bool matched = boost::regex_search<RegexUtf8It<It>>({begin, begin, end}, {end, begin, end}, res, re, flags);
|
|
|
|
Vector<It> captures;
|
|
|
|
if (re.impl() and matched != regex_search(begin, end, captures, re.impl()))
|
|
|
|
regex_mismatch(re);
|
|
|
|
if (re.impl() and matched)
|
|
|
|
check_captures(re, res, captures);
|
|
|
|
return matched;
|
2016-09-06 14:55:14 +02:00
|
|
|
}
|
|
|
|
catch (std::runtime_error& err)
|
|
|
|
{
|
|
|
|
throw runtime_error{format("Regex searching error: {}", err.what())};
|
|
|
|
}
|
2016-05-10 10:12:30 +02:00
|
|
|
}
|
|
|
|
|
2014-10-13 14:12:33 +02:00
|
|
|
// Converts a Regex to its String form; defined outside this header.
// NOTE(review): presumably used by the option system (see
// Regex::option_type_name) - confirm against the definition.
String option_to_string(const Regex& re);
|
|
|
|
// Sets re from the text in str; defined outside this header.
// NOTE(review): presumably the inverse of option_to_string - confirm
// against the definition.
void option_from_string(StringView str, Regex& re);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif // regex_hh_INCLUDED
|