2014-10-13 14:12:33 +02:00
|
|
|
#ifndef regex_hh_INCLUDED
|
|
|
|
#define regex_hh_INCLUDED
|
|
|
|
|
|
|
|
#include "string.hh"
|
2017-10-02 08:59:04 +02:00
|
|
|
#include "regex_impl.hh"
|
2014-10-13 14:12:33 +02:00
|
|
|
|
|
|
|
namespace Kakoune
|
|
|
|
{
|
|
|
|
|
2014-10-13 20:40:27 +02:00
|
|
|
// Regex that keeps track of its string representation
|
2017-10-09 08:04:14 +02:00
|
|
|
class Regex
|
2014-10-13 20:40:27 +02:00
|
|
|
{
|
2016-12-14 21:59:39 +01:00
|
|
|
public:
|
2014-10-13 20:40:27 +02:00
|
|
|
Regex() = default;
|
|
|
|
|
2017-12-01 12:57:02 +01:00
|
|
|
explicit Regex(StringView re, RegexCompileFlags flags = RegexCompileFlags::None);
|
2014-10-13 20:40:27 +02:00
|
|
|
bool empty() const { return m_str.empty(); }
|
2015-04-09 00:16:27 +02:00
|
|
|
bool operator==(const Regex& other) const { return m_str == other.m_str; }
|
|
|
|
bool operator!=(const Regex& other) const { return m_str != other.m_str; }
|
2014-10-13 20:40:27 +02:00
|
|
|
|
2015-03-10 20:33:46 +01:00
|
|
|
const String& str() const { return m_str; }
|
2014-10-13 20:40:27 +02:00
|
|
|
|
2017-10-09 08:04:14 +02:00
|
|
|
size_t mark_count() const { return m_impl->save_count / 2 - 1; }
|
|
|
|
|
2016-08-18 23:42:31 +02:00
|
|
|
static constexpr const char* option_type_name = "regex";
|
2016-08-06 10:05:50 +02:00
|
|
|
|
2017-10-06 07:40:27 +02:00
|
|
|
const CompiledRegex* impl() const { return m_impl.get(); }
|
2017-10-02 08:59:04 +02:00
|
|
|
|
2014-10-13 20:40:27 +02:00
|
|
|
private:
|
2017-10-06 07:40:27 +02:00
|
|
|
RefPtr<CompiledRegex> m_impl;
|
2017-10-09 08:04:14 +02:00
|
|
|
String m_str;
|
2014-10-13 20:40:27 +02:00
|
|
|
};
|
2015-03-10 20:33:46 +01:00
|
|
|
|
2014-10-13 14:12:33 +02:00
|
|
|
template<typename Iterator>
|
2017-10-09 08:04:14 +02:00
|
|
|
struct MatchResults
|
2016-05-10 10:12:30 +02:00
|
|
|
{
|
|
|
|
struct SubMatch : std::pair<Iterator, Iterator>
|
|
|
|
{
|
|
|
|
SubMatch() = default;
|
2017-10-09 08:04:14 +02:00
|
|
|
SubMatch(Iterator begin, Iterator end)
|
|
|
|
: std::pair<Iterator, Iterator>{begin, end}, matched{begin != Iterator{}}
|
2016-05-10 10:12:30 +02:00
|
|
|
{}
|
|
|
|
|
|
|
|
bool matched = false;
|
|
|
|
};
|
|
|
|
|
2017-10-09 08:04:14 +02:00
|
|
|
struct iterator : std::iterator<std::bidirectional_iterator_tag, SubMatch, size_t, SubMatch*, SubMatch>
|
2016-05-10 10:12:30 +02:00
|
|
|
{
|
2017-11-12 05:30:21 +01:00
|
|
|
using It = typename Vector<Iterator, MemoryDomain::Regex>::const_iterator;
|
2017-10-09 08:04:14 +02:00
|
|
|
|
|
|
|
iterator() = default;
|
|
|
|
iterator(It it) : m_it{std::move(it)} {}
|
|
|
|
|
|
|
|
iterator& operator--() { m_it += 2; return *this; }
|
|
|
|
iterator& operator++() { m_it += 2; return *this; }
|
|
|
|
SubMatch operator*() const { return {*m_it, *(m_it+1)}; }
|
|
|
|
|
|
|
|
friend bool operator==(const iterator& lhs, const iterator& rhs) { return lhs.m_it == rhs.m_it; }
|
|
|
|
friend bool operator!=(const iterator& lhs, const iterator& rhs) { return lhs.m_it != rhs.m_it; }
|
|
|
|
private:
|
2016-05-10 10:12:30 +02:00
|
|
|
|
2017-10-09 08:04:14 +02:00
|
|
|
It m_it;
|
2016-05-10 10:12:30 +02:00
|
|
|
};
|
|
|
|
|
2017-10-09 08:04:14 +02:00
|
|
|
MatchResults() = default;
|
2017-11-12 05:30:21 +01:00
|
|
|
MatchResults(Vector<Iterator, MemoryDomain::Regex> values) : m_values{std::move(values)} {}
|
2017-10-09 08:04:14 +02:00
|
|
|
|
|
|
|
iterator begin() const { return iterator{m_values.begin()}; }
|
|
|
|
iterator cbegin() const { return iterator{m_values.cbegin()}; }
|
|
|
|
iterator end() const { return iterator{m_values.end()}; }
|
|
|
|
iterator cend() const { return iterator{m_values.cend()}; }
|
|
|
|
|
|
|
|
size_t size() const { return m_values.size() / 2; }
|
|
|
|
bool empty() const { return m_values.empty(); }
|
|
|
|
|
|
|
|
SubMatch operator[](size_t i) const
|
|
|
|
{
|
|
|
|
return i * 2 < m_values.size() ?
|
|
|
|
SubMatch{m_values[i*2], m_values[i*2+1]} : SubMatch{};
|
|
|
|
}
|
|
|
|
|
|
|
|
friend bool operator==(const MatchResults& lhs, const MatchResults& rhs)
|
|
|
|
{
|
|
|
|
return lhs.m_values == rhs.m_values;
|
|
|
|
}
|
2016-05-10 10:12:30 +02:00
|
|
|
|
2017-10-09 08:04:14 +02:00
|
|
|
friend bool operator!=(const MatchResults& lhs, const MatchResults& rhs)
|
|
|
|
{
|
|
|
|
return not (lhs == rhs);
|
|
|
|
}
|
|
|
|
|
|
|
|
void swap(MatchResults& other)
|
|
|
|
{
|
|
|
|
m_values.swap(other.m_values);
|
|
|
|
}
|
|
|
|
|
2017-11-29 07:07:04 +01:00
|
|
|
Vector<Iterator, MemoryDomain::Regex>& values() { return m_values; }
|
|
|
|
|
2017-10-09 08:04:14 +02:00
|
|
|
private:
|
2017-11-12 05:30:21 +01:00
|
|
|
Vector<Iterator, MemoryDomain::Regex> m_values;
|
2016-05-10 10:12:30 +02:00
|
|
|
};
|
2014-10-13 14:12:33 +02:00
|
|
|
|
2018-03-04 19:48:10 +01:00
|
|
|
// Translates four "is this boundary allowed here" booleans into execution
// flags: every boolean that is false contributes its Not* flag, every
// boolean that is true contributes RegexExecFlags::None.
inline RegexExecFlags match_flags(bool bol, bool eol, bool bow, bool eow)
{
    const auto line_begin = bol ? RegexExecFlags::None : RegexExecFlags::NotBeginOfLine;
    const auto line_end   = eol ? RegexExecFlags::None : RegexExecFlags::NotEndOfLine;
    const auto word_begin = bow ? RegexExecFlags::None : RegexExecFlags::NotBeginOfWord;
    const auto word_end   = eow ? RegexExecFlags::None : RegexExecFlags::NotEndOfWord;

    return line_begin | line_end | word_begin | word_end;
}
|
|
|
|
|
2016-05-10 10:12:30 +02:00
|
|
|
template<typename It>
|
|
|
|
bool regex_match(It begin, It end, const Regex& re)
|
|
|
|
{
|
2018-03-04 17:10:47 +01:00
|
|
|
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
|
2018-03-04 19:48:10 +01:00
|
|
|
return vm.exec(begin, end, begin, end, RegexExecFlags::AnyMatch | RegexExecFlags::NoSaves);
|
2016-05-10 10:12:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<typename It>
|
|
|
|
bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
|
|
|
|
{
|
2017-11-29 07:07:04 +01:00
|
|
|
res.values().clear();
|
2018-03-04 17:10:47 +01:00
|
|
|
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
|
2018-03-04 19:48:10 +01:00
|
|
|
if (vm.exec(begin, end, begin, end, RegexExecFlags::None))
|
2018-03-04 17:10:47 +01:00
|
|
|
{
|
|
|
|
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
2016-05-10 10:12:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<typename It>
|
2018-03-04 19:48:10 +01:00
|
|
|
bool regex_search(It begin, It end, It subject_begin, It subject_end, const Regex& re,
|
2017-10-09 08:04:14 +02:00
|
|
|
RegexExecFlags flags = RegexExecFlags::None)
|
2016-05-10 10:12:30 +02:00
|
|
|
{
|
2018-03-04 17:10:47 +01:00
|
|
|
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
|
2018-03-04 19:48:10 +01:00
|
|
|
return vm.exec(begin, end, subject_begin, subject_end,
|
|
|
|
flags | RegexExecFlags::Search | RegexExecFlags::AnyMatch | RegexExecFlags::NoSaves);
|
2016-05-10 10:12:30 +02:00
|
|
|
}
|
|
|
|
|
2017-10-09 08:56:47 +02:00
|
|
|
template<typename It, MatchDirection direction = MatchDirection::Forward>
|
2018-03-04 19:48:10 +01:00
|
|
|
bool regex_search(It begin, It end, It subject_begin, It subject_end,
|
|
|
|
MatchResults<It>& res, const Regex& re,
|
2017-10-09 08:04:14 +02:00
|
|
|
RegexExecFlags flags = RegexExecFlags::None)
|
2016-05-10 10:12:30 +02:00
|
|
|
{
|
2017-11-29 07:07:04 +01:00
|
|
|
res.values().clear();
|
2018-03-04 17:10:47 +01:00
|
|
|
ThreadedRegexVM<It, direction> vm{*re.impl()};
|
2018-03-04 19:48:10 +01:00
|
|
|
if (vm.exec(begin, end, subject_begin, subject_end, flags | RegexExecFlags::Search))
|
2018-03-04 17:10:47 +01:00
|
|
|
{
|
|
|
|
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
2016-05-10 10:12:30 +02:00
|
|
|
}
|
|
|
|
|
2017-12-03 10:04:37 +01:00
|
|
|
template<typename It>
|
2018-03-04 19:48:10 +01:00
|
|
|
bool backward_regex_search(It begin, It end, It subject_begin, It subject_end,
|
|
|
|
MatchResults<It>& res, const Regex& re,
|
|
|
|
RegexExecFlags flags = RegexExecFlags::None)
|
2017-12-03 10:04:37 +01:00
|
|
|
{
|
2018-03-04 19:48:10 +01:00
|
|
|
return regex_search<It, MatchDirection::Backward>(begin, end, subject_begin, subject_end, res, re, flags);
|
2017-12-03 10:04:37 +01:00
|
|
|
}
|
|
|
|
|
2014-10-13 14:12:33 +02:00
|
|
|
// Declaration only; the definition is not in this header.
// NOTE(review): presumably serializes `re` for the option system -- confirm.
String option_to_string(const Regex& re);
|
|
|
|
// Declaration only; the definition is not in this header.
// NOTE(review): presumably rebuilds `re` from the text `str` -- confirm.
void option_from_string(StringView str, Regex& re);
|
|
|
|
|
2017-12-02 07:02:41 +01:00
|
|
|
template<typename Iterator, MatchDirection direction = MatchDirection::Forward>
|
2017-10-02 16:35:31 +02:00
|
|
|
struct RegexIterator
|
|
|
|
{
|
|
|
|
using ValueType = MatchResults<Iterator>;
|
|
|
|
|
|
|
|
RegexIterator() = default;
|
2018-03-04 19:48:10 +01:00
|
|
|
RegexIterator(Iterator begin, Iterator end,
|
|
|
|
Iterator subject_begin, Iterator subject_end,
|
|
|
|
const Regex& re,
|
2017-10-09 08:04:14 +02:00
|
|
|
RegexExecFlags flags = RegexExecFlags::None)
|
2018-04-25 00:13:53 +02:00
|
|
|
: m_program{re.impl()}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
|
|
|
|
m_begin{std::move(begin)}, m_end{std::move(end)},
|
|
|
|
m_subject_begin{std::move(subject_begin)}, m_subject_end{std::move(subject_end)},
|
2018-03-04 19:48:10 +01:00
|
|
|
m_flags{flags}
|
2017-10-02 16:35:31 +02:00
|
|
|
{
|
|
|
|
next();
|
|
|
|
}
|
|
|
|
|
2018-04-25 00:13:53 +02:00
|
|
|
RegexIterator(const Iterator& begin, const Iterator& end, const Regex& re,
|
2018-03-04 19:48:10 +01:00
|
|
|
RegexExecFlags flags = RegexExecFlags::None)
|
|
|
|
: RegexIterator{begin, end, begin, end, re, flags} {}
|
|
|
|
|
2018-04-25 00:13:53 +02:00
|
|
|
const ValueType& operator*() const { kak_assert(m_program); return m_results; }
|
|
|
|
const ValueType* operator->() const { kak_assert(m_program); return &m_results; }
|
2017-10-02 16:35:31 +02:00
|
|
|
|
|
|
|
RegexIterator& operator++()
|
|
|
|
{
|
|
|
|
next();
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
friend bool operator==(const RegexIterator& lhs, const RegexIterator& rhs)
|
|
|
|
{
|
2018-04-25 00:13:53 +02:00
|
|
|
if (lhs.m_program == nullptr and rhs.m_program == nullptr)
|
2017-10-02 16:35:31 +02:00
|
|
|
return true;
|
|
|
|
|
2018-04-25 00:13:53 +02:00
|
|
|
return lhs.m_program == rhs.m_program and
|
2017-12-02 07:02:41 +01:00
|
|
|
lhs.m_next_pos == rhs.m_next_pos and
|
2017-10-02 16:35:31 +02:00
|
|
|
lhs.m_end == rhs.m_end and
|
|
|
|
lhs.m_flags == rhs.m_flags and
|
|
|
|
lhs.m_results == rhs.m_results;
|
|
|
|
}
|
|
|
|
|
|
|
|
friend bool operator!=(const RegexIterator& lhs, const RegexIterator& rhs)
|
|
|
|
{
|
|
|
|
return not (lhs == rhs);
|
|
|
|
}
|
|
|
|
|
2017-12-02 07:02:41 +01:00
|
|
|
RegexIterator begin() { return *this; }
|
|
|
|
RegexIterator end() { return {}; }
|
|
|
|
|
2017-10-02 16:35:31 +02:00
|
|
|
private:
|
|
|
|
void next()
|
|
|
|
{
|
2018-04-25 00:13:53 +02:00
|
|
|
kak_assert(m_program);
|
2017-10-02 16:35:31 +02:00
|
|
|
|
2018-04-25 00:13:53 +02:00
|
|
|
auto additional_flags = RegexExecFlags::Search;
|
2017-10-02 16:35:31 +02:00
|
|
|
if (m_results.size() and m_results[0].first == m_results[0].second)
|
2017-10-09 08:04:14 +02:00
|
|
|
additional_flags |= RegexExecFlags::NotInitialNull;
|
2017-10-02 16:35:31 +02:00
|
|
|
|
2018-04-25 00:13:53 +02:00
|
|
|
ThreadedRegexVM<Iterator, direction> vm{*m_program};
|
|
|
|
constexpr bool forward = direction == MatchDirection::Forward;
|
|
|
|
|
|
|
|
if (vm.exec(forward ? m_next_pos : m_begin, forward ? m_end : m_next_pos,
|
|
|
|
m_subject_begin, m_subject_end, m_flags | additional_flags))
|
2017-12-02 07:02:41 +01:00
|
|
|
{
|
2018-04-25 00:13:53 +02:00
|
|
|
m_results.values().clear();
|
|
|
|
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(m_results.values()));
|
|
|
|
m_next_pos = (direction == MatchDirection::Forward) ? m_results[0].second : m_results[0].first;
|
2017-12-02 07:02:41 +01:00
|
|
|
}
|
2017-10-02 16:35:31 +02:00
|
|
|
else
|
2018-04-25 00:13:53 +02:00
|
|
|
m_program = nullptr;
|
2017-10-02 16:35:31 +02:00
|
|
|
}
|
|
|
|
|
2018-04-25 00:13:53 +02:00
|
|
|
const CompiledRegex* m_program = nullptr;
|
2017-10-02 16:35:31 +02:00
|
|
|
MatchResults<Iterator> m_results;
|
2017-12-02 07:02:41 +01:00
|
|
|
Iterator m_next_pos{};
|
2017-10-02 16:35:31 +02:00
|
|
|
const Iterator m_begin{};
|
|
|
|
const Iterator m_end{};
|
2018-03-04 19:48:10 +01:00
|
|
|
const Iterator m_subject_begin{};
|
|
|
|
const Iterator m_subject_end{};
|
2017-10-09 08:04:14 +02:00
|
|
|
const RegexExecFlags m_flags = RegexExecFlags::None;
|
2017-10-02 16:35:31 +02:00
|
|
|
};
|
|
|
|
|
2014-10-13 14:12:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif // regex_hh_INCLUDED
|