Regex: take the full subject range as a parameter
To allow more general lookarounds outside of the actual search range, pass a second range (the actual subject). This allows us to remove various flags such as PrevAvailable or NotBeginOfSubject, which are now easy to check from the subject range. Fixes #1902.
This commit is contained in:
parent
d9e44dfacf
commit
fb65fa60f8
|
@ -363,13 +363,12 @@ private:
|
||||||
kak_assert(matches.size() % m_faces.size() == 0);
|
kak_assert(matches.size() % m_faces.size() == 0);
|
||||||
using RegexIt = RegexIterator<BufferIterator>;
|
using RegexIt = RegexIterator<BufferIterator>;
|
||||||
RegexIt re_it{get_iterator(buffer, range.begin),
|
RegexIt re_it{get_iterator(buffer, range.begin),
|
||||||
get_iterator(buffer, range.end), m_regex,
|
get_iterator(buffer, range.end),
|
||||||
|
buffer.begin(), buffer.end(), m_regex,
|
||||||
match_flags(is_bol(range.begin),
|
match_flags(is_bol(range.begin),
|
||||||
is_eol(buffer, range.end),
|
is_eol(buffer, range.end),
|
||||||
is_bow(buffer, range.begin),
|
is_bow(buffer, range.begin),
|
||||||
is_eow(buffer, range.end),
|
is_eow(buffer, range.end))};
|
||||||
range.begin == BufferCoord{0,0},
|
|
||||||
buffer.is_end(range.end))};
|
|
||||||
RegexIt re_end;
|
RegexIt re_end;
|
||||||
for (; re_it != re_end; ++re_it)
|
for (; re_it != re_end; ++re_it)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1041,10 +1041,9 @@ void keep(Context& context, NormalParams params)
|
||||||
// give more intuitive behaviours in keep use cases.
|
// give more intuitive behaviours in keep use cases.
|
||||||
const auto flags = match_flags(is_bol(begin.coord()), false,
|
const auto flags = match_flags(is_bol(begin.coord()), false,
|
||||||
is_bow(buffer, begin.coord()),
|
is_bow(buffer, begin.coord()),
|
||||||
is_eow(buffer, end.coord()),
|
is_eow(buffer, end.coord())) |
|
||||||
true, true) |
|
|
||||||
RegexExecFlags::AnyMatch;
|
RegexExecFlags::AnyMatch;
|
||||||
if (regex_search(begin, end, regex, flags) == matching)
|
if (regex_search(begin, end, begin, end, regex, flags) == matching)
|
||||||
keep.push_back(sel);
|
keep.push_back(sel);
|
||||||
}
|
}
|
||||||
if (keep.empty())
|
if (keep.empty())
|
||||||
|
|
50
src/regex.hh
50
src/regex.hh
|
@ -100,21 +100,19 @@ private:
|
||||||
Vector<Iterator, MemoryDomain::Regex> m_values;
|
Vector<Iterator, MemoryDomain::Regex> m_values;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline RegexExecFlags match_flags(bool bol, bool eol, bool bow, bool eow, bool bos, bool eos)
|
inline RegexExecFlags match_flags(bool bol, bool eol, bool bow, bool eow)
|
||||||
{
|
{
|
||||||
return (bol ? RegexExecFlags::None : RegexExecFlags::NotBeginOfLine) |
|
return (bol ? RegexExecFlags::None : RegexExecFlags::NotBeginOfLine) |
|
||||||
(eol ? RegexExecFlags::None : RegexExecFlags::NotEndOfLine) |
|
(eol ? RegexExecFlags::None : RegexExecFlags::NotEndOfLine) |
|
||||||
(bow ? RegexExecFlags::None : RegexExecFlags::NotBeginOfWord) |
|
(bow ? RegexExecFlags::None : RegexExecFlags::NotBeginOfWord) |
|
||||||
(eow ? RegexExecFlags::None : RegexExecFlags::NotEndOfWord) |
|
(eow ? RegexExecFlags::None : RegexExecFlags::NotEndOfWord);
|
||||||
(bos ? RegexExecFlags::None : RegexExecFlags::NotBeginOfSubject) |
|
|
||||||
(eos ? RegexExecFlags::None : RegexExecFlags::NotEndOfSubject);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename It>
|
template<typename It>
|
||||||
bool regex_match(It begin, It end, const Regex& re)
|
bool regex_match(It begin, It end, const Regex& re)
|
||||||
{
|
{
|
||||||
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
|
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
|
||||||
return vm.exec(begin, end, RegexExecFlags::AnyMatch | RegexExecFlags::NoSaves);
|
return vm.exec(begin, end, begin, end, RegexExecFlags::AnyMatch | RegexExecFlags::NoSaves);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename It>
|
template<typename It>
|
||||||
|
@ -122,7 +120,7 @@ bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
|
||||||
{
|
{
|
||||||
res.values().clear();
|
res.values().clear();
|
||||||
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
|
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
|
||||||
if (vm.exec(begin, end, RegexExecFlags::None))
|
if (vm.exec(begin, end, begin, end, RegexExecFlags::None))
|
||||||
{
|
{
|
||||||
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
|
std::copy(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
|
||||||
return true;
|
return true;
|
||||||
|
@ -131,20 +129,22 @@ bool regex_match(It begin, It end, MatchResults<It>& res, const Regex& re)
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename It>
|
template<typename It>
|
||||||
bool regex_search(It begin, It end, const Regex& re,
|
bool regex_search(It begin, It end, It subject_begin, It subject_end, const Regex& re,
|
||||||
RegexExecFlags flags = RegexExecFlags::None)
|
RegexExecFlags flags = RegexExecFlags::None)
|
||||||
{
|
{
|
||||||
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
|
ThreadedRegexVM<It, MatchDirection::Forward> vm{*re.impl()};
|
||||||
return vm.exec(begin, end, flags | RegexExecFlags::Search | RegexExecFlags::AnyMatch | RegexExecFlags::NoSaves);
|
return vm.exec(begin, end, subject_begin, subject_end,
|
||||||
|
flags | RegexExecFlags::Search | RegexExecFlags::AnyMatch | RegexExecFlags::NoSaves);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename It, MatchDirection direction = MatchDirection::Forward>
|
template<typename It, MatchDirection direction = MatchDirection::Forward>
|
||||||
bool regex_search(It begin, It end, MatchResults<It>& res, const Regex& re,
|
bool regex_search(It begin, It end, It subject_begin, It subject_end,
|
||||||
|
MatchResults<It>& res, const Regex& re,
|
||||||
RegexExecFlags flags = RegexExecFlags::None)
|
RegexExecFlags flags = RegexExecFlags::None)
|
||||||
{
|
{
|
||||||
res.values().clear();
|
res.values().clear();
|
||||||
ThreadedRegexVM<It, direction> vm{*re.impl()};
|
ThreadedRegexVM<It, direction> vm{*re.impl()};
|
||||||
if (vm.exec(begin, end, flags | RegexExecFlags::Search))
|
if (vm.exec(begin, end, subject_begin, subject_end, flags | RegexExecFlags::Search))
|
||||||
{
|
{
|
||||||
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
|
std::move(vm.captures().begin(), vm.captures().end(), std::back_inserter(res.values()));
|
||||||
return true;
|
return true;
|
||||||
|
@ -153,10 +153,11 @@ bool regex_search(It begin, It end, MatchResults<It>& res, const Regex& re,
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename It>
|
template<typename It>
|
||||||
bool backward_regex_search(It begin, It end, MatchResults<It>& res, const Regex& re,
|
bool backward_regex_search(It begin, It end, It subject_begin, It subject_end,
|
||||||
|
MatchResults<It>& res, const Regex& re,
|
||||||
RegexExecFlags flags = RegexExecFlags::None)
|
RegexExecFlags flags = RegexExecFlags::None)
|
||||||
{
|
{
|
||||||
return regex_search<It, MatchDirection::Backward>(std::move(begin), std::move(end), res, re, flags);
|
return regex_search<It, MatchDirection::Backward>(begin, end, subject_begin, subject_end, res, re, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
String option_to_string(const Regex& re);
|
String option_to_string(const Regex& re);
|
||||||
|
@ -168,14 +169,22 @@ struct RegexIterator
|
||||||
using ValueType = MatchResults<Iterator>;
|
using ValueType = MatchResults<Iterator>;
|
||||||
|
|
||||||
RegexIterator() = default;
|
RegexIterator() = default;
|
||||||
RegexIterator(Iterator begin, Iterator end, const Regex& re,
|
RegexIterator(Iterator begin, Iterator end,
|
||||||
|
Iterator subject_begin, Iterator subject_end,
|
||||||
|
const Regex& re,
|
||||||
RegexExecFlags flags = RegexExecFlags::None)
|
RegexExecFlags flags = RegexExecFlags::None)
|
||||||
: m_regex{&re}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
|
: m_regex{&re}, m_next_pos{direction == MatchDirection::Forward ? begin : end},
|
||||||
m_begin{begin}, m_end{end}, m_flags{flags}
|
m_begin{begin}, m_end{end},
|
||||||
|
m_subject_begin{subject_begin}, m_subject_end{subject_end},
|
||||||
|
m_flags{flags}
|
||||||
{
|
{
|
||||||
next();
|
next();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RegexIterator(Iterator begin, Iterator end, const Regex& re,
|
||||||
|
RegexExecFlags flags = RegexExecFlags::None)
|
||||||
|
: RegexIterator{begin, end, begin, end, re, flags} {}
|
||||||
|
|
||||||
const ValueType& operator*() const { kak_assert(m_regex); return m_results; }
|
const ValueType& operator*() const { kak_assert(m_regex); return m_results; }
|
||||||
const ValueType* operator->() const { kak_assert(m_regex); return &m_results; }
|
const ValueType* operator->() const { kak_assert(m_regex); return &m_results; }
|
||||||
|
|
||||||
|
@ -216,19 +225,16 @@ private:
|
||||||
|
|
||||||
if (direction == MatchDirection::Forward)
|
if (direction == MatchDirection::Forward)
|
||||||
{
|
{
|
||||||
if (m_begin != m_next_pos)
|
if (not regex_search(m_next_pos, m_end, m_subject_begin, m_subject_end,
|
||||||
additional_flags |= RegexExecFlags::NotBeginOfSubject | RegexExecFlags::PrevAvailable;
|
m_results, *m_regex, m_flags | additional_flags))
|
||||||
|
|
||||||
if (not regex_search(m_next_pos, m_end, m_results, *m_regex,
|
|
||||||
m_flags | additional_flags))
|
|
||||||
m_regex = nullptr;
|
m_regex = nullptr;
|
||||||
else
|
else
|
||||||
m_next_pos = m_results[0].second;
|
m_next_pos = m_results[0].second;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (not backward_regex_search(m_begin, m_next_pos, m_results, *m_regex,
|
if (not backward_regex_search(m_begin, m_next_pos, m_subject_begin, m_subject_end,
|
||||||
m_flags | additional_flags))
|
m_results, *m_regex, m_flags | additional_flags))
|
||||||
m_regex = nullptr;
|
m_regex = nullptr;
|
||||||
else
|
else
|
||||||
m_next_pos = m_results[0].first;
|
m_next_pos = m_results[0].first;
|
||||||
|
@ -240,6 +246,8 @@ private:
|
||||||
Iterator m_next_pos{};
|
Iterator m_next_pos{};
|
||||||
const Iterator m_begin{};
|
const Iterator m_begin{};
|
||||||
const Iterator m_end{};
|
const Iterator m_end{};
|
||||||
|
const Iterator m_subject_begin{};
|
||||||
|
const Iterator m_subject_end{};
|
||||||
const RegexExecFlags m_flags = RegexExecFlags::None;
|
const RegexExecFlags m_flags = RegexExecFlags::None;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -1141,7 +1141,7 @@ struct TestVM : CompiledRegex, ThreadedRegexVM<const char*, dir>
|
||||||
|
|
||||||
bool exec(StringView re, RegexExecFlags flags = RegexExecFlags::AnyMatch)
|
bool exec(StringView re, RegexExecFlags flags = RegexExecFlags::AnyMatch)
|
||||||
{
|
{
|
||||||
return VMType::exec(re.begin(), re.end(), flags);
|
return VMType::exec(re.begin(), re.end(), re.begin(), re.end(), flags);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -132,12 +132,9 @@ enum class RegexExecFlags
|
||||||
NotEndOfLine = 1 << 2,
|
NotEndOfLine = 1 << 2,
|
||||||
NotBeginOfWord = 1 << 3,
|
NotBeginOfWord = 1 << 3,
|
||||||
NotEndOfWord = 1 << 4,
|
NotEndOfWord = 1 << 4,
|
||||||
NotBeginOfSubject = 1 << 5,
|
NotInitialNull = 1 << 5,
|
||||||
NotEndOfSubject = 1 << 6,
|
AnyMatch = 1 << 6,
|
||||||
NotInitialNull = 1 << 7,
|
NoSaves = 1 << 7,
|
||||||
AnyMatch = 1 << 8,
|
|
||||||
NoSaves = 1 << 9,
|
|
||||||
PrevAvailable = 1 << 10,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
|
constexpr bool with_bit_ops(Meta::Type<RegexExecFlags>) { return true; }
|
||||||
|
@ -167,18 +164,21 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool exec(Iterator begin, Iterator end, RegexExecFlags flags)
|
bool exec(Iterator begin, Iterator end,
|
||||||
|
Iterator subject_begin, Iterator subject_end,
|
||||||
|
RegexExecFlags flags)
|
||||||
{
|
{
|
||||||
if (flags & RegexExecFlags::NotInitialNull and begin == end)
|
if (flags & RegexExecFlags::NotInitialNull and begin == end)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
constexpr bool forward = direction == MatchDirection::Forward;
|
constexpr bool forward = direction == MatchDirection::Forward;
|
||||||
const bool prev_avail = flags & RegexExecFlags::PrevAvailable;
|
|
||||||
|
|
||||||
m_begin = Utf8It{utf8::iterator<Iterator>{forward ? begin : end,
|
m_begin = EffectiveIt{Utf8It{forward ? begin : end, subject_begin, subject_end}};
|
||||||
prev_avail ? begin-1 : begin, end}};
|
m_end = EffectiveIt{Utf8It{forward ? end : begin, subject_begin, subject_end}};
|
||||||
m_end = Utf8It{utf8::iterator<Iterator>{forward ? end : begin,
|
|
||||||
prev_avail ? begin-1 : begin, end}};
|
m_subject_begin = EffectiveIt{Utf8It{forward ? subject_begin : subject_end, subject_begin, subject_end}};
|
||||||
|
m_subject_end = EffectiveIt{Utf8It{forward ? subject_end : subject_begin, subject_begin, subject_end}};
|
||||||
|
|
||||||
if (forward)
|
if (forward)
|
||||||
m_flags = flags;
|
m_flags = flags;
|
||||||
else // Flip line begin/end flags as we flipped the instructions on compilation.
|
else // Flip line begin/end flags as we flipped the instructions on compilation.
|
||||||
|
@ -187,7 +187,7 @@ public:
|
||||||
((flags & RegexExecFlags::NotBeginOfLine) ? RegexExecFlags::NotEndOfLine : RegexExecFlags::None);
|
((flags & RegexExecFlags::NotBeginOfLine) ? RegexExecFlags::NotEndOfLine : RegexExecFlags::None);
|
||||||
|
|
||||||
const bool search = (flags & RegexExecFlags::Search);
|
const bool search = (flags & RegexExecFlags::Search);
|
||||||
Utf8It start{m_begin};
|
EffectiveIt start{m_begin};
|
||||||
const auto& start_desc = direction == MatchDirection::Forward ? m_program.forward_start_desc
|
const auto& start_desc = direction == MatchDirection::Forward ? m_program.forward_start_desc
|
||||||
: m_program.backward_start_desc;
|
: m_program.backward_start_desc;
|
||||||
if (start_desc)
|
if (start_desc)
|
||||||
|
@ -273,9 +273,9 @@ private:
|
||||||
Saves* saves;
|
Saves* saves;
|
||||||
};
|
};
|
||||||
|
|
||||||
using Utf8It = std::conditional_t<direction == MatchDirection::Forward,
|
using Utf8It = utf8::iterator<Iterator>;
|
||||||
utf8::iterator<Iterator>,
|
using EffectiveIt = std::conditional_t<direction == MatchDirection::Forward,
|
||||||
std::reverse_iterator<utf8::iterator<Iterator>>>;
|
Utf8It, std::reverse_iterator<Utf8It>>;
|
||||||
|
|
||||||
struct ExecState
|
struct ExecState
|
||||||
{
|
{
|
||||||
|
@ -287,7 +287,7 @@ private:
|
||||||
enum class StepResult { Consumed, Matched, Failed, FindNextStart };
|
enum class StepResult { Consumed, Matched, Failed, FindNextStart };
|
||||||
|
|
||||||
// Steps a thread until it consumes the current character, matches or fails
|
// Steps a thread until it consumes the current character, matches or fails
|
||||||
StepResult step(Utf8It& pos, Thread& thread, ExecState& state)
|
StepResult step(EffectiveIt& pos, Thread& thread, ExecState& state)
|
||||||
{
|
{
|
||||||
const bool no_saves = (m_flags & RegexExecFlags::NoSaves);
|
const bool no_saves = (m_flags & RegexExecFlags::NoSaves);
|
||||||
auto* instructions = m_program.instructions.data();
|
auto* instructions = m_program.instructions.data();
|
||||||
|
@ -371,11 +371,11 @@ private:
|
||||||
return StepResult::Failed;
|
return StepResult::Failed;
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::SubjectBegin:
|
case CompiledRegex::SubjectBegin:
|
||||||
if (pos != m_begin or (m_flags & RegexExecFlags::NotBeginOfSubject))
|
if (pos != m_subject_begin)
|
||||||
return StepResult::Failed;
|
return StepResult::Failed;
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::SubjectEnd:
|
case CompiledRegex::SubjectEnd:
|
||||||
if (pos != m_end or (m_flags & RegexExecFlags::NotEndOfSubject))
|
if (pos != m_subject_end)
|
||||||
return StepResult::Failed;
|
return StepResult::Failed;
|
||||||
break;
|
break;
|
||||||
case CompiledRegex::LookAhead:
|
case CompiledRegex::LookAhead:
|
||||||
|
@ -414,7 +414,7 @@ private:
|
||||||
return StepResult::Failed;
|
return StepResult::Failed;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool exec_program(Utf8It pos, ConstArrayView<CompiledRegex::Instruction> instructions)
|
bool exec_program(EffectiveIt pos, ConstArrayView<CompiledRegex::Instruction> instructions)
|
||||||
{
|
{
|
||||||
ExecState state;
|
ExecState state;
|
||||||
state.current_threads.push_back({instructions.begin(), nullptr});
|
state.current_threads.push_back({instructions.begin(), nullptr});
|
||||||
|
@ -495,7 +495,7 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void to_next_start(Utf8It& start, const Utf8It& end,
|
void to_next_start(EffectiveIt& start, const EffectiveIt& end,
|
||||||
const CompiledRegex::StartDesc& start_desc)
|
const CompiledRegex::StartDesc& start_desc)
|
||||||
{
|
{
|
||||||
while (start != end and *start >= 0 and
|
while (start != end and *start >= 0 and
|
||||||
|
@ -504,11 +504,12 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
template<MatchDirection look_direction, bool ignore_case>
|
template<MatchDirection look_direction, bool ignore_case>
|
||||||
bool lookaround(uint32_t index, Utf8It pos) const
|
bool lookaround(uint32_t index, EffectiveIt pos) const
|
||||||
{
|
{
|
||||||
|
const auto end = (look_direction == MatchDirection::Forward ? m_subject_end : m_subject_begin);
|
||||||
for (auto it = m_program.lookarounds.begin() + index; *it != -1; ++it)
|
for (auto it = m_program.lookarounds.begin() + index; *it != -1; ++it)
|
||||||
{
|
{
|
||||||
if (pos == (look_direction == MatchDirection::Forward ? m_end : m_begin))
|
if (pos == end)
|
||||||
return false;
|
return false;
|
||||||
Codepoint cp = (look_direction == MatchDirection::Forward ? *pos : *(pos-1));
|
Codepoint cp = (look_direction == MatchDirection::Forward ? *pos : *(pos-1));
|
||||||
if (ignore_case)
|
if (ignore_case)
|
||||||
|
@ -535,36 +536,38 @@ private:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_line_start(const Utf8It& pos) const
|
bool is_line_start(const EffectiveIt& pos) const
|
||||||
{
|
{
|
||||||
if (not (m_flags & RegexExecFlags::PrevAvailable) and pos == m_begin)
|
if (pos == m_subject_begin)
|
||||||
return not (m_flags & RegexExecFlags::NotBeginOfLine);
|
return not (m_flags & RegexExecFlags::NotBeginOfLine);
|
||||||
return *(pos-1) == '\n';
|
return *(pos-1) == '\n';
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_line_end(const Utf8It& pos) const
|
bool is_line_end(const EffectiveIt& pos) const
|
||||||
{
|
{
|
||||||
if (pos == m_end)
|
if (pos == m_subject_end)
|
||||||
return not (m_flags & RegexExecFlags::NotEndOfLine);
|
return not (m_flags & RegexExecFlags::NotEndOfLine);
|
||||||
return *pos == '\n';
|
return *pos == '\n';
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_word_boundary(const Utf8It& pos) const
|
bool is_word_boundary(const EffectiveIt& pos) const
|
||||||
{
|
{
|
||||||
if (not (m_flags & RegexExecFlags::PrevAvailable) and pos == m_begin)
|
if (pos == m_subject_begin)
|
||||||
return not (m_flags & RegexExecFlags::NotBeginOfWord);
|
return not (m_flags & RegexExecFlags::NotBeginOfWord);
|
||||||
if (pos == m_end)
|
if (pos == m_subject_end)
|
||||||
return not (m_flags & RegexExecFlags::NotEndOfWord);
|
return not (m_flags & RegexExecFlags::NotEndOfWord);
|
||||||
return is_word(*(pos-1)) != is_word(*pos);
|
return is_word(*(pos-1)) != is_word(*pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const Iterator& get_base(const utf8::iterator<Iterator>& it) { return it.base(); }
|
static const Iterator& get_base(const Utf8It& it) { return it.base(); }
|
||||||
static Iterator get_base(const std::reverse_iterator<utf8::iterator<Iterator>>& it) { return it.base().base(); }
|
static Iterator get_base(const std::reverse_iterator<Utf8It>& it) { return it.base().base(); }
|
||||||
|
|
||||||
const CompiledRegex& m_program;
|
const CompiledRegex& m_program;
|
||||||
|
|
||||||
Utf8It m_begin;
|
EffectiveIt m_begin;
|
||||||
Utf8It m_end;
|
EffectiveIt m_end;
|
||||||
|
EffectiveIt m_subject_begin;
|
||||||
|
EffectiveIt m_subject_end;
|
||||||
RegexExecFlags m_flags;
|
RegexExecFlags m_flags;
|
||||||
|
|
||||||
Vector<Saves*, MemoryDomain::Regex> m_saves;
|
Vector<Saves*, MemoryDomain::Regex> m_saves;
|
||||||
|
|
|
@ -271,22 +271,24 @@ select_matching(const Context& context, const Selection& selection)
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Iterator>
|
template<typename Iterator, typename Container>
|
||||||
Optional<std::pair<Iterator, Iterator>>
|
Optional<std::pair<Iterator, Iterator>>
|
||||||
find_opening(const Iterator& begin, Iterator pos,
|
find_opening(Iterator pos, const Container& container,
|
||||||
const Regex& opening, const Regex& closing,
|
const Regex& opening, const Regex& closing,
|
||||||
int level, bool nestable)
|
int level, bool nestable)
|
||||||
{
|
{
|
||||||
MatchResults<Iterator> res;
|
MatchResults<Iterator> res;
|
||||||
if (backward_regex_search(begin, pos, res, closing) and
|
if (backward_regex_search(container.begin(), pos,
|
||||||
|
container.begin(), container.end(), res, closing) and
|
||||||
res[0].second == pos)
|
res[0].second == pos)
|
||||||
pos = res[0].first;
|
pos = res[0].first;
|
||||||
|
|
||||||
for (auto match : RegexIterator<Iterator, MatchDirection::Backward>{begin, pos, opening})
|
using RegexIt = RegexIterator<Iterator, MatchDirection::Backward>;
|
||||||
|
for (auto match : RegexIt{container.begin(), pos, container.begin(), container.end(), opening})
|
||||||
{
|
{
|
||||||
if (nestable)
|
if (nestable)
|
||||||
{
|
{
|
||||||
for (auto m : RegexIterator<Iterator, MatchDirection::Backward>{match[0].second, pos, closing})
|
for (auto m : RegexIt{match[0].second, pos, container.begin(), container.end(), closing})
|
||||||
++level;
|
++level;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -298,22 +300,23 @@ find_opening(const Iterator& begin, Iterator pos,
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Iterator>
|
template<typename Iterator, typename Container>
|
||||||
Optional<std::pair<Iterator, Iterator>>
|
Optional<std::pair<Iterator, Iterator>>
|
||||||
find_closing(Iterator pos, const Iterator& end,
|
find_closing(Iterator pos, const Container& container,
|
||||||
const Regex& opening, const Regex& closing,
|
const Regex& opening, const Regex& closing,
|
||||||
int level, bool nestable)
|
int level, bool nestable)
|
||||||
{
|
{
|
||||||
MatchResults<Iterator> res;
|
MatchResults<Iterator> res;
|
||||||
if (regex_search(pos, end, res, opening) and
|
if (regex_search(pos, container.end(), container.begin(), container.end(),
|
||||||
res[0].first == pos)
|
res, opening) and res[0].first == pos)
|
||||||
pos = res[0].second;
|
pos = res[0].second;
|
||||||
|
|
||||||
for (auto match : RegexIterator<Iterator, MatchDirection::Forward>{pos, end, closing})
|
using RegexIt = RegexIterator<Iterator, MatchDirection::Forward>;
|
||||||
|
for (auto match : RegexIt{pos, container.end(), container.begin(), container.end(), closing})
|
||||||
{
|
{
|
||||||
if (nestable)
|
if (nestable)
|
||||||
{
|
{
|
||||||
for (auto m : RegexIterator<Iterator, MatchDirection::Forward>{pos, match[0].first, opening})
|
for (auto m : RegexIt{pos, match[0].first, container.begin(), container.end(), opening})
|
||||||
++level;
|
++level;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -335,7 +338,8 @@ find_surrounding(const Container& container, Iterator pos,
|
||||||
|
|
||||||
// When onto the token of a non nestable block, consider it as an opening.
|
// When onto the token of a non nestable block, consider it as an opening.
|
||||||
MatchResults<Iterator> matches;
|
MatchResults<Iterator> matches;
|
||||||
if (not nestable and regex_search(pos, container.end(), matches, opening) and
|
if (not nestable and regex_search(pos, container.end(), container.begin(),
|
||||||
|
container.end(), matches, opening) and
|
||||||
matches[0].first == pos)
|
matches[0].first == pos)
|
||||||
pos = matches[0].second;
|
pos = matches[0].second;
|
||||||
|
|
||||||
|
@ -344,10 +348,11 @@ find_surrounding(const Container& container, Iterator pos,
|
||||||
{
|
{
|
||||||
// When positioned on an opening and searching for the opening, search for the parent one
|
// When positioned on an opening and searching for the opening, search for the parent one
|
||||||
if (nestable and first != container.begin() and not (flags & ObjectFlags::ToEnd) and
|
if (nestable and first != container.begin() and not (flags & ObjectFlags::ToEnd) and
|
||||||
regex_search(first, container.end(), matches, opening) and matches[0].first == first)
|
regex_search(first, container.end(), container.begin(), container.end(),
|
||||||
|
matches, opening) and matches[0].first == first)
|
||||||
first = utf8::previous(first, container.begin());
|
first = utf8::previous(first, container.begin());
|
||||||
|
|
||||||
if (auto res = find_opening(container.begin(), first+1, opening, closing, level, nestable))
|
if (auto res = find_opening(first+1, container, opening, closing, level, nestable))
|
||||||
first = (flags & ObjectFlags::Inner) ? res->second : res->first;
|
first = (flags & ObjectFlags::Inner) ? res->second : res->first;
|
||||||
else
|
else
|
||||||
return {};
|
return {};
|
||||||
|
@ -359,10 +364,11 @@ find_surrounding(const Container& container, Iterator pos,
|
||||||
// When positioned on a closing and searching for the closing, search for the parent one
|
// When positioned on a closing and searching for the closing, search for the parent one
|
||||||
auto next = utf8::next(last, container.end());
|
auto next = utf8::next(last, container.end());
|
||||||
if (nestable and next != container.end() and not (flags & ObjectFlags::ToBegin) and
|
if (nestable and next != container.end() and not (flags & ObjectFlags::ToBegin) and
|
||||||
backward_regex_search(container.begin(), next, matches, closing) and matches[0].second == next)
|
backward_regex_search(container.begin(), next, container.begin(), container.end(),
|
||||||
|
matches, closing) and matches[0].second == next)
|
||||||
last = next;
|
last = next;
|
||||||
|
|
||||||
if (auto res = find_closing(last, container.end(), opening, closing, level, nestable))
|
if (auto res = find_closing(last, container, opening, closing, level, nestable))
|
||||||
last = (flags & ObjectFlags::Inner) ? utf8::previous(res->first, container.begin())
|
last = (flags & ObjectFlags::Inner) ? utf8::previous(res->first, container.begin())
|
||||||
: utf8::previous(res->second, container.begin());
|
: utf8::previous(res->second, container.begin());
|
||||||
else
|
else
|
||||||
|
@ -835,12 +841,10 @@ void select_buffer(SelectionList& selections)
|
||||||
}
|
}
|
||||||
|
|
||||||
static RegexExecFlags
|
static RegexExecFlags
|
||||||
match_flags(const Buffer& buf, const BufferIterator& begin, const BufferIterator& end,
|
match_flags(const Buffer& buf, const BufferIterator& begin, const BufferIterator& end)
|
||||||
bool bos, bool eos)
|
|
||||||
{
|
{
|
||||||
return match_flags(is_bol(begin.coord()), is_eol(buf, end.coord()),
|
return match_flags(is_bol(begin.coord()), is_eol(buf, end.coord()),
|
||||||
is_bow(buf, begin.coord()), is_eow(buf, end.coord()),
|
is_bow(buf, begin.coord()), is_eow(buf, end.coord()));
|
||||||
bos, eos);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool find_next(const Buffer& buffer, const BufferIterator& pos,
|
static bool find_next(const Buffer& buffer, const BufferIterator& pos,
|
||||||
|
@ -848,12 +852,12 @@ static bool find_next(const Buffer& buffer, const BufferIterator& pos,
|
||||||
const Regex& ex, bool& wrapped)
|
const Regex& ex, bool& wrapped)
|
||||||
{
|
{
|
||||||
if (pos != buffer.end() and
|
if (pos != buffer.end() and
|
||||||
regex_search(pos, buffer.end(), matches, ex,
|
regex_search(pos, buffer.end(), buffer.begin(), buffer.end(),
|
||||||
match_flags(buffer, pos, buffer.end(), pos.coord() == BufferCoord{0,0}, true)))
|
matches, ex, match_flags(buffer, pos, buffer.end())))
|
||||||
return true;
|
return true;
|
||||||
wrapped = true;
|
wrapped = true;
|
||||||
return regex_search(buffer.begin(), buffer.end(), matches, ex,
|
return regex_search(buffer.begin(), buffer.end(), buffer.begin(), buffer.end(),
|
||||||
match_flags(buffer, buffer.begin(), buffer.end(), true, true));
|
matches, ex, match_flags(buffer, buffer.begin(), buffer.end()));
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool find_prev(const Buffer& buffer, const BufferIterator& pos,
|
static bool find_prev(const Buffer& buffer, const BufferIterator& pos,
|
||||||
|
@ -861,13 +865,15 @@ static bool find_prev(const Buffer& buffer, const BufferIterator& pos,
|
||||||
const Regex& ex, bool& wrapped)
|
const Regex& ex, bool& wrapped)
|
||||||
{
|
{
|
||||||
if (pos != buffer.begin() and
|
if (pos != buffer.begin() and
|
||||||
backward_regex_search(buffer.begin(), pos, matches, ex,
|
backward_regex_search(buffer.begin(), pos, buffer.begin(), buffer.end(),
|
||||||
match_flags(buffer, buffer.begin(), pos, true, buffer.is_end(pos.coord())) |
|
matches, ex,
|
||||||
|
match_flags(buffer, buffer.begin(), pos) |
|
||||||
RegexExecFlags::NotInitialNull))
|
RegexExecFlags::NotInitialNull))
|
||||||
return true;
|
return true;
|
||||||
wrapped = true;
|
wrapped = true;
|
||||||
return backward_regex_search(buffer.begin(), buffer.end(), matches, ex,
|
return backward_regex_search(buffer.begin(), buffer.end(), buffer.begin(), buffer.end(),
|
||||||
match_flags(buffer, buffer.begin(), buffer.end(), true, true) |
|
matches, ex,
|
||||||
|
match_flags(buffer, buffer.begin(), buffer.end()) |
|
||||||
RegexExecFlags::NotInitialNull);
|
RegexExecFlags::NotInitialNull);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -913,8 +919,7 @@ void select_all_matches(SelectionList& selections, const Regex& regex, int captu
|
||||||
{
|
{
|
||||||
auto sel_beg = buffer.iterator_at(sel.min());
|
auto sel_beg = buffer.iterator_at(sel.min());
|
||||||
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
||||||
RegexIt re_it(sel_beg, sel_end, regex,
|
RegexIt re_it(sel_beg, sel_end, regex, match_flags(buffer, sel_beg, sel_end));
|
||||||
match_flags(buffer, sel_beg, sel_end, true, true));
|
|
||||||
RegexIt re_end;
|
RegexIt re_end;
|
||||||
|
|
||||||
for (; re_it != re_end; ++re_it)
|
for (; re_it != re_end; ++re_it)
|
||||||
|
@ -958,8 +963,7 @@ void split_selections(SelectionList& selections, const Regex& regex, int capture
|
||||||
auto begin = buffer.iterator_at(sel.min());
|
auto begin = buffer.iterator_at(sel.min());
|
||||||
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
auto sel_end = utf8::next(buffer.iterator_at(sel.max()), buffer.end());
|
||||||
|
|
||||||
RegexIt re_it(begin, sel_end, regex,
|
RegexIt re_it(begin, sel_end, regex, match_flags(buffer, begin, sel_end));
|
||||||
match_flags(buffer, begin, sel_end, true, true));
|
|
||||||
RegexIt re_end;
|
RegexIt re_end;
|
||||||
|
|
||||||
for (; re_it != re_end; ++re_it)
|
for (; re_it != re_end; ++re_it)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user