From f5d5274c5f7c7dcb12d181b69ccad31f6bb05e46 Mon Sep 17 00:00:00 2001
From: Maxime Coste
Date: Mon, 23 Jan 2023 17:38:02 +1100
Subject: [PATCH] Fix incorrect use of subject end/begin in regex execution

This could lead to reading past subject string end in certain conditions

Fixes #4794
---
 src/regex.hh      | 1 +
 src/regex_impl.cc | 8 ++++++++
 src/regex_impl.hh | 4 ++--
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/regex.hh b/src/regex.hh
index 07a58959..a235470d 100644
--- a/src/regex.hh
+++ b/src/regex.hh
@@ -220,6 +220,7 @@ private:
         m_results.values().clear();
         std::move(m_vm.captures().begin(), m_vm.captures().end(), std::back_inserter(m_results.values()));
         m_next_pos = forward ? m_results[0].second : m_results[0].first;
+        kak_assert(forward ? (m_next_pos <= m_end) : (m_next_pos >= m_begin));
         return true;
     }
 
diff --git a/src/regex_impl.cc b/src/regex_impl.cc
index 40701e48..1aa95a82 100644
--- a/src/regex_impl.cc
+++ b/src/regex_impl.cc
@@ -1204,6 +1204,8 @@ struct TestVM : CompiledRegex, ThreadedRegexVM
     {
         return TestVM::ThreadedRegexVM::exec(re.begin(), re.end(), re.begin(), re.end(), flags);
     }
+
+    using TestVM::ThreadedRegexVM::exec;
 };
 
 }
@@ -1564,6 +1566,12 @@ auto test_regex = UnitTest{[]{
         kak_assert(vm.exec("д", RegexExecFlags::None));
     }
 
+    {
+        TestVM vm{"ab"};
+        const char str[] = "fa😄ab";
+        kak_assert(not vm.exec(str, str+4, str, str + sizeof(str)-1, RegexExecFlags::None));
+    }
+
     {
         TestVM<> vm{R"(\0\x0A\u00260e\u00260F)"};
         const char str[] = "\0\n☎☏"; // work around the null byte in the literal
diff --git a/src/regex_impl.hh b/src/regex_impl.hh
index bf21edce..cd6b10c2 100644
--- a/src/regex_impl.hh
+++ b/src/regex_impl.hh
@@ -492,8 +492,8 @@ private:
                 return m_found_match;
             }
 
-            forward ? utf8::to_next(pos, config.subject_end)
-                    : utf8::to_previous(pos, config.subject_begin);
+            forward ? utf8::to_next(pos, config.end)
+                    : utf8::to_previous(pos, config.end);
 
             if (search and not m_found_match)
             {