Add support for named captures to the regex impl and regex highlighter
ECMAScript is adding support for it, and it is a pretty isolated change to do. Fixes #2293
This commit is contained in:
parent
56ee329d79
commit
328c497be2
|
@ -99,6 +99,10 @@ from the remaining parameters.
|
|||
add-highlighter window/ regex //\h*(TODO:)[^\n]* 0:cyan 1:yellow,red
|
||||
--------------------------------------------------------------------
|
||||
|
||||
capture_id can be either the capture number, or its name if a
|
||||
named capture is used in the regex (See
|
||||
<<regex#Groups, `:doc regex Groups`>>)
|
||||
|
||||
*dynregex* <expression> <capture_id>:<face> ...::
|
||||
similar to regex, but expand (like a command parameter would) the
|
||||
given expression before building a regex from the result.
|
||||
|
|
|
@ -78,17 +78,21 @@ Regex atoms can be grouped using `(` and `)` or `(?:` and `)`. If `(` is
|
|||
used, the group will be a capturing group, which means the positions from
|
||||
the subject strings that matched between `(` and `)` will be recorded.
|
||||
|
||||
Capture groups are numbered starting at 1. They are numbered in the order of
|
||||
appearance of their `(` in the regex. A special capture group 0 is
|
||||
for the whole sequence that matched.
|
||||
Capture groups are numbered starting at 1. They are numbered in the
|
||||
order of appearance of their `(` in the regex. A special capture group
|
||||
0 is for the whole sequence that matched.
|
||||
|
||||
`(?:` introduces a non capturing group, which will not record the
|
||||
* `(?:` introduces a non-capturing group, which will not record the
|
||||
matching positions.
|
||||
|
||||
* `(?<name>` introduces a named capturing group, which, in addition to
|
||||
being referred to by number, can, in certain contexts, be referred to by the
|
||||
given name.
|
||||
|
||||
== Alternations
|
||||
|
||||
`|` introduces an alternation, which will either match its left-hand side,
|
||||
or its right-hand side (preferring the left-hand side)
|
||||
The `|` character introduces an alternation, which will either match
|
||||
its left-hand side, or its right-hand side (preferring the left-hand side).
|
||||
|
||||
For example, `foo|bar` matches either `foo` or `bar`, `foo(bar|baz|qux)`
|
||||
matches `foo` followed by either `bar`, `baz` or `qux`.
|
||||
|
|
|
@ -307,19 +307,25 @@ public:
|
|||
if (params.size() < 2)
|
||||
throw runtime_error("wrong parameter count");
|
||||
|
||||
Regex re{params[0], RegexCompileFlags::Optimize};
|
||||
|
||||
FacesSpec faces;
|
||||
for (auto& spec : params.subrange(1))
|
||||
{
|
||||
auto colon = find(spec, ':');
|
||||
if (colon == spec.end())
|
||||
throw runtime_error(format("wrong face spec: '{}' expected <capture>:<facespec>", spec));
|
||||
int capture = str_to_int({spec.begin(), colon});
|
||||
const StringView capture_name{spec.begin(), colon};
|
||||
const int capture = str_to_int_ifp(capture_name).value_or_compute([&] {
|
||||
return re.named_capture_index(capture_name);
|
||||
});
|
||||
if (capture < 0)
|
||||
throw runtime_error(format("capture name {} is neither a capture index, nor an existing capture name",
|
||||
capture_name));
|
||||
faces.emplace_back(capture, String{colon+1, spec.end()});
|
||||
}
|
||||
|
||||
Regex ex{params[0], RegexCompileFlags::Optimize};
|
||||
|
||||
return std::make_unique<RegexHighlighter>(std::move(ex), std::move(faces));
|
||||
return std::make_unique<RegexHighlighter>(std::move(re), std::move(faces));
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -492,7 +498,6 @@ std::unique_ptr<Highlighter> create_dynamic_regex_highlighter(HighlighterParamet
|
|||
faces.emplace_back(capture, String{colon+1, spec.end()});
|
||||
}
|
||||
|
||||
|
||||
auto make_hl = [](auto& regex_getter, auto& face_getter) {
|
||||
return std::make_unique<DynamicRegexHighlighter<std::decay_t<decltype(regex_getter)>,
|
||||
std::decay_t<decltype(face_getter)>>>(
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "regex.hh"
|
||||
#include "ranges.hh"
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
@ -8,6 +9,12 @@ Regex::Regex(StringView re, RegexCompileFlags flags)
|
|||
m_str{re.str()}
|
||||
{}
|
||||
|
||||
// Look up a named capture group by its name.
//
// Searches m_impl->named_captures for an entry whose `name` equals the
// given name and returns that entry's `index`. Returns -1 when no entry
// matches, so a negative result means "no such named capture".
int Regex::named_capture_index(StringView name) const
|
||||
{
|
||||
auto it = find_if(m_impl->named_captures, [&](auto& c) { return c.name == name; });
|
||||
return it != m_impl->named_captures.end() ? it->index : -1;
|
||||
}
|
||||
|
||||
String option_to_string(const Regex& re)
|
||||
{
|
||||
return re.str();
|
||||
|
|
|
@ -21,6 +21,7 @@ public:
|
|||
const String& str() const { return m_str; }
|
||||
|
||||
size_t mark_count() const { return m_impl->save_count / 2 - 1; }
|
||||
int named_capture_index(StringView name) const;
|
||||
|
||||
static constexpr const char* option_type_name = "regex";
|
||||
|
||||
|
|
|
@ -85,7 +85,8 @@ struct ParsedRegex
|
|||
Vector<Node, MemoryDomain::Regex> nodes;
|
||||
|
||||
Vector<CharacterClass, MemoryDomain::Regex> character_classes;
|
||||
size_t capture_count;
|
||||
Vector<CompiledRegex::NamedCapture, MemoryDomain::Regex> named_captures;
|
||||
uint32_t capture_count;
|
||||
};
|
||||
|
||||
namespace
|
||||
|
@ -166,7 +167,7 @@ private:
|
|||
using Iterator = utf8::iterator<const char*, const char*, Codepoint, int, InvalidPolicy>;
|
||||
using NodeIndex = ParsedRegex::NodeIndex;
|
||||
|
||||
NodeIndex disjunction(unsigned capture = -1)
|
||||
NodeIndex disjunction(uint32_t capture = -1)
|
||||
{
|
||||
NodeIndex index = new_node(ParsedRegex::Alternation);
|
||||
get_node(index).value = capture;
|
||||
|
@ -301,15 +302,25 @@ private:
|
|||
return new_node(ParsedRegex::AnyCharExceptNewLine);
|
||||
case '(':
|
||||
{
|
||||
auto captures = [this, it = (++m_pos).base()]() mutable {
|
||||
if (m_regex.end() - it >= 2 and *it++ == '?' and *it++ == ':')
|
||||
uint32_t capture_group = -1;
|
||||
const char* it = (++m_pos).base();
|
||||
if (m_regex.end() - it < 2 or *it++ != '?')
|
||||
capture_group = m_parsed_regex.capture_count++;
|
||||
else if (*it == ':')
|
||||
m_pos = Iterator{++it, m_regex};
|
||||
else if (*it == '<')
|
||||
{
|
||||
m_pos = Iterator{it, m_regex};
|
||||
return false;
|
||||
const auto name_start = ++it;
|
||||
while (it != m_regex.end() and is_word(*it))
|
||||
++it;
|
||||
if (it == m_regex.end() or *it != '>')
|
||||
parse_error("named captures should be only ascii word characters");
|
||||
capture_group = m_parsed_regex.capture_count++;
|
||||
m_parsed_regex.named_captures.push_back({{name_start, it}, capture_group});
|
||||
m_pos = Iterator{++it, m_regex};
|
||||
}
|
||||
return true;
|
||||
};
|
||||
NodeIndex content = disjunction(captures() ? m_parsed_regex.capture_count++ : -1);
|
||||
|
||||
NodeIndex content = disjunction(capture_group);
|
||||
if (at_end() or *m_pos++ != ')')
|
||||
parse_error("unclosed parenthesis");
|
||||
return content;
|
||||
|
@ -682,6 +693,7 @@ struct RegexCompiler
|
|||
m_program.first_backward_inst = -1;
|
||||
|
||||
m_program.character_classes = std::move(m_parsed_regex.character_classes);
|
||||
m_program.named_captures = std::move(m_parsed_regex.named_captures);
|
||||
m_program.save_count = m_parsed_regex.capture_count * 2;
|
||||
}
|
||||
|
||||
|
@ -1526,6 +1538,24 @@ auto test_regex = UnitTest{[]{
|
|||
const char str[] = "\0\n☎☏"; // work around the null byte in the literal
|
||||
kak_assert(vm.exec({str, str + sizeof(str)-1}));
|
||||
}
|
||||
|
||||
{
|
||||
auto eq = [](const CompiledRegex::NamedCapture& lhs,
|
||||
const CompiledRegex::NamedCapture& rhs) {
|
||||
return lhs.name == rhs.name and
|
||||
lhs.index == rhs.index;
|
||||
};
|
||||
|
||||
TestVM<> vm{R"((?<year>\d+)-(?<month>\d+)-(?<day>\d+))"};
|
||||
kak_assert(vm.exec("2019-01-03", RegexExecFlags::None));
|
||||
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "2019");
|
||||
kak_assert(StringView{vm.captures()[4], vm.captures()[5]} == "01");
|
||||
kak_assert(StringView{vm.captures()[6], vm.captures()[7]} == "03");
|
||||
kak_assert(vm.named_captures.size() == 3);
|
||||
kak_assert(eq(vm.named_captures[0], {"year", 1}));
|
||||
kak_assert(eq(vm.named_captures[1], {"month", 2}));
|
||||
kak_assert(eq(vm.named_captures[2], {"day", 3}));
|
||||
}
|
||||
}};
|
||||
|
||||
}
|
||||
|
|
|
@ -107,9 +107,16 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
|||
|
||||
explicit operator bool() const { return not instructions.empty(); }
|
||||
|
||||
// Associates the name given in a `(?<name>...)` group with the number of
// the capture group it designates. For example, the regex
// `(?<year>\d+)-(?<month>\d+)-(?<day>\d+)` yields {"year", 1},
// {"month", 2} and {"day", 3}.
struct NamedCapture
|
||||
{
|
||||
String name; // text found between `<` and `>` in the group opener
|
||||
uint32_t index; // capture group number assigned to that group
|
||||
};
|
||||
|
||||
Vector<Instruction, MemoryDomain::Regex> instructions;
|
||||
Vector<CharacterClass, MemoryDomain::Regex> character_classes;
|
||||
Vector<Lookaround, MemoryDomain::Regex> lookarounds;
|
||||
Vector<NamedCapture, MemoryDomain::Regex> named_captures;
|
||||
uint32_t first_backward_inst; // -1 if no backward support, 0 if only backward, >0 if both forward and backward
|
||||
uint32_t save_count;
|
||||
|
||||
|
|
1
test/highlight/named-captures/cmd
Normal file
1
test/highlight/named-captures/cmd
Normal file
|
@ -0,0 +1 @@
|
|||
|
1
test/highlight/named-captures/in
Normal file
1
test/highlight/named-captures/in
Normal file
|
@ -0,0 +1 @@
|
|||
2018-01-03
|
1
test/highlight/named-captures/rc
Normal file
1
test/highlight/named-captures/rc
Normal file
|
@ -0,0 +1 @@
|
|||
add-highlighter window/ regex (?<year>\d+)-(?<month>\d+)-(?<day>\d+) year:red month:green day:yellow
|
7
test/highlight/named-captures/ui-out
Normal file
7
test/highlight/named-captures/ui-out
Normal file
|
@ -0,0 +1,7 @@
|
|||
{ "jsonrpc": "2.0", "method": "set_ui_options", "params": [{}] }
|
||||
{ "jsonrpc": "2.0", "method": "draw", "params": [[[{ "face": { "fg": "black", "bg": "white", "attributes": [] }, "contents": "2" }, { "face": { "fg": "red", "bg": "default", "attributes": [] }, "contents": "018" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "-" }, { "face": { "fg": "green", "bg": "default", "attributes": [] }, "contents": "01" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "-" }, { "face": { "fg": "yellow", "bg": "default", "attributes": [] }, "contents": "03" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "\u000a" }]], { "fg": "default", "bg": "default", "attributes": [] }, { "fg": "blue", "bg": "default", "attributes": [] }] }
|
||||
{ "jsonrpc": "2.0", "method": "menu_hide", "params": [] }
|
||||
{ "jsonrpc": "2.0", "method": "info_hide", "params": [] }
|
||||
{ "jsonrpc": "2.0", "method": "draw_status", "params": [[], [{ "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "out 1:1 " }, { "face": { "fg": "black", "bg": "yellow", "attributes": [] }, "contents": "" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": " " }, { "face": { "fg": "blue", "bg": "default", "attributes": [] }, "contents": "1 sel" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": " - client0@[kak-tests]" }], { "fg": "cyan", "bg": "default", "attributes": [] }] }
|
||||
{ "jsonrpc": "2.0", "method": "set_cursor", "params": ["buffer", { "line": 0, "column": 0 }] }
|
||||
{ "jsonrpc": "2.0", "method": "refresh", "params": [true] }
|
Loading…
Reference in New Issue
Block a user