Add support for named captures to the regex impl and regex highlighter
ECMAScript is adding support for named captures, and it is a fairly isolated change to make. Fixes #2293
This commit is contained in:
parent
56ee329d79
commit
328c497be2
|
@ -99,6 +99,10 @@ from the remaining parameters.
|
||||||
add-highlighter window/ regex //\h*(TODO:)[^\n]* 0:cyan 1:yellow,red
|
add-highlighter window/ regex //\h*(TODO:)[^\n]* 0:cyan 1:yellow,red
|
||||||
--------------------------------------------------------------------
|
--------------------------------------------------------------------
|
||||||
|
|
||||||
|
capture_id can be either the capture number, or its name if a
|
||||||
|
named capture is used in the regex (see
|
||||||
|
<<regex#Groups, `:doc regex Groups`>>)
|
||||||
|
|
||||||
*dynregex* <expression> <capture_id>:<face> ...::
|
*dynregex* <expression> <capture_id>:<face> ...::
|
||||||
similar to regex, but expand (like a command parameter would) the
|
similar to regex, but expand (like a command parameter would) the
|
||||||
given expression before building a regex from the result.
|
given expression before building a regex from the result.
|
||||||
|
|
|
@ -78,17 +78,21 @@ Regex atoms can be grouped using `(` and `)` or `(?:` and `)`. If `(` is
|
||||||
used, the group will be a capturing group, which means the positions from
|
used, the group will be a capturing group, which means the positions from
|
||||||
the subject strings that matched between `(` and `)` will be recorded.
|
the subject strings that matched between `(` and `)` will be recorded.
|
||||||
|
|
||||||
Capture groups are numbered starting at 1. They are numbered in the order of
|
Capture groups are numbered starting at 1. They are numbered in the
|
||||||
appearance of their `(` in the regex. A special capture group 0 is
|
order of appearance of their `(` in the regex. A special capture group
|
||||||
for the whole sequence that matched.
|
0 is for the whole sequence that matched.
|
||||||
|
|
||||||
`(?:` introduces a non capturing group, which will not record the
|
* `(?:` introduces a non capturing group, which will not record the
|
||||||
matching positions.
|
matching positions.
|
||||||
|
|
||||||
|
* `(?<name>` introduces a named capturing group, which, in addition to
|
||||||
|
being referred to by number, can be, in certain contexts, referred to by the
|
||||||
|
given name.
|
||||||
|
|
||||||
== Alternations
|
== Alternations
|
||||||
|
|
||||||
`|` introduces an alternation, which will either match its left-hand side,
|
The `|` character introduces an alternation, which will either match
|
||||||
or its right-hand side (preferring the left-hand side)
|
its left-hand side, or its right-hand side (preferring the left-hand side).
|
||||||
|
|
||||||
For example, `foo|bar` matches either `foo` or `bar`, `foo(bar|baz|qux)`
|
For example, `foo|bar` matches either `foo` or `bar`, `foo(bar|baz|qux)`
|
||||||
matches `foo` followed by either `bar`, `baz` or `qux`.
|
matches `foo` followed by either `bar`, `baz` or `qux`.
|
||||||
|
|
|
@ -307,19 +307,25 @@ public:
|
||||||
if (params.size() < 2)
|
if (params.size() < 2)
|
||||||
throw runtime_error("wrong parameter count");
|
throw runtime_error("wrong parameter count");
|
||||||
|
|
||||||
|
Regex re{params[0], RegexCompileFlags::Optimize};
|
||||||
|
|
||||||
FacesSpec faces;
|
FacesSpec faces;
|
||||||
for (auto& spec : params.subrange(1))
|
for (auto& spec : params.subrange(1))
|
||||||
{
|
{
|
||||||
auto colon = find(spec, ':');
|
auto colon = find(spec, ':');
|
||||||
if (colon == spec.end())
|
if (colon == spec.end())
|
||||||
throw runtime_error(format("wrong face spec: '{}' expected <capture>:<facespec>", spec));
|
throw runtime_error(format("wrong face spec: '{}' expected <capture>:<facespec>", spec));
|
||||||
int capture = str_to_int({spec.begin(), colon});
|
const StringView capture_name{spec.begin(), colon};
|
||||||
|
const int capture = str_to_int_ifp(capture_name).value_or_compute([&] {
|
||||||
|
return re.named_capture_index(capture_name);
|
||||||
|
});
|
||||||
|
if (capture < 0)
|
||||||
|
throw runtime_error(format("capture name {} is neither a capture index, nor an existing capture name",
|
||||||
|
capture_name));
|
||||||
faces.emplace_back(capture, String{colon+1, spec.end()});
|
faces.emplace_back(capture, String{colon+1, spec.end()});
|
||||||
}
|
}
|
||||||
|
|
||||||
Regex ex{params[0], RegexCompileFlags::Optimize};
|
return std::make_unique<RegexHighlighter>(std::move(re), std::move(faces));
|
||||||
|
|
||||||
return std::make_unique<RegexHighlighter>(std::move(ex), std::move(faces));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -492,7 +498,6 @@ std::unique_ptr<Highlighter> create_dynamic_regex_highlighter(HighlighterParamet
|
||||||
faces.emplace_back(capture, String{colon+1, spec.end()});
|
faces.emplace_back(capture, String{colon+1, spec.end()});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
auto make_hl = [](auto& regex_getter, auto& face_getter) {
|
auto make_hl = [](auto& regex_getter, auto& face_getter) {
|
||||||
return std::make_unique<DynamicRegexHighlighter<std::decay_t<decltype(regex_getter)>,
|
return std::make_unique<DynamicRegexHighlighter<std::decay_t<decltype(regex_getter)>,
|
||||||
std::decay_t<decltype(face_getter)>>>(
|
std::decay_t<decltype(face_getter)>>>(
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#include "regex.hh"
|
#include "regex.hh"
|
||||||
|
#include "ranges.hh"
|
||||||
|
|
||||||
namespace Kakoune
|
namespace Kakoune
|
||||||
{
|
{
|
||||||
|
@ -8,6 +9,12 @@ Regex::Regex(StringView re, RegexCompileFlags flags)
|
||||||
m_str{re.str()}
|
m_str{re.str()}
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
int Regex::named_capture_index(StringView name) const
|
||||||
|
{
|
||||||
|
auto it = find_if(m_impl->named_captures, [&](auto& c) { return c.name == name; });
|
||||||
|
return it != m_impl->named_captures.end() ? it->index : -1;
|
||||||
|
}
|
||||||
|
|
||||||
String option_to_string(const Regex& re)
|
String option_to_string(const Regex& re)
|
||||||
{
|
{
|
||||||
return re.str();
|
return re.str();
|
||||||
|
|
|
@ -21,6 +21,7 @@ public:
|
||||||
const String& str() const { return m_str; }
|
const String& str() const { return m_str; }
|
||||||
|
|
||||||
size_t mark_count() const { return m_impl->save_count / 2 - 1; }
|
size_t mark_count() const { return m_impl->save_count / 2 - 1; }
|
||||||
|
int named_capture_index(StringView name) const;
|
||||||
|
|
||||||
static constexpr const char* option_type_name = "regex";
|
static constexpr const char* option_type_name = "regex";
|
||||||
|
|
||||||
|
|
|
@ -85,7 +85,8 @@ struct ParsedRegex
|
||||||
Vector<Node, MemoryDomain::Regex> nodes;
|
Vector<Node, MemoryDomain::Regex> nodes;
|
||||||
|
|
||||||
Vector<CharacterClass, MemoryDomain::Regex> character_classes;
|
Vector<CharacterClass, MemoryDomain::Regex> character_classes;
|
||||||
size_t capture_count;
|
Vector<CompiledRegex::NamedCapture, MemoryDomain::Regex> named_captures;
|
||||||
|
uint32_t capture_count;
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
|
@ -166,7 +167,7 @@ private:
|
||||||
using Iterator = utf8::iterator<const char*, const char*, Codepoint, int, InvalidPolicy>;
|
using Iterator = utf8::iterator<const char*, const char*, Codepoint, int, InvalidPolicy>;
|
||||||
using NodeIndex = ParsedRegex::NodeIndex;
|
using NodeIndex = ParsedRegex::NodeIndex;
|
||||||
|
|
||||||
NodeIndex disjunction(unsigned capture = -1)
|
NodeIndex disjunction(uint32_t capture = -1)
|
||||||
{
|
{
|
||||||
NodeIndex index = new_node(ParsedRegex::Alternation);
|
NodeIndex index = new_node(ParsedRegex::Alternation);
|
||||||
get_node(index).value = capture;
|
get_node(index).value = capture;
|
||||||
|
@ -301,15 +302,25 @@ private:
|
||||||
return new_node(ParsedRegex::AnyCharExceptNewLine);
|
return new_node(ParsedRegex::AnyCharExceptNewLine);
|
||||||
case '(':
|
case '(':
|
||||||
{
|
{
|
||||||
auto captures = [this, it = (++m_pos).base()]() mutable {
|
uint32_t capture_group = -1;
|
||||||
if (m_regex.end() - it >= 2 and *it++ == '?' and *it++ == ':')
|
const char* it = (++m_pos).base();
|
||||||
|
if (m_regex.end() - it < 2 or *it++ != '?')
|
||||||
|
capture_group = m_parsed_regex.capture_count++;
|
||||||
|
else if (*it == ':')
|
||||||
|
m_pos = Iterator{++it, m_regex};
|
||||||
|
else if (*it == '<')
|
||||||
{
|
{
|
||||||
m_pos = Iterator{it, m_regex};
|
const auto name_start = ++it;
|
||||||
return false;
|
while (it != m_regex.end() and is_word(*it))
|
||||||
|
++it;
|
||||||
|
if (it == m_regex.end() or *it != '>')
|
||||||
|
parse_error("named captures should be only ascii word characters");
|
||||||
|
capture_group = m_parsed_regex.capture_count++;
|
||||||
|
m_parsed_regex.named_captures.push_back({{name_start, it}, capture_group});
|
||||||
|
m_pos = Iterator{++it, m_regex};
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
};
|
NodeIndex content = disjunction(capture_group);
|
||||||
NodeIndex content = disjunction(captures() ? m_parsed_regex.capture_count++ : -1);
|
|
||||||
if (at_end() or *m_pos++ != ')')
|
if (at_end() or *m_pos++ != ')')
|
||||||
parse_error("unclosed parenthesis");
|
parse_error("unclosed parenthesis");
|
||||||
return content;
|
return content;
|
||||||
|
@ -682,6 +693,7 @@ struct RegexCompiler
|
||||||
m_program.first_backward_inst = -1;
|
m_program.first_backward_inst = -1;
|
||||||
|
|
||||||
m_program.character_classes = std::move(m_parsed_regex.character_classes);
|
m_program.character_classes = std::move(m_parsed_regex.character_classes);
|
||||||
|
m_program.named_captures = std::move(m_parsed_regex.named_captures);
|
||||||
m_program.save_count = m_parsed_regex.capture_count * 2;
|
m_program.save_count = m_parsed_regex.capture_count * 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1526,6 +1538,24 @@ auto test_regex = UnitTest{[]{
|
||||||
const char str[] = "\0\n☎☏"; // work around the null byte in the literal
|
const char str[] = "\0\n☎☏"; // work around the null byte in the literal
|
||||||
kak_assert(vm.exec({str, str + sizeof(str)-1}));
|
kak_assert(vm.exec({str, str + sizeof(str)-1}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto eq = [](const CompiledRegex::NamedCapture& lhs,
|
||||||
|
const CompiledRegex::NamedCapture& rhs) {
|
||||||
|
return lhs.name == rhs.name and
|
||||||
|
lhs.index == rhs.index;
|
||||||
|
};
|
||||||
|
|
||||||
|
TestVM<> vm{R"((?<year>\d+)-(?<month>\d+)-(?<day>\d+))"};
|
||||||
|
kak_assert(vm.exec("2019-01-03", RegexExecFlags::None));
|
||||||
|
kak_assert(StringView{vm.captures()[2], vm.captures()[3]} == "2019");
|
||||||
|
kak_assert(StringView{vm.captures()[4], vm.captures()[5]} == "01");
|
||||||
|
kak_assert(StringView{vm.captures()[6], vm.captures()[7]} == "03");
|
||||||
|
kak_assert(vm.named_captures.size() == 3);
|
||||||
|
kak_assert(eq(vm.named_captures[0], {"year", 1}));
|
||||||
|
kak_assert(eq(vm.named_captures[1], {"month", 2}));
|
||||||
|
kak_assert(eq(vm.named_captures[2], {"day", 3}));
|
||||||
|
}
|
||||||
}};
|
}};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -107,9 +107,16 @@ struct CompiledRegex : RefCountable, UseMemoryDomain<MemoryDomain::Regex>
|
||||||
|
|
||||||
explicit operator bool() const { return not instructions.empty(); }
|
explicit operator bool() const { return not instructions.empty(); }
|
||||||
|
|
||||||
|
struct NamedCapture
|
||||||
|
{
|
||||||
|
String name;
|
||||||
|
uint32_t index;
|
||||||
|
};
|
||||||
|
|
||||||
Vector<Instruction, MemoryDomain::Regex> instructions;
|
Vector<Instruction, MemoryDomain::Regex> instructions;
|
||||||
Vector<CharacterClass, MemoryDomain::Regex> character_classes;
|
Vector<CharacterClass, MemoryDomain::Regex> character_classes;
|
||||||
Vector<Lookaround, MemoryDomain::Regex> lookarounds;
|
Vector<Lookaround, MemoryDomain::Regex> lookarounds;
|
||||||
|
Vector<NamedCapture, MemoryDomain::Regex> named_captures;
|
||||||
uint32_t first_backward_inst; // -1 if no backward support, 0 if only backward, >0 if both forward and backward
|
uint32_t first_backward_inst; // -1 if no backward support, 0 if only backward, >0 if both forward and backward
|
||||||
uint32_t save_count;
|
uint32_t save_count;
|
||||||
|
|
||||||
|
|
1
test/highlight/named-captures/cmd
Normal file
1
test/highlight/named-captures/cmd
Normal file
|
@ -0,0 +1 @@
|
||||||
|
|
1
test/highlight/named-captures/in
Normal file
1
test/highlight/named-captures/in
Normal file
|
@ -0,0 +1 @@
|
||||||
|
2018-01-03
|
1
test/highlight/named-captures/rc
Normal file
1
test/highlight/named-captures/rc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
add-highlighter window/ regex (?<year>\d+)-(?<month>\d+)-(?<day>\d+) year:red month:green day:yellow
|
7
test/highlight/named-captures/ui-out
Normal file
7
test/highlight/named-captures/ui-out
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
{ "jsonrpc": "2.0", "method": "set_ui_options", "params": [{}] }
|
||||||
|
{ "jsonrpc": "2.0", "method": "draw", "params": [[[{ "face": { "fg": "black", "bg": "white", "attributes": [] }, "contents": "2" }, { "face": { "fg": "red", "bg": "default", "attributes": [] }, "contents": "018" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "-" }, { "face": { "fg": "green", "bg": "default", "attributes": [] }, "contents": "01" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "-" }, { "face": { "fg": "yellow", "bg": "default", "attributes": [] }, "contents": "03" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "\u000a" }]], { "fg": "default", "bg": "default", "attributes": [] }, { "fg": "blue", "bg": "default", "attributes": [] }] }
|
||||||
|
{ "jsonrpc": "2.0", "method": "menu_hide", "params": [] }
|
||||||
|
{ "jsonrpc": "2.0", "method": "info_hide", "params": [] }
|
||||||
|
{ "jsonrpc": "2.0", "method": "draw_status", "params": [[], [{ "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": "out 1:1 " }, { "face": { "fg": "black", "bg": "yellow", "attributes": [] }, "contents": "" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": " " }, { "face": { "fg": "blue", "bg": "default", "attributes": [] }, "contents": "1 sel" }, { "face": { "fg": "default", "bg": "default", "attributes": [] }, "contents": " - client0@[kak-tests]" }], { "fg": "cyan", "bg": "default", "attributes": [] }] }
|
||||||
|
{ "jsonrpc": "2.0", "method": "set_cursor", "params": ["buffer", { "line": 0, "column": 0 }] }
|
||||||
|
{ "jsonrpc": "2.0", "method": "refresh", "params": [true] }
|
Loading…
Reference in New Issue
Block a user