Refactor command line parsing
Command line parsing now works as follow: * Quoted strings ('...', "..." and %~...~ with '~' non nestable) use 'doubling-up' for escaping their delimiter, if the delimiter appears twice in a row, it is considered as part of the string and represent one delimiter character. So 'abc''def' == "abc'def". No other escaping takes place in those strings. * Balanced strings (%{...}) do not support any kind of escaping, but finds the matching closing delimiter by taking nesting into account. So %{abc{def}} == "abc{def}". * Non quoted words support escaping of `;` and whitespaces with `\`, `%`, `'` and '"` can be escaped with `\` at the start of the word, they do not need escaping (and will not be escaped) else where in a word where they are treated literally. Any other use of '\' is a literal '\'. So \%abc%\;\ def == "%abc%; def" As discussed in #2046 this should make our command line syntax more robust, provide a simple programmatic way to escape a string content (s/<delim>/<delim><delim>/g), be well defined instead of ad-hoc undocumented behaviour, and interact nicely with other common escaping by avoiding escaping hell (:grep <regex> can in most case be written with the regex unquoted).
This commit is contained in:
parent
cad5f37efd
commit
c829595d01
|
@ -11,6 +11,7 @@
|
||||||
#include "register_manager.hh"
|
#include "register_manager.hh"
|
||||||
#include "shell_manager.hh"
|
#include "shell_manager.hh"
|
||||||
#include "utils.hh"
|
#include "utils.hh"
|
||||||
|
#include "unit_tests.hh"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
|
@ -67,9 +68,63 @@ bool is_command_separator(Codepoint c)
|
||||||
return c == ';' or c == '\n';
|
return c == ';' or c == '\n';
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Func, typename UnescapeFunc>
|
struct QuotedResult
|
||||||
String get_until_delimiter(Reader& reader, Func is_delimiter,
|
{
|
||||||
UnescapeFunc unescape = [](Codepoint) { return false; })
|
String content;
|
||||||
|
bool terminated;
|
||||||
|
};
|
||||||
|
|
||||||
|
QuotedResult parse_quoted(Reader& reader, Codepoint delimiter)
|
||||||
|
{
|
||||||
|
auto beg = reader.pos;
|
||||||
|
String str;
|
||||||
|
|
||||||
|
while (reader)
|
||||||
|
{
|
||||||
|
const Codepoint c = *reader;
|
||||||
|
if (c == delimiter)
|
||||||
|
{
|
||||||
|
str += reader.substr_from(beg);
|
||||||
|
++reader;
|
||||||
|
if (reader and *reader == delimiter)
|
||||||
|
{
|
||||||
|
str += String{c};
|
||||||
|
beg = reader.pos+1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return {str, true};
|
||||||
|
}
|
||||||
|
++reader;
|
||||||
|
}
|
||||||
|
if (beg < reader.str.end())
|
||||||
|
str += reader.substr_from(beg);
|
||||||
|
return {str, false};
|
||||||
|
}
|
||||||
|
|
||||||
|
QuotedResult parse_quoted_balanced(Reader& reader, Codepoint opening_delimiter,
|
||||||
|
Codepoint closing_delimiter)
|
||||||
|
{
|
||||||
|
kak_assert(utf8::codepoint(utf8::previous(reader.pos, reader.str.begin()),
|
||||||
|
reader.str.end()) == opening_delimiter);
|
||||||
|
int level = 0;
|
||||||
|
auto start = reader.pos;
|
||||||
|
while (reader)
|
||||||
|
{
|
||||||
|
const Codepoint c = *reader;
|
||||||
|
if (c == opening_delimiter)
|
||||||
|
++level;
|
||||||
|
else if (c == closing_delimiter and level-- == 0)
|
||||||
|
{
|
||||||
|
auto content = reader.substr_from(start);
|
||||||
|
++reader;
|
||||||
|
return {content.str(), true};
|
||||||
|
}
|
||||||
|
++reader;
|
||||||
|
}
|
||||||
|
return {reader.substr_from(start).str(), false};
|
||||||
|
}
|
||||||
|
|
||||||
|
String parse_unquoted(Reader& reader)
|
||||||
{
|
{
|
||||||
auto beg = reader.pos;
|
auto beg = reader.pos;
|
||||||
String str;
|
String str;
|
||||||
|
@ -78,7 +133,7 @@ String get_until_delimiter(Reader& reader, Func is_delimiter,
|
||||||
while (reader)
|
while (reader)
|
||||||
{
|
{
|
||||||
const Codepoint c = *reader;
|
const Codepoint c = *reader;
|
||||||
if (is_delimiter(c) or (was_antislash and unescape(c)))
|
if (is_command_separator(c) or is_horizontal_blank(c))
|
||||||
{
|
{
|
||||||
str += reader.substr_from(beg);
|
str += reader.substr_from(beg);
|
||||||
if (was_antislash)
|
if (was_antislash)
|
||||||
|
@ -97,36 +152,6 @@ String get_until_delimiter(Reader& reader, Func is_delimiter,
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[gnu::always_inline]]
|
|
||||||
inline String get_until_delimiter(Reader& reader, Codepoint c)
|
|
||||||
{
|
|
||||||
return get_until_delimiter(reader, [c](Codepoint ch) { return c == ch; }, [](Codepoint) { return false; });
|
|
||||||
}
|
|
||||||
|
|
||||||
StringView get_until_closing_delimiter(Reader& reader, Codepoint opening_delimiter,
|
|
||||||
Codepoint closing_delimiter)
|
|
||||||
{
|
|
||||||
kak_assert(utf8::codepoint(utf8::previous(reader.pos, reader.str.begin()),
|
|
||||||
reader.str.end()) == opening_delimiter);
|
|
||||||
int level = 0;
|
|
||||||
auto start = reader.pos;
|
|
||||||
while (reader)
|
|
||||||
{
|
|
||||||
const Codepoint c = *reader;
|
|
||||||
if (c == opening_delimiter)
|
|
||||||
++level;
|
|
||||||
else if (c == closing_delimiter)
|
|
||||||
{
|
|
||||||
if (level > 0)
|
|
||||||
--level;
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
++reader;
|
|
||||||
}
|
|
||||||
return reader.substr_from(start);
|
|
||||||
}
|
|
||||||
|
|
||||||
Token::Type token_type(StringView type_name, bool throw_on_invalid)
|
Token::Type token_type(StringView type_name, bool throw_on_invalid)
|
||||||
{
|
{
|
||||||
if (type_name == "")
|
if (type_name == "")
|
||||||
|
@ -203,25 +228,24 @@ Token parse_percent_token(Reader& reader, bool throw_on_unterminated)
|
||||||
if (it != std::end(matching_pairs))
|
if (it != std::end(matching_pairs))
|
||||||
{
|
{
|
||||||
const Codepoint closing_delimiter = it->closing;
|
const Codepoint closing_delimiter = it->closing;
|
||||||
auto token = get_until_closing_delimiter(reader, opening_delimiter,
|
auto quoted = parse_quoted_balanced(reader, opening_delimiter, closing_delimiter);
|
||||||
closing_delimiter);
|
if (throw_on_unterminated and not quoted.terminated)
|
||||||
if (throw_on_unterminated and not reader)
|
|
||||||
throw parse_error{format("{}:{}: unterminated string '%{}{}...{}'",
|
throw parse_error{format("{}:{}: unterminated string '%{}{}...{}'",
|
||||||
coord.line, coord.column, type_name,
|
coord.line, coord.column, type_name,
|
||||||
opening_delimiter, closing_delimiter)};
|
opening_delimiter, closing_delimiter)};
|
||||||
|
|
||||||
return {type, start - str_beg, coord, token.str()};
|
return {type, start - str_beg, coord, std::move(quoted.content)};
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
String token = get_until_delimiter(reader, opening_delimiter);
|
auto quoted = parse_quoted(reader, opening_delimiter);
|
||||||
|
|
||||||
if (throw_on_unterminated and not reader)
|
if (throw_on_unterminated and not quoted.terminated)
|
||||||
throw parse_error{format("{}:{}: unterminated string '%{}{}...{}'",
|
throw parse_error{format("{}:{}: unterminated string '%{}{}...{}'",
|
||||||
coord.line, coord.column, type_name,
|
coord.line, coord.column, type_name,
|
||||||
opening_delimiter, opening_delimiter)};
|
opening_delimiter, opening_delimiter)};
|
||||||
|
|
||||||
return {type, start - str_beg, coord, std::move(token)};
|
return {type, start - str_beg, coord, std::move(quoted.content)};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -297,20 +321,16 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
|
||||||
if (c == '"' or c == '\'')
|
if (c == '"' or c == '\'')
|
||||||
{
|
{
|
||||||
start = (++m_reader).pos;
|
start = (++m_reader).pos;
|
||||||
String token = get_until_delimiter(m_reader, c);
|
QuotedResult quoted = parse_quoted(m_reader, c);
|
||||||
if (throw_on_unterminated and not m_reader)
|
if (throw_on_unterminated and not quoted.terminated)
|
||||||
throw parse_error{format("unterminated string {0}...{0}", c)};
|
throw parse_error{format("unterminated string {0}...{0}", c)};
|
||||||
if (m_reader)
|
|
||||||
++m_reader;
|
|
||||||
return Token{c == '"' ? Token::Type::RawEval
|
return Token{c == '"' ? Token::Type::RawEval
|
||||||
: Token::Type::RawQuoted,
|
: Token::Type::RawQuoted,
|
||||||
start - line.begin(), coord, std::move(token)};
|
start - line.begin(), coord, std::move(quoted.content)};
|
||||||
}
|
}
|
||||||
else if (c == '%')
|
else if (c == '%')
|
||||||
{
|
{
|
||||||
auto token = parse_percent_token(m_reader, throw_on_unterminated);
|
auto token = parse_percent_token(m_reader, throw_on_unterminated);
|
||||||
if (m_reader)
|
|
||||||
++m_reader;
|
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
else if (is_command_separator(*m_reader))
|
else if (is_command_separator(*m_reader))
|
||||||
|
@ -321,11 +341,14 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
String str = get_until_delimiter(m_reader, [](Codepoint c) {
|
if (c == '\\')
|
||||||
return is_command_separator(c) or is_horizontal_blank(c);
|
{
|
||||||
}, [](Codepoint c) { return c == '%'; });
|
auto next = utf8::codepoint(utf8::next(m_reader.pos, m_reader.str.end()), m_reader.str.end());
|
||||||
|
if (next == '%' or next == '\'' or next == '"')
|
||||||
|
++m_reader;
|
||||||
|
}
|
||||||
return Token{Token::Type::Raw, start - line.begin(),
|
return Token{Token::Type::Raw, start - line.begin(),
|
||||||
coord, std::move(str)};
|
coord, parse_unquoted(m_reader)};
|
||||||
}
|
}
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
@ -350,7 +373,7 @@ String expand_impl(StringView str, const Context& context,
|
||||||
{
|
{
|
||||||
res += reader.substr_from(beg);
|
res += reader.substr_from(beg);
|
||||||
res.back() = c;
|
res.back() = c;
|
||||||
beg = (++reader).pos;
|
beg = reader.pos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (c == '%')
|
else if (c == '%')
|
||||||
|
@ -358,7 +381,7 @@ String expand_impl(StringView str, const Context& context,
|
||||||
res += reader.substr_from(beg);
|
res += reader.substr_from(beg);
|
||||||
res += postprocess(expand_token(parse_percent_token(reader, true),
|
res += postprocess(expand_token(parse_percent_token(reader, true),
|
||||||
context, shell_context));
|
context, shell_context));
|
||||||
beg = (++reader).pos;
|
beg = reader.pos;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
++reader;
|
++reader;
|
||||||
|
@ -660,4 +683,44 @@ Completions CommandManager::complete(const Context& context,
|
||||||
return Completions{};
|
return Completions{};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
UnitTest test_command_parsing{[]
|
||||||
|
{
|
||||||
|
auto check_quoted = [](StringView str, bool terminated, StringView content)
|
||||||
|
{
|
||||||
|
Reader reader{str};
|
||||||
|
const Codepoint delimiter = *reader;
|
||||||
|
auto quoted = parse_quoted(++reader, delimiter);
|
||||||
|
kak_assert(quoted.terminated == terminated);
|
||||||
|
kak_assert(quoted.content == content);
|
||||||
|
};
|
||||||
|
|
||||||
|
check_quoted("'abc'", true, "abc");
|
||||||
|
check_quoted("'abc''def", false, "abc'def");
|
||||||
|
check_quoted("'abc''def'''", true, "abc'def'");
|
||||||
|
|
||||||
|
auto check_balanced = [](StringView str, Codepoint opening, Codepoint closing, bool terminated, StringView content)
|
||||||
|
{
|
||||||
|
Reader reader{str};
|
||||||
|
auto quoted = parse_quoted_balanced(++reader, opening, closing);
|
||||||
|
kak_assert(quoted.terminated == terminated);
|
||||||
|
kak_assert(quoted.content == content);
|
||||||
|
};
|
||||||
|
|
||||||
|
check_balanced("{abc}", '{', '}', true, "abc");
|
||||||
|
check_balanced("{abc{def}}", '{', '}', true, "abc{def}");
|
||||||
|
check_balanced("{{abc}{def}", '{', '}', false, "{abc}{def}");
|
||||||
|
|
||||||
|
auto check_unquoted = [](StringView str, StringView content)
|
||||||
|
{
|
||||||
|
Reader reader{str};
|
||||||
|
auto res = parse_unquoted(reader);
|
||||||
|
kak_assert(res == content);
|
||||||
|
};
|
||||||
|
|
||||||
|
check_unquoted("abc def", "abc");
|
||||||
|
check_unquoted("abc; def", "abc");
|
||||||
|
check_unquoted("abc\\; def", "abc;");
|
||||||
|
check_unquoted("abc\\;\\ def", "abc; def");
|
||||||
|
}};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user