CommandManager: refactor parsing of commands to iterate through tokens

Avoid storing a big vector of tokens: read them one by one and
store only the current command.
This commit is contained in:
Maxime Coste 2018-02-15 21:23:12 +11:00
parent d95530b171
commit bde1f5349d
3 changed files with 169 additions and 181 deletions

View File

@ -43,22 +43,13 @@ struct parse_error : runtime_error
: runtime_error{format("parse error: {}", error)} {} : runtime_error{format("parse error: {}", error)} {}
}; };
namespace Codepoint Reader::operator*() const
{
struct Reader
{
public:
Reader(StringView s) : str{s}, pos{s.begin()}, line_start{s.begin()}, line{} {}
[[gnu::always_inline]]
Codepoint operator*() const
{ {
kak_assert(pos < str.end()); kak_assert(pos < str.end());
return utf8::codepoint(pos, str.end()); return utf8::codepoint(pos, str.end());
} }
Reader& operator++() Reader& Reader::operator++()
{ {
kak_assert(pos < str.end()); kak_assert(pos < str.end());
if (*pos == '\n') if (*pos == '\n')
@ -67,34 +58,8 @@ public:
return *this; return *this;
} }
[[gnu::always_inline]] namespace
explicit operator bool() const { return pos < str.end(); }
[[gnu::always_inline]]
StringView substr_from(const char* start) const
{ {
kak_assert(start <= pos);
return {start, pos};
}
Optional<Codepoint> peek_next() const
{
auto next = utf8::next(pos, str.end());
if (next != str.end())
return utf8::codepoint(next, str.end());
return {};
}
BufferCoord coord() const
{
return {line, (int)(pos - line_start)};
}
StringView str;
const char* pos;
const char* line_start;
LineCount line;
};
bool is_command_separator(Codepoint c) bool is_command_separator(Codepoint c)
{ {
@ -160,8 +125,7 @@ StringView get_until_closing_delimiter(Reader& reader, Codepoint opening_delimit
return reader.substr_from(start); return reader.substr_from(start);
} }
template<bool throw_on_invalid> Token::Type token_type(StringView type_name, bool throw_on_invalid)
Token::Type token_type(StringView type_name)
{ {
if (type_name == "") if (type_name == "")
return Token::Type::RawQuoted; return Token::Type::RawQuoted;
@ -188,7 +152,8 @@ void skip_blanks_and_comments(Reader& reader)
const Codepoint c = *reader; const Codepoint c = *reader;
if (is_horizontal_blank(c)) if (is_horizontal_blank(c))
++reader; ++reader;
else if (c == '\\' and reader.peek_next().value_or((Codepoint)'\0') == '\n') else if (c == '\\' and reader.pos + 1 != reader.str.end() and
*(reader.pos + 1) == '\n')
++(++reader); ++(++reader);
else if (c == '#') else if (c == '#')
{ {
@ -200,8 +165,7 @@ void skip_blanks_and_comments(Reader& reader)
} }
} }
template<bool throw_on_unterminated> Token parse_percent_token(Reader& reader, bool throw_on_unterminated)
Token parse_percent_token(Reader& reader)
{ {
++reader; ++reader;
const auto type_start = reader.pos; const auto type_start = reader.pos;
@ -217,7 +181,7 @@ Token parse_percent_token(Reader& reader)
return {}; return {};
} }
Token::Type type = token_type<throw_on_unterminated>(type_name); Token::Type type = token_type(type_name, throw_on_unterminated);
constexpr struct CharPair { Codepoint opening; Codepoint closing; } matching_pairs[] = { constexpr struct CharPair { Codepoint opening; Codepoint closing; } matching_pairs[] = {
{ '(', ')' }, { '[', ']' }, { '{', '}' }, { '<', '>' } { '(', ')' }, { '[', ']' }, { '{', '}' }, { '<', '>' }
@ -242,7 +206,7 @@ Token parse_percent_token(Reader& reader)
coord.line, coord.column, type_name, coord.line, coord.column, type_name,
opening_delimiter, closing_delimiter)}; opening_delimiter, closing_delimiter)};
return {type, start - str_beg, reader.pos - str_beg, coord, token.str()}; return {type, start - str_beg, coord, token.str()};
} }
else else
{ {
@ -253,7 +217,7 @@ Token parse_percent_token(Reader& reader)
coord.line, coord.column, type_name, coord.line, coord.column, type_name,
opening_delimiter, opening_delimiter)}; opening_delimiter, opening_delimiter)};
return {type, start - str_beg, reader.pos - str_beg, coord, std::move(token)}; return {type, start - str_beg, coord, std::move(token)};
} }
} }
@ -278,7 +242,6 @@ String expand_token(const Token& token, const Context& context,
} }
str.resize(str.length() - trailing_eol_count, 0); str.resize(str.length() - trailing_eol_count, 0);
return str; return str;
} }
case Token::Type::RegisterExpand: case Token::Type::RegisterExpand:
return context.main_sel_register_value(content).str(); return context.main_sel_register_value(content).str();
@ -314,55 +277,51 @@ String expand_token(const Token& token, const Context& context,
} }
template<bool throw_on_unterminated> CommandParser::CommandParser(StringView command_line) : m_reader{command_line} {}
TokenList parse(StringView line)
Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
{ {
TokenList result; skip_blanks_and_comments(m_reader);
if (not m_reader)
return {};
Reader reader{line}; const StringView line = m_reader.str;
while (true) const char* start = m_reader.pos;
{ auto coord = m_reader.coord();
skip_blanks_and_comments(reader);
if (not reader)
break;
const char* start = reader.pos; const Codepoint c = *m_reader;
auto coord = reader.coord();
const Codepoint c = *reader;
if (c == '"' or c == '\'') if (c == '"' or c == '\'')
{ {
start = (++reader).pos; start = (++m_reader).pos;
String token = get_until_delimiter(reader, c); String token = get_until_delimiter(m_reader, c);
if (throw_on_unterminated and not reader) if (throw_on_unterminated and not m_reader)
throw parse_error{format("unterminated string {0}...{0}", c)}; throw parse_error{format("unterminated string {0}...{0}", c)};
result.push_back({c == '"' ? Token::Type::RawEval ++m_reader;
return Token{c == '"' ? Token::Type::RawEval
: Token::Type::RawQuoted, : Token::Type::RawQuoted,
start - line.begin(), reader.pos - line.begin(), coord, std::move(token)}); start - line.begin(), coord, std::move(token)};
} }
else if (c == '%') else if (c == '%')
result.push_back( {
parse_percent_token<throw_on_unterminated>(reader)); auto token = parse_percent_token(m_reader, throw_on_unterminated);
++m_reader;
return token;
}
else if (is_command_separator(*m_reader))
{
++m_reader;
return Token{Token::Type::CommandSeparator,
m_reader.pos - line.begin(), coord, {}};
}
else else
{ {
String str = get_until_delimiter(reader, [](Codepoint c) { String str = get_until_delimiter(m_reader, [](Codepoint c) {
return is_command_separator(c) or is_horizontal_blank(c); return is_command_separator(c) or is_horizontal_blank(c);
}); });
return Token{Token::Type::Raw, start - line.begin(),
if (not str.empty()) coord, unescape(str, "%", '\\')};
result.push_back({Token::Type::Raw, start - line.begin(), reader.pos - line.begin(),
coord, unescape(str, "%", '\\')});
if (reader and is_command_separator(*reader))
result.push_back({Token::Type::CommandSeparator,
reader.pos - line.begin(), utf8::next(reader.pos, line.end()) - line.begin(), coord, {}});
} }
return {};
if (not reader)
break;
++reader;
}
return result;
} }
template<typename Postprocess> template<typename Postprocess>
@ -389,7 +348,7 @@ String expand_impl(StringView str, const Context& context,
else if (c == '%') else if (c == '%')
{ {
res += reader.substr_from(beg); res += reader.substr_from(beg);
res += postprocess(expand_token(parse_percent_token<true>(reader), res += postprocess(expand_token(parse_percent_token(reader, true),
context, shell_context)); context, shell_context));
beg = (++reader).pos; beg = (++reader).pos;
} }
@ -475,18 +434,29 @@ void CommandManager::execute_single_command(CommandParameters params,
void CommandManager::execute(StringView command_line, void CommandManager::execute(StringView command_line,
Context& context, const ShellContext& shell_context) Context& context, const ShellContext& shell_context)
{ {
TokenList tokens = parse<true>(command_line); CommandParser parser(command_line);
if (tokens.empty()) struct ShellParser {
return; ShellParser(String&& str) : output{std::move(str)}, parser{output} {}
// Tokens are going to be read as a stack String output;
std::reverse(tokens.begin(), tokens.end()); CommandParser parser;
};
Vector<ShellParser> shell_parser_stack;
auto next_token = [&] {
while (not shell_parser_stack.empty())
{
if (auto shell_token = shell_parser_stack.back().parser.read_token(true))
return shell_token;
shell_parser_stack.pop_back();
}
return parser.read_token(true);
};
BufferCoord command_coord; BufferCoord command_coord;
Vector<String> params; Vector<String> params;
while (not tokens.empty()) while (Optional<Token> token_opt = next_token())
{ {
Token token = std::move(tokens.back()); auto& token = *token_opt;
tokens.pop_back();
if (params.empty()) if (params.empty())
command_coord = token.coord; command_coord = token.coord;
@ -497,13 +467,7 @@ void CommandManager::execute(StringView command_line,
} }
// Shell expand are retokenized // Shell expand are retokenized
else if (token.type == Token::Type::ShellExpand) else if (token.type == Token::Type::ShellExpand)
{ shell_parser_stack.emplace_back(expand_token(token, context, shell_context));
auto new_tokens = parse<true>(expand_token(token, context,
shell_context));
tokens.insert(tokens.end(),
std::make_move_iterator(new_tokens.rbegin()),
std::make_move_iterator(new_tokens.rend()));
}
else if (token.type == Token::Type::ArgExpand and token.content == '@') else if (token.type == Token::Type::ArgExpand and token.content == '@')
params.insert(params.end(), shell_context.params.begin(), params.insert(params.end(), shell_context.params.begin(),
shell_context.params.end()); shell_context.params.end());
@ -515,20 +479,22 @@ void CommandManager::execute(StringView command_line,
Optional<CommandInfo> CommandManager::command_info(const Context& context, StringView command_line) const Optional<CommandInfo> CommandManager::command_info(const Context& context, StringView command_line) const
{ {
TokenList tokens = parse<false>(command_line); CommandParser parser{command_line};
size_t cmd_idx = 0; Vector<Token> tokens;
for (size_t i = 0; i < tokens.size(); ++i) while (auto token = parser.read_token(false))
{ {
if (tokens[i].type == Token::Type::CommandSeparator) if (token->type == Token::Type::CommandSeparator)
cmd_idx = i+1; tokens.clear();
else
tokens.push_back(std::move(*token));
} }
if (cmd_idx == tokens.size() or if (tokens.empty() or
(tokens[cmd_idx].type != Token::Type::Raw and (tokens.front().type != Token::Type::Raw and
tokens[cmd_idx].type != Token::Type::RawQuoted)) tokens.front().type != Token::Type::RawQuoted))
return {}; return {};
auto cmd = find_command(context, tokens[cmd_idx].content); auto cmd = find_command(context, tokens.front().content);
if (cmd == m_commands.end()) if (cmd == m_commands.end())
return {}; return {};
@ -540,9 +506,7 @@ Optional<CommandInfo> CommandManager::command_info(const Context& context, Strin
if (cmd->value.helper) if (cmd->value.helper)
{ {
Vector<String> params; Vector<String> params;
for (auto it = tokens.begin() + cmd_idx + 1; for (auto it = tokens.begin() + 1; it != tokens.end(); ++it)
it != tokens.end() and it->type != Token::Type::CommandSeparator;
++it)
{ {
if (it->type == Token::Type::Raw or if (it->type == Token::Type::Raw or
it->type == Token::Type::RawQuoted or it->type == Token::Type::RawQuoted or
@ -560,7 +524,6 @@ Optional<CommandInfo> CommandManager::command_info(const Context& context, Strin
if (not aliases.empty()) if (not aliases.empty())
res.info += format("Aliases:{}\n", aliases); res.info += format("Aliases:{}\n", aliases);
auto& switches = cmd->value.param_desc.switches; auto& switches = cmd->value.param_desc.switches;
if (not switches.empty()) if (not switches.empty())
res.info += format("Switches:\n{}", indent(generate_switches_doc(switches))); res.info += format("Switches:\n{}", indent(generate_switches_doc(switches)));
@ -582,66 +545,68 @@ Completions CommandManager::complete(const Context& context,
StringView command_line, StringView command_line,
ByteCount cursor_pos) ByteCount cursor_pos)
{ {
TokenList tokens = parse<false>(command_line); CommandParser parser{command_line};
const char* cursor = command_line.begin() + (int)cursor_pos;
Vector<Token> tokens;
size_t cmd_idx = 0; bool is_last_token = true;
size_t tok_idx = tokens.size(); while (auto token = parser.read_token(false))
for (size_t i = 0; i < tokens.size(); ++i)
{ {
if (tokens[i].type == Token::Type::CommandSeparator) if (token->type == Token::Type::CommandSeparator)
cmd_idx = i+1; {
tokens.clear();
continue;
}
if (tokens[i].begin <= cursor_pos and tokens[i].end >= cursor_pos) tokens.push_back(std::move(*token));
if (parser.pos() >= cursor)
{ {
tok_idx = i; is_last_token = false;
break; break;
} }
} }
const bool is_last_token = tok_idx == tokens.size(); if (is_last_token)
tokens.push_back({Token::Type::Raw, command_line.length(), parser.coord(), {}});
kak_assert(not tokens.empty());
const auto& token = tokens.back();
// command name completion // command name completion
if (tokens.empty() or if (tokens.size() == 1 and (token.type == Token::Type::Raw or
(tok_idx == cmd_idx and (is_last_token or token.type == Token::Type::RawQuoted))
tokens[tok_idx].type == Token::Type::Raw or
tokens[tok_idx].type == Token::Type::RawQuoted)))
{ {
auto cmd_start = is_last_token ? cursor_pos : tokens[tok_idx].begin; auto cmd_start = token.pos;
StringView query = command_line.substr(cmd_start, cursor_pos - cmd_start); StringView query = command_line.substr(cmd_start, cursor_pos - cmd_start);
return offset_pos(complete_command_name(context, query), cmd_start); return offset_pos(complete_command_name(context, query), cmd_start);
} }
kak_assert(not tokens.empty()); const ByteCount start = token.pos;
const ByteCount cursor_pos_in_token = cursor_pos - start;
ByteCount start = tok_idx < tokens.size() ? switch (token.type)
tokens[tok_idx].begin : cursor_pos;
ByteCount cursor_pos_in_token = cursor_pos - start;
const Token::Type type = tok_idx < tokens.size() ?
tokens[tok_idx].type : Token::Type::Raw;
switch (type)
{ {
case Token::Type::OptionExpand: case Token::Type::OptionExpand:
return {start , cursor_pos, return {start , cursor_pos,
GlobalScope::instance().option_registry().complete_option_name( GlobalScope::instance().option_registry().complete_option_name(
tokens[tok_idx].content, cursor_pos_in_token) }; token.content, cursor_pos_in_token) };
case Token::Type::ShellExpand: case Token::Type::ShellExpand:
return offset_pos(shell_complete(context, flags, tokens[tok_idx].content, return offset_pos(shell_complete(context, flags, token.content,
cursor_pos_in_token), start); cursor_pos_in_token), start);
case Token::Type::ValExpand: case Token::Type::ValExpand:
return {start , cursor_pos, return {start , cursor_pos,
ShellManager::instance().complete_env_var( ShellManager::instance().complete_env_var(
tokens[tok_idx].content, cursor_pos_in_token) }; token.content, cursor_pos_in_token) };
case Token::Type::Raw: case Token::Type::Raw:
case Token::Type::RawQuoted: case Token::Type::RawQuoted:
case Token::Type::RawEval: case Token::Type::RawEval:
{ {
if (tokens[cmd_idx].type != Token::Type::Raw) if (token.type != Token::Type::Raw)
return Completions{}; return Completions{};
StringView command_name = tokens[cmd_idx].content; StringView command_name = tokens.front().content;
if (command_name != m_last_complete_command) if (command_name != m_last_complete_command)
{ {
m_last_complete_command = command_name.str(); m_last_complete_command = command_name.str();
@ -654,17 +619,15 @@ Completions CommandManager::complete(const Context& context,
return Completions(); return Completions();
Vector<String> params; Vector<String> params;
for (auto it = tokens.begin() + cmd_idx + 1; it != tokens.end(); ++it) for (auto it = tokens.begin() + 1; it != tokens.end(); ++it)
params.push_back(it->content); params.push_back(it->content);
if (tok_idx == tokens.size())
params.emplace_back("");
Completions completions = offset_pos(command_it->value.completer( Completions completions = offset_pos(command_it->value.completer(
context, flags, params, tok_idx - cmd_idx - 1, context, flags, params, tokens.size() - 2,
cursor_pos_in_token), start); cursor_pos_in_token), start);
if (type != Token::Type::RawQuoted) if (token.type != Token::Type::RawQuoted)
{ {
StringView to_escape = type == Token::Type::Raw ? "% \t;" : "%"; StringView to_escape = token.type == Token::Type::Raw ? "% \t;" : "%";
for (auto& candidate : completions.candidates) for (auto& candidate : completions.candidates)
candidate = escape(candidate, to_escape, '\\'); candidate = escape(candidate, to_escape, '\\');
} }

View File

@ -55,16 +55,42 @@ struct Token
}; };
Type type; Type type;
ByteCount begin; ByteCount pos;
ByteCount end;
BufferCoord coord; BufferCoord coord;
String content; String content;
}; };
using TokenList = Vector<Token>; struct Reader
{
public:
Reader(StringView s) : str{s}, pos{s.begin()}, line_start{s.begin()}, line{} {}
template<bool throw_on_unterminated> Codepoint operator*() const;
TokenList parse(StringView line); Reader& operator++();
explicit operator bool() const { return pos < str.end(); }
StringView substr_from(const char* start) const { return {start, pos}; }
BufferCoord coord() const { return {line, (int)(pos - line_start)}; }
StringView str;
const char* pos;
const char* line_start;
LineCount line;
};
// Incremental tokenizer for a command line: rather than building the whole
// token list up front, it hands tokens out one at a time via read_token().
class CommandParser
{
public:
// Starts reading at the beginning of command_line. The underlying Reader
// only keeps a StringView of it, so the text presumably has to outlive the
// parser -- TODO confirm against StringView's ownership semantics.
CommandParser(StringView command_line);
// Skips blanks and comments, then returns the next token; returns an empty
// Optional once the input is exhausted. When throw_on_unterminated is true,
// an unterminated quoted string raises a parse_error; the flag is also
// forwarded to the %-token parser.
Optional<Token> read_token(bool throw_on_unterminated);
// Current read position inside the command line.
const char* pos() const { return m_reader.pos; }
// Line and offset-within-line of the current read position.
BufferCoord coord() const { return m_reader.coord(); }
// True once the reader has reached the end of the command line.
bool done() const { return not m_reader; }
private:
// Cursor over the command line; advanced by read_token().
Reader m_reader;
};
class CommandManager : public Singleton<CommandManager> class CommandManager : public Singleton<CommandManager>
{ {

View File

@ -509,19 +509,18 @@ HighlighterAndId create_dynamic_regex_highlighter(HighlighterParameters params)
}; };
auto get_face = [faces](const Context& context){ return faces;; }; auto get_face = [faces](const Context& context){ return faces;; };
String expr = params[0]; CommandParser parser{params[0]};
auto tokens = parse<true>(expr); auto token = parser.read_token(true);
if (tokens.size() == 1 and tokens[0].type == Token::Type::OptionExpand and if (token and parser.done() and token->type == Token::Type::OptionExpand and
GlobalScope::instance().options()[tokens[0].content].is_of_type<Regex>()) GlobalScope::instance().options()[token->content].is_of_type<Regex>())
{ {
String option_name = tokens[0].content; auto get_regex = [option_name = token->content](const Context& context) {
auto get_regex = [option_name](const Context& context) {
return context.options()[option_name].get<Regex>(); return context.options()[option_name].get<Regex>();
}; };
return {format("dynregex_{}", expr), make_hl(get_regex, get_face)}; return {format("dynregex_{}", params[0]), make_hl(get_regex, get_face)};
} }
auto get_regex = [expr](const Context& context){ auto get_regex = [expr = params[0]](const Context& context){
try try
{ {
auto re = expand(expr, context); auto re = expand(expr, context);
@ -533,7 +532,7 @@ HighlighterAndId create_dynamic_regex_highlighter(HighlighterParameters params)
return Regex{}; return Regex{};
} }
}; };
return {format("dynregex_{}", expr), make_hl(get_regex, get_face)}; return {format("dynregex_{}", params[0]), make_hl(get_regex, get_face)};
} }
HighlighterAndId create_line_highlighter(HighlighterParameters params) HighlighterAndId create_line_highlighter(HighlighterParameters params)