CommandManager: refactor parsing of commands to iterate through tokens

Avoid storing a big vector of tokens: read them one by one and
store only the current command.
This commit is contained in:
Maxime Coste 2018-02-15 21:23:12 +11:00
parent d95530b171
commit bde1f5349d
3 changed files with 169 additions and 181 deletions

View File

@ -43,22 +43,13 @@ struct parse_error : runtime_error
: runtime_error{format("parse error: {}", error)} {}
};
namespace
{
struct Reader
{
public:
Reader(StringView s) : str{s}, pos{s.begin()}, line_start{s.begin()}, line{} {}
[[gnu::always_inline]]
Codepoint operator*() const
// Returns the codepoint found at the current read position, as computed by
// utf8::codepoint over the remaining [pos, str.end()) range.
// Precondition (checked with kak_assert): the reader is not at the end of str.
Codepoint Reader::operator*() const
{
kak_assert(pos < str.end());
return utf8::codepoint(pos, str.end());
}
Reader& operator++()
Reader& Reader::operator++()
{
kak_assert(pos < str.end());
if (*pos == '\n')
@ -67,34 +58,8 @@ public:
return *this;
}
[[gnu::always_inline]]
explicit operator bool() const { return pos < str.end(); }
[[gnu::always_inline]]
StringView substr_from(const char* start) const
namespace
{
kak_assert(start <= pos);
return {start, pos};
}
Optional<Codepoint> peek_next() const
{
auto next = utf8::next(pos, str.end());
if (next != str.end())
return utf8::codepoint(next, str.end());
return {};
}
BufferCoord coord() const
{
return {line, (int)(pos - line_start)};
}
StringView str;
const char* pos;
const char* line_start;
LineCount line;
};
bool is_command_separator(Codepoint c)
{
@ -160,8 +125,7 @@ StringView get_until_closing_delimiter(Reader& reader, Codepoint opening_delimit
return reader.substr_from(start);
}
template<bool throw_on_invalid>
Token::Type token_type(StringView type_name)
Token::Type token_type(StringView type_name, bool throw_on_invalid)
{
if (type_name == "")
return Token::Type::RawQuoted;
@ -188,7 +152,8 @@ void skip_blanks_and_comments(Reader& reader)
const Codepoint c = *reader;
if (is_horizontal_blank(c))
++reader;
else if (c == '\\' and reader.peek_next().value_or((Codepoint)'\0') == '\n')
else if (c == '\\' and reader.pos + 1 != reader.str.end() and
*(reader.pos + 1) == '\n')
++(++reader);
else if (c == '#')
{
@ -200,8 +165,7 @@ void skip_blanks_and_comments(Reader& reader)
}
}
template<bool throw_on_unterminated>
Token parse_percent_token(Reader& reader)
Token parse_percent_token(Reader& reader, bool throw_on_unterminated)
{
++reader;
const auto type_start = reader.pos;
@ -217,7 +181,7 @@ Token parse_percent_token(Reader& reader)
return {};
}
Token::Type type = token_type<throw_on_unterminated>(type_name);
Token::Type type = token_type(type_name, throw_on_unterminated);
constexpr struct CharPair { Codepoint opening; Codepoint closing; } matching_pairs[] = {
{ '(', ')' }, { '[', ']' }, { '{', '}' }, { '<', '>' }
@ -242,7 +206,7 @@ Token parse_percent_token(Reader& reader)
coord.line, coord.column, type_name,
opening_delimiter, closing_delimiter)};
return {type, start - str_beg, reader.pos - str_beg, coord, token.str()};
return {type, start - str_beg, coord, token.str()};
}
else
{
@ -253,7 +217,7 @@ Token parse_percent_token(Reader& reader)
coord.line, coord.column, type_name,
opening_delimiter, opening_delimiter)};
return {type, start - str_beg, reader.pos - str_beg, coord, std::move(token)};
return {type, start - str_beg, coord, std::move(token)};
}
}
@ -278,7 +242,6 @@ String expand_token(const Token& token, const Context& context,
}
str.resize(str.length() - trailing_eol_count, 0);
return str;
}
case Token::Type::RegisterExpand:
return context.main_sel_register_value(content).str();
@ -314,55 +277,51 @@ String expand_token(const Token& token, const Context& context,
}
template<bool throw_on_unterminated>
TokenList parse(StringView line)
CommandParser::CommandParser(StringView command_line) : m_reader{command_line} {}
Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
{
TokenList result;
skip_blanks_and_comments(m_reader);
if (not m_reader)
return {};
Reader reader{line};
while (true)
{
skip_blanks_and_comments(reader);
if (not reader)
break;
const StringView line = m_reader.str;
const char* start = m_reader.pos;
auto coord = m_reader.coord();
const char* start = reader.pos;
auto coord = reader.coord();
const Codepoint c = *reader;
const Codepoint c = *m_reader;
if (c == '"' or c == '\'')
{
start = (++reader).pos;
String token = get_until_delimiter(reader, c);
if (throw_on_unterminated and not reader)
start = (++m_reader).pos;
String token = get_until_delimiter(m_reader, c);
if (throw_on_unterminated and not m_reader)
throw parse_error{format("unterminated string {0}...{0}", c)};
result.push_back({c == '"' ? Token::Type::RawEval
++m_reader;
return Token{c == '"' ? Token::Type::RawEval
: Token::Type::RawQuoted,
start - line.begin(), reader.pos - line.begin(), coord, std::move(token)});
start - line.begin(), coord, std::move(token)};
}
else if (c == '%')
result.push_back(
parse_percent_token<throw_on_unterminated>(reader));
{
auto token = parse_percent_token(m_reader, throw_on_unterminated);
++m_reader;
return token;
}
else if (is_command_separator(*m_reader))
{
++m_reader;
return Token{Token::Type::CommandSeparator,
m_reader.pos - line.begin(), coord, {}};
}
else
{
String str = get_until_delimiter(reader, [](Codepoint c) {
String str = get_until_delimiter(m_reader, [](Codepoint c) {
return is_command_separator(c) or is_horizontal_blank(c);
});
if (not str.empty())
result.push_back({Token::Type::Raw, start - line.begin(), reader.pos - line.begin(),
coord, unescape(str, "%", '\\')});
if (reader and is_command_separator(*reader))
result.push_back({Token::Type::CommandSeparator,
reader.pos - line.begin(), utf8::next(reader.pos, line.end()) - line.begin(), coord, {}});
return Token{Token::Type::Raw, start - line.begin(),
coord, unescape(str, "%", '\\')};
}
if (not reader)
break;
++reader;
}
return result;
return {};
}
template<typename Postprocess>
@ -389,7 +348,7 @@ String expand_impl(StringView str, const Context& context,
else if (c == '%')
{
res += reader.substr_from(beg);
res += postprocess(expand_token(parse_percent_token<true>(reader),
res += postprocess(expand_token(parse_percent_token(reader, true),
context, shell_context));
beg = (++reader).pos;
}
@ -475,18 +434,29 @@ void CommandManager::execute_single_command(CommandParameters params,
void CommandManager::execute(StringView command_line,
Context& context, const ShellContext& shell_context)
{
TokenList tokens = parse<true>(command_line);
if (tokens.empty())
return;
// Tokens are going to be read as a stack
std::reverse(tokens.begin(), tokens.end());
CommandParser parser(command_line);
// Bundles the text produced by a shell expansion with the CommandParser that
// tokenizes it, so the text is owned next to the parser reading it.
// `output` is declared before `parser`, so it is already initialized when
// `parser{output}` is constructed from it.
// NOTE(review): parser is built from a view of `output`, and the Reader it
// wraps stores raw pointers into that text. The implicitly generated
// copy/move operations do not re-seat those pointers, so relocating a
// ShellParser (e.g. when the Vector holding it grows) may leave `parser`
// referring to the source object's storage. Confirm against String's
// copy/move behaviour, or store elements at a stable address.
struct ShellParser {
ShellParser(String&& str) : output{std::move(str)}, parser{output} {}
String output;
CommandParser parser;
};
Vector<ShellParser> shell_parser_stack;
// Produces the next token to process.
// Pending shell expansion output takes priority: the most recently pushed
// ShellParser is read first, and it is popped from the stack once its parser
// yields no more tokens. Only when the stack is empty is the main command
// line parser consulted. Every read passes throw_on_unterminated = true.
// Returns an empty Optional when the main parser has no token left either.
auto next_token = [&] {
while (not shell_parser_stack.empty())
{
if (auto shell_token = shell_parser_stack.back().parser.read_token(true))
return shell_token;
// This expansion is exhausted, fall back to the one below it (if any).
shell_parser_stack.pop_back();
}
return parser.read_token(true);
};
BufferCoord command_coord;
Vector<String> params;
while (not tokens.empty())
while (Optional<Token> token_opt = next_token())
{
Token token = std::move(tokens.back());
tokens.pop_back();
auto& token = *token_opt;
if (params.empty())
command_coord = token.coord;
@ -497,13 +467,7 @@ void CommandManager::execute(StringView command_line,
}
// Shell expand are retokenized
else if (token.type == Token::Type::ShellExpand)
{
auto new_tokens = parse<true>(expand_token(token, context,
shell_context));
tokens.insert(tokens.end(),
std::make_move_iterator(new_tokens.rbegin()),
std::make_move_iterator(new_tokens.rend()));
}
shell_parser_stack.emplace_back(expand_token(token, context, shell_context));
else if (token.type == Token::Type::ArgExpand and token.content == '@')
params.insert(params.end(), shell_context.params.begin(),
shell_context.params.end());
@ -515,20 +479,22 @@ void CommandManager::execute(StringView command_line,
Optional<CommandInfo> CommandManager::command_info(const Context& context, StringView command_line) const
{
TokenList tokens = parse<false>(command_line);
size_t cmd_idx = 0;
for (size_t i = 0; i < tokens.size(); ++i)
CommandParser parser{command_line};
Vector<Token> tokens;
while (auto token = parser.read_token(false))
{
if (tokens[i].type == Token::Type::CommandSeparator)
cmd_idx = i+1;
if (token->type == Token::Type::CommandSeparator)
tokens.clear();
else
tokens.push_back(std::move(*token));
}
if (cmd_idx == tokens.size() or
(tokens[cmd_idx].type != Token::Type::Raw and
tokens[cmd_idx].type != Token::Type::RawQuoted))
if (tokens.empty() or
(tokens.front().type != Token::Type::Raw and
tokens.front().type != Token::Type::RawQuoted))
return {};
auto cmd = find_command(context, tokens[cmd_idx].content);
auto cmd = find_command(context, tokens.front().content);
if (cmd == m_commands.end())
return {};
@ -540,9 +506,7 @@ Optional<CommandInfo> CommandManager::command_info(const Context& context, Strin
if (cmd->value.helper)
{
Vector<String> params;
for (auto it = tokens.begin() + cmd_idx + 1;
it != tokens.end() and it->type != Token::Type::CommandSeparator;
++it)
for (auto it = tokens.begin() + 1; it != tokens.end(); ++it)
{
if (it->type == Token::Type::Raw or
it->type == Token::Type::RawQuoted or
@ -560,7 +524,6 @@ Optional<CommandInfo> CommandManager::command_info(const Context& context, Strin
if (not aliases.empty())
res.info += format("Aliases:{}\n", aliases);
auto& switches = cmd->value.param_desc.switches;
if (not switches.empty())
res.info += format("Switches:\n{}", indent(generate_switches_doc(switches)));
@ -582,66 +545,68 @@ Completions CommandManager::complete(const Context& context,
StringView command_line,
ByteCount cursor_pos)
{
TokenList tokens = parse<false>(command_line);
CommandParser parser{command_line};
const char* cursor = command_line.begin() + (int)cursor_pos;
Vector<Token> tokens;
size_t cmd_idx = 0;
size_t tok_idx = tokens.size();
for (size_t i = 0; i < tokens.size(); ++i)
bool is_last_token = true;
while (auto token = parser.read_token(false))
{
if (tokens[i].type == Token::Type::CommandSeparator)
cmd_idx = i+1;
if (token->type == Token::Type::CommandSeparator)
{
tokens.clear();
continue;
}
if (tokens[i].begin <= cursor_pos and tokens[i].end >= cursor_pos)
tokens.push_back(std::move(*token));
if (parser.pos() >= cursor)
{
tok_idx = i;
is_last_token = false;
break;
}
}
const bool is_last_token = tok_idx == tokens.size();
if (is_last_token)
tokens.push_back({Token::Type::Raw, command_line.length(), parser.coord(), {}});
kak_assert(not tokens.empty());
const auto& token = tokens.back();
// command name completion
if (tokens.empty() or
(tok_idx == cmd_idx and (is_last_token or
tokens[tok_idx].type == Token::Type::Raw or
tokens[tok_idx].type == Token::Type::RawQuoted)))
if (tokens.size() == 1 and (token.type == Token::Type::Raw or
token.type == Token::Type::RawQuoted))
{
auto cmd_start = is_last_token ? cursor_pos : tokens[tok_idx].begin;
auto cmd_start = token.pos;
StringView query = command_line.substr(cmd_start, cursor_pos - cmd_start);
return offset_pos(complete_command_name(context, query), cmd_start);
}
kak_assert(not tokens.empty());
const ByteCount start = token.pos;
const ByteCount cursor_pos_in_token = cursor_pos - start;
ByteCount start = tok_idx < tokens.size() ?
tokens[tok_idx].begin : cursor_pos;
ByteCount cursor_pos_in_token = cursor_pos - start;
const Token::Type type = tok_idx < tokens.size() ?
tokens[tok_idx].type : Token::Type::Raw;
switch (type)
switch (token.type)
{
case Token::Type::OptionExpand:
return {start , cursor_pos,
GlobalScope::instance().option_registry().complete_option_name(
tokens[tok_idx].content, cursor_pos_in_token) };
token.content, cursor_pos_in_token) };
case Token::Type::ShellExpand:
return offset_pos(shell_complete(context, flags, tokens[tok_idx].content,
return offset_pos(shell_complete(context, flags, token.content,
cursor_pos_in_token), start);
case Token::Type::ValExpand:
return {start , cursor_pos,
ShellManager::instance().complete_env_var(
tokens[tok_idx].content, cursor_pos_in_token) };
token.content, cursor_pos_in_token) };
case Token::Type::Raw:
case Token::Type::RawQuoted:
case Token::Type::RawEval:
{
if (tokens[cmd_idx].type != Token::Type::Raw)
if (token.type != Token::Type::Raw)
return Completions{};
StringView command_name = tokens[cmd_idx].content;
StringView command_name = tokens.front().content;
if (command_name != m_last_complete_command)
{
m_last_complete_command = command_name.str();
@ -654,17 +619,15 @@ Completions CommandManager::complete(const Context& context,
return Completions();
Vector<String> params;
for (auto it = tokens.begin() + cmd_idx + 1; it != tokens.end(); ++it)
for (auto it = tokens.begin() + 1; it != tokens.end(); ++it)
params.push_back(it->content);
if (tok_idx == tokens.size())
params.emplace_back("");
Completions completions = offset_pos(command_it->value.completer(
context, flags, params, tok_idx - cmd_idx - 1,
context, flags, params, tokens.size() - 2,
cursor_pos_in_token), start);
if (type != Token::Type::RawQuoted)
if (token.type != Token::Type::RawQuoted)
{
StringView to_escape = type == Token::Type::Raw ? "% \t;" : "%";
StringView to_escape = token.type == Token::Type::Raw ? "% \t;" : "%";
for (auto& candidate : completions.candidates)
candidate = escape(candidate, to_escape, '\\');
}

View File

@ -55,16 +55,42 @@ struct Token
};
Type type;
ByteCount begin;
ByteCount end;
ByteCount pos;
BufferCoord coord;
String content;
};
using TokenList = Vector<Token>;
struct Reader
{
public:
Reader(StringView s) : str{s}, pos{s.begin()}, line_start{s.begin()}, line{} {}
template<bool throw_on_unterminated>
TokenList parse(StringView line);
Codepoint operator*() const;
Reader& operator++();
explicit operator bool() const { return pos < str.end(); }
StringView substr_from(const char* start) const { return {start, pos}; }
BufferCoord coord() const { return {line, (int)(pos - line_start)}; }
StringView str;
const char* pos;
const char* line_start;
LineCount line;
};
// Incremental command line tokenizer: tokens are handed out one at a time
// through read_token() instead of being collected into a list up front.
// NOTE(review): the parser is constructed from a StringView and its Reader
// keeps that view plus pointers into it, so the viewed text presumably has to
// outlive the parser — confirm at call sites.
class CommandParser
{
public:
// Starts reading at the beginning of command_line.
CommandParser(StringView command_line);
// Reads and returns the next token, or an empty Optional once no input
// remains. throw_on_unterminated selects whether an unterminated construct
// (e.g. a quoted string missing its closing delimiter) raises a parse_error.
Optional<Token> read_token(bool throw_on_unterminated);
// Current read position, as a pointer into the parsed text.
const char* pos() const { return m_reader.pos; }
// Line / column coordinates of the current read position.
BufferCoord coord() const { return m_reader.coord(); }
// True once the underlying reader has reached the end of its input.
bool done() const { return not m_reader; }
private:
Reader m_reader;
};
class CommandManager : public Singleton<CommandManager>
{

View File

@ -509,19 +509,18 @@ HighlighterAndId create_dynamic_regex_highlighter(HighlighterParameters params)
};
auto get_face = [faces](const Context& context){ return faces;; };
String expr = params[0];
auto tokens = parse<true>(expr);
if (tokens.size() == 1 and tokens[0].type == Token::Type::OptionExpand and
GlobalScope::instance().options()[tokens[0].content].is_of_type<Regex>())
CommandParser parser{params[0]};
auto token = parser.read_token(true);
if (token and parser.done() and token->type == Token::Type::OptionExpand and
GlobalScope::instance().options()[token->content].is_of_type<Regex>())
{
String option_name = tokens[0].content;
auto get_regex = [option_name](const Context& context) {
auto get_regex = [option_name = token->content](const Context& context) {
return context.options()[option_name].get<Regex>();
};
return {format("dynregex_{}", expr), make_hl(get_regex, get_face)};
return {format("dynregex_{}", params[0]), make_hl(get_regex, get_face)};
}
auto get_regex = [expr](const Context& context){
auto get_regex = [expr = params[0]](const Context& context){
try
{
auto re = expand(expr, context);
@ -533,7 +532,7 @@ HighlighterAndId create_dynamic_regex_highlighter(HighlighterParameters params)
return Regex{};
}
};
return {format("dynregex_{}", expr), make_hl(get_regex, get_face)};
return {format("dynregex_{}", params[0]), make_hl(get_regex, get_face)};
}
HighlighterAndId create_line_highlighter(HighlighterParameters params)