From bde1f5349d218298de04ee453b304de3c053800e Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Thu, 15 Feb 2018 21:23:12 +1100 Subject: [PATCH] CommandManager: refactor parsing of commands to iterate through tokens Avoid storing a big vector of tokens, read them one by one, and store only the current command. --- src/command_manager.cc | 297 ++++++++++++++++++----------------------- src/command_manager.hh | 36 ++++- src/highlighters.cc | 17 ++- 3 files changed, 169 insertions(+), 181 deletions(-) diff --git a/src/command_manager.cc b/src/command_manager.cc index e5854b7b..cbc18954 100644 --- a/src/command_manager.cc +++ b/src/command_manager.cc @@ -43,59 +43,24 @@ struct parse_error : runtime_error : runtime_error{format("parse error: {}", error)} {} }; +Codepoint Reader::operator*() const +{ + kak_assert(pos < str.end()); + return utf8::codepoint(pos, str.end()); +} + +Reader& Reader::operator++() +{ + kak_assert(pos < str.end()); + if (*pos == '\n') + ++line; + utf8::to_next(pos, str.end()); + return *this; +} + namespace { -struct Reader -{ -public: - Reader(StringView s) : str{s}, pos{s.begin()}, line_start{s.begin()}, line{} {} - - [[gnu::always_inline]] - Codepoint operator*() const - { - kak_assert(pos < str.end()); - return utf8::codepoint(pos, str.end()); - } - - Reader& operator++() - { - kak_assert(pos < str.end()); - if (*pos == '\n') - ++line; - utf8::to_next(pos, str.end()); - return *this; - } - - [[gnu::always_inline]] - explicit operator bool() const { return pos < str.end(); } - - [[gnu::always_inline]] - StringView substr_from(const char* start) const - { - kak_assert(start <= pos); - return {start, pos}; - } - - Optional peek_next() const - { - auto next = utf8::next(pos, str.end()); - if (next != str.end()) - return utf8::codepoint(next, str.end()); - return {}; - } - - BufferCoord coord() const - { - return {line, (int)(pos - line_start)}; - } - - StringView str; - const char* pos; - const char* line_start; - LineCount line; -}; - bool is_command_separator(Codepoint c) { return c == ';' or c == '\n'; @@ -160,8 +125,7 @@ StringView get_until_closing_delimiter(Reader& reader, Codepoint opening_delimit return reader.substr_from(start); } -template -Token::Type token_type(StringView type_name) +Token::Type token_type(StringView type_name, bool throw_on_invalid) { if (type_name == "") return Token::Type::RawQuoted; @@ -188,7 +152,8 @@ void skip_blanks_and_comments(Reader& reader) const Codepoint c = *reader; if (is_horizontal_blank(c)) ++reader; - else if (c == '\\' and reader.peek_next().value_or((Codepoint)'\0') == '\n') + else if (c == '\\' and reader.pos + 1 != reader.str.end() and + *(reader.pos + 1) == '\n') ++(++reader); else if (c == '#') { @@ -200,8 +165,7 @@ void skip_blanks_and_comments(Reader& reader) } } -template -Token parse_percent_token(Reader& reader) +Token parse_percent_token(Reader& reader, bool throw_on_unterminated) { ++reader; const auto type_start = reader.pos; @@ -217,7 +181,7 @@ Token parse_percent_token(Reader& reader) return {}; } - Token::Type type = token_type(type_name); + Token::Type type = token_type(type_name, throw_on_unterminated); constexpr struct CharPair { Codepoint opening; Codepoint closing; } matching_pairs[] = { { '(', ')' }, { '[', ']' }, { '{', '}' }, { '<', '>' } @@ -242,7 +206,7 @@ Token parse_percent_token(Reader& reader) coord.line, coord.column, type_name, opening_delimiter, closing_delimiter)}; - return {type, start - str_beg, reader.pos - str_beg, coord, token.str()}; + return {type, start - str_beg, coord, token.str()}; } else { @@ -253,7 +217,7 @@ Token parse_percent_token(Reader& reader) coord.line, coord.column, type_name, opening_delimiter, opening_delimiter)}; - return {type, start - str_beg, reader.pos - str_beg, coord, std::move(token)}; + return {type, start - str_beg, coord, std::move(token)}; } } @@ -278,7 +242,6 @@ String expand_token(const Token& token, const Context& context, } str.resize(str.length() - trailing_eol_count, 0); return str; - } case Token::Type::RegisterExpand: return context.main_sel_register_value(content).str(); @@ -314,55 +277,51 @@ String expand_token(const Token& token, const Context& context, } -template -TokenList parse(StringView line) +CommandParser::CommandParser(StringView command_line) : m_reader{command_line} {} + +Optional CommandParser::read_token(bool throw_on_unterminated) { - TokenList result; + skip_blanks_and_comments(m_reader); + if (not m_reader) + return {}; - Reader reader{line}; - while (true) + const StringView line = m_reader.str; + const char* start = m_reader.pos; + auto coord = m_reader.coord(); + + const Codepoint c = *m_reader; + if (c == '"' or c == '\'') { - skip_blanks_and_comments(reader); - if (not reader) - break; - - const char* start = reader.pos; - auto coord = reader.coord(); - - const Codepoint c = *reader; - if (c == '"' or c == '\'') - { - start = (++reader).pos; - String token = get_until_delimiter(reader, c); - if (throw_on_unterminated and not reader) - throw parse_error{format("unterminated string {0}...{0}", c)}; - result.push_back({c == '"' ? Token::Type::RawEval - : Token::Type::RawQuoted, - start - line.begin(), reader.pos - line.begin(), coord, std::move(token)}); - } - else if (c == '%') - result.push_back( - parse_percent_token(reader)); - else - { - String str = get_until_delimiter(reader, [](Codepoint c) { - return is_command_separator(c) or is_horizontal_blank(c); - }); - - if (not str.empty()) - result.push_back({Token::Type::Raw, start - line.begin(), reader.pos - line.begin(), - coord, unescape(str, "%", '\\')}); - - if (reader and is_command_separator(*reader)) - result.push_back({Token::Type::CommandSeparator, - reader.pos - line.begin(), utf8::next(reader.pos, line.end()) - line.begin(), coord, {}}); - } - - if (not reader) - break; - ++reader; + start = (++m_reader).pos; + String token = get_until_delimiter(m_reader, c); + if (throw_on_unterminated and not m_reader) + throw parse_error{format("unterminated string {0}...{0}", c)}; + ++m_reader; + return Token{c == '"' ? Token::Type::RawEval + : Token::Type::RawQuoted, + start - line.begin(), coord, std::move(token)}; } - return result; + else if (c == '%') + { + auto token = parse_percent_token(m_reader, throw_on_unterminated); + ++m_reader; + return token; + } + else if (is_command_separator(*m_reader)) + { + ++m_reader; + return Token{Token::Type::CommandSeparator, + m_reader.pos - line.begin(), coord, {}}; + } + else + { + String str = get_until_delimiter(m_reader, [](Codepoint c) { + return is_command_separator(c) or is_horizontal_blank(c); + }); + return Token{Token::Type::Raw, start - line.begin(), + coord, unescape(str, "%", '\\')}; + } + return {}; } template @@ -389,7 +348,7 @@ String expand_impl(StringView str, const Context& context, else if (c == '%') { res += reader.substr_from(beg); - res += postprocess(expand_token(parse_percent_token(reader), + res += postprocess(expand_token(parse_percent_token(reader, true), context, shell_context)); beg = (++reader).pos; } @@ -475,18 +434,29 @@ void CommandManager::execute_single_command(CommandParameters params, void CommandManager::execute(StringView command_line, Context& context, const ShellContext& shell_context) { - TokenList tokens = parse(command_line); - if (tokens.empty()) - return; - // Tokens are going to be read as a stack - std::reverse(tokens.begin(), tokens.end()); + CommandParser parser(command_line); + struct ShellParser { + ShellParser(String&& str) : output{std::move(str)}, parser{output} {} + String output; + CommandParser parser; + }; + Vector shell_parser_stack; + + auto next_token = [&] { + while (not shell_parser_stack.empty()) + { + if (auto shell_token = shell_parser_stack.back().parser.read_token(true)) + return shell_token; + shell_parser_stack.pop_back(); + } + return parser.read_token(true); + }; BufferCoord command_coord; Vector params; - while (not tokens.empty()) + while (Optional token_opt = next_token()) { - Token token = std::move(tokens.back()); - tokens.pop_back(); + auto& token = *token_opt; if (params.empty()) command_coord = token.coord; @@ -497,13 +467,7 @@ void CommandManager::execute(StringView command_line, } // Shell expand are retokenized else if (token.type == Token::Type::ShellExpand) - { - auto new_tokens = parse(expand_token(token, context, - shell_context)); - tokens.insert(tokens.end(), - std::make_move_iterator(new_tokens.rbegin()), - std::make_move_iterator(new_tokens.rend())); - } + shell_parser_stack.emplace_back(expand_token(token, context, shell_context)); else if (token.type == Token::Type::ArgExpand and token.content == '@') params.insert(params.end(), shell_context.params.begin(), shell_context.params.end()); @@ -515,20 +479,22 @@ void CommandManager::execute(StringView command_line, Optional CommandManager::command_info(const Context& context, StringView command_line) const { - TokenList tokens = parse(command_line); - size_t cmd_idx = 0; - for (size_t i = 0; i < tokens.size(); ++i) + CommandParser parser{command_line}; + Vector tokens; + while (auto token = parser.read_token(false)) { - if (tokens[i].type == Token::Type::CommandSeparator) - cmd_idx = i+1; + if (token->type == Token::Type::CommandSeparator) + tokens.clear(); + else + tokens.push_back(std::move(*token)); } - if (cmd_idx == tokens.size() or - (tokens[cmd_idx].type != Token::Type::Raw and - tokens[cmd_idx].type != Token::Type::RawQuoted)) + if (tokens.empty() or + (tokens.front().type != Token::Type::Raw and + tokens.front().type != Token::Type::RawQuoted)) return {}; - auto cmd = find_command(context, tokens[cmd_idx].content); + auto cmd = find_command(context, tokens.front().content); if (cmd == m_commands.end()) return {}; @@ -540,9 +506,7 @@ Optional CommandManager::command_info(const Context& context, Strin if (cmd->value.helper) { Vector params; - for (auto it = tokens.begin() + cmd_idx + 1; - it != tokens.end() and it->type != Token::Type::CommandSeparator; - ++it) + for (auto it = tokens.begin() + 1; it != tokens.end(); ++it) { if (it->type == Token::Type::Raw or it->type == Token::Type::RawQuoted or @@ -560,7 +524,6 @@ Optional CommandManager::command_info(const Context& context, Strin if (not aliases.empty()) res.info += format("Aliases:{}\n", aliases); - auto& switches = cmd->value.param_desc.switches; if (not switches.empty()) res.info += format("Switches:\n{}", indent(generate_switches_doc(switches))); @@ -582,66 +545,68 @@ Completions CommandManager::complete(const Context& context, StringView command_line, ByteCount cursor_pos) { - TokenList tokens = parse(command_line); + CommandParser parser{command_line}; + const char* cursor = command_line.begin() + (int)cursor_pos; + Vector tokens; - size_t cmd_idx = 0; - size_t tok_idx = tokens.size(); - for (size_t i = 0; i < tokens.size(); ++i) + bool is_last_token = true; + while (auto token = parser.read_token(false)) { - if (tokens[i].type == Token::Type::CommandSeparator) - cmd_idx = i+1; - - if (tokens[i].begin <= cursor_pos and tokens[i].end >= cursor_pos) + if (token->type == Token::Type::CommandSeparator) { - tok_idx = i; + tokens.clear(); + continue; + } + + tokens.push_back(std::move(*token)); + if (parser.pos() >= cursor) + { + is_last_token = false; break; } } - const bool is_last_token = tok_idx == tokens.size(); + if (is_last_token) + tokens.push_back({Token::Type::Raw, command_line.length(), parser.coord(), {}}); + kak_assert(not tokens.empty()); + const auto& token = tokens.back(); + // command name completion - if (tokens.empty() or - (tok_idx == cmd_idx and (is_last_token or - tokens[tok_idx].type == Token::Type::Raw or - tokens[tok_idx].type == Token::Type::RawQuoted))) + if (tokens.size() == 1 and (token.type == Token::Type::Raw or + token.type == Token::Type::RawQuoted)) { - auto cmd_start = is_last_token ? cursor_pos : tokens[tok_idx].begin; + auto cmd_start = token.pos; StringView query = command_line.substr(cmd_start, cursor_pos - cmd_start); return offset_pos(complete_command_name(context, query), cmd_start); } - kak_assert(not tokens.empty()); + const ByteCount start = token.pos; + const ByteCount cursor_pos_in_token = cursor_pos - start; - ByteCount start = tok_idx < tokens.size() ? - tokens[tok_idx].begin : cursor_pos; - ByteCount cursor_pos_in_token = cursor_pos - start; - - const Token::Type type = tok_idx < tokens.size() ? - tokens[tok_idx].type : Token::Type::Raw; - switch (type) + switch (token.type) { case Token::Type::OptionExpand: return {start , cursor_pos, GlobalScope::instance().option_registry().complete_option_name( - tokens[tok_idx].content, cursor_pos_in_token) }; + token.content, cursor_pos_in_token) }; case Token::Type::ShellExpand: - return offset_pos(shell_complete(context, flags, tokens[tok_idx].content, + return offset_pos(shell_complete(context, flags, token.content, cursor_pos_in_token), start); case Token::Type::ValExpand: return {start , cursor_pos, ShellManager::instance().complete_env_var( - tokens[tok_idx].content, cursor_pos_in_token) }; + token.content, cursor_pos_in_token) }; case Token::Type::Raw: case Token::Type::RawQuoted: case Token::Type::RawEval: { - if (tokens[cmd_idx].type != Token::Type::Raw) + if (token.type != Token::Type::Raw) return Completions{}; - StringView command_name = tokens[cmd_idx].content; + StringView command_name = tokens.front().content; if (command_name != m_last_complete_command) { m_last_complete_command = command_name.str(); @@ -654,17 +619,15 @@ Completions CommandManager::complete(const Context& context, return Completions(); Vector params; - for (auto it = tokens.begin() + cmd_idx + 1; it != tokens.end(); ++it) + for (auto it = tokens.begin() + 1; it != tokens.end(); ++it) params.push_back(it->content); - if (tok_idx == tokens.size()) - params.emplace_back(""); Completions completions = offset_pos(command_it->value.completer( - context, flags, params, tok_idx - cmd_idx - 1, + context, flags, params, tokens.size() - 2, cursor_pos_in_token), start); - if (type != Token::Type::RawQuoted) + if (token.type != Token::Type::RawQuoted) { - StringView to_escape = type == Token::Type::Raw ? "% \t;" : "%"; + StringView to_escape = token.type == Token::Type::Raw ? "% \t;" : "%"; for (auto& candidate : completions.candidates) candidate = escape(candidate, to_escape, '\\'); } diff --git a/src/command_manager.hh b/src/command_manager.hh index 87ce3f1c..b5b49dd9 100644 --- a/src/command_manager.hh +++ b/src/command_manager.hh @@ -55,16 +55,42 @@ struct Token }; Type type; - ByteCount begin; - ByteCount end; + ByteCount pos; BufferCoord coord; String content; }; -using TokenList = Vector; +struct Reader +{ +public: + Reader(StringView s) : str{s}, pos{s.begin()}, line_start{s.begin()}, line{} {} -template -TokenList parse(StringView line); + Codepoint operator*() const; + Reader& operator++(); + + explicit operator bool() const { return pos < str.end(); } + StringView substr_from(const char* start) const { return {start, pos}; } + BufferCoord coord() const { return {line, (int)(pos - line_start)}; } + + StringView str; + const char* pos; + const char* line_start; + LineCount line; +}; + +class CommandParser +{ +public: + CommandParser(StringView command_line); + Optional read_token(bool throw_on_unterminated); + + const char* pos() const { return m_reader.pos; } + BufferCoord coord() const { return m_reader.coord(); } + bool done() const { return not m_reader; } + +private: + Reader m_reader; +}; class CommandManager : public Singleton { diff --git a/src/highlighters.cc b/src/highlighters.cc index 37e1185f..626a4e60 100644 --- a/src/highlighters.cc +++ b/src/highlighters.cc @@ -509,19 +509,18 @@ HighlighterAndId create_dynamic_regex_highlighter(HighlighterParameters params) }; auto get_face = [faces](const Context& context){ return faces;; }; - String expr = params[0]; - auto tokens = parse(expr); - if (tokens.size() == 1 and tokens[0].type == Token::Type::OptionExpand and - GlobalScope::instance().options()[tokens[0].content].is_of_type()) + CommandParser parser{params[0]}; + auto token = parser.read_token(true); + if (token and parser.done() and token->type == Token::Type::OptionExpand and + GlobalScope::instance().options()[token->content].is_of_type()) { - String option_name = tokens[0].content; - auto get_regex = [option_name](const Context& context) { + auto get_regex = [option_name = token->content](const Context& context) { return context.options()[option_name].get(); }; - return {format("dynregex_{}", expr), make_hl(get_regex, get_face)}; + return {format("dynregex_{}", params[0]), make_hl(get_regex, get_face)}; } - auto get_regex = [expr](const Context& context){ + auto get_regex = [expr = params[0]](const Context& context){ try { auto re = expand(expr, context); @@ -533,7 +532,7 @@ HighlighterAndId create_dynamic_regex_highlighter(HighlighterParameters params) return Regex{}; } }; - return {format("dynregex_{}", expr), make_hl(get_regex, get_face)}; + return {format("dynregex_{}", params[0]), make_hl(get_regex, get_face)}; } HighlighterAndId create_line_highlighter(HighlighterParameters params)