Refactor command parsing, maintain coordinates while reading the string

This commit is contained in:
Maxime Coste 2015-08-27 20:48:51 +01:00
parent b5bdae8271
commit 36b82c42e5

View File

@ -6,6 +6,7 @@
#include "register_manager.hh" #include "register_manager.hh"
#include "shell_manager.hh" #include "shell_manager.hh"
#include "utils.hh" #include "utils.hh"
#include "optional.hh"
#include <algorithm> #include <algorithm>
@ -56,76 +57,125 @@ struct Token
}; };
Token() : m_type(Type::Raw) {} Token() : m_type(Type::Raw) {}
Token(Type type, ByteCount b, ByteCount e, String str = "") Token(Type type, ByteCount b, ByteCount e, CharCoord coord, String str = "")
: m_type(type), m_begin(b), m_end(e), m_content(str) {} : m_type(type), m_begin(b), m_end(e), m_coord(coord), m_content(str) {}
Type type() const { return m_type; } Type type() const { return m_type; }
ByteCount begin() const { return m_begin; } ByteCount begin() const { return m_begin; }
ByteCount end() const { return m_end; } ByteCount end() const { return m_end; }
CharCoord coord() const { return m_coord; }
const String& content() const { return m_content; } const String& content() const { return m_content; }
private: private:
Type m_type; Type m_type;
ByteCount m_begin; ByteCount m_begin;
ByteCount m_end; ByteCount m_end;
CharCoord m_coord;
String m_content; String m_content;
}; };
using TokenList = Vector<Token>; using TokenList = Vector<Token>;
// Cursor over a command string that keeps the byte offset (pos) and the
// line/column coordinate (coord) of the current position in sync while
// the string is consumed one byte at a time.
struct Reader
{
    // Byte at the current position. The reader must not be exhausted
    // (check with operator bool first): no bounds check is done here.
    [[gnu::always_inline]]
    char operator*() const { return str[pos]; }

    // Advance by one byte. Stepping over a '\n' moves coord to the start
    // of the next line, any other byte moves it one column to the right.
    // Like operator*, this reads str[pos] without a bounds check.
    Reader& operator++()
    {
        if (str[pos++] == '\n')
        {
            ++coord.line;
            coord.column = 0;
        }
        else
            ++coord.column;
        return *this;
    }

    // True while there is still at least one byte left to read.
    [[gnu::always_inline]]
    explicit operator bool() const { return pos < str.length(); }

    // View on the bytes in [start, pos), i.e. what has been consumed
    // since the position previously saved in start.
    [[gnu::always_inline]]
    StringView substr_from(ByteCount start) const
    {
        return str.substr(start, pos - start);
    }

    // Byte following the current one, or an empty Optional when there is
    // none. The comparison is '<' rather than '!=' so that calling this
    // on an already exhausted reader (pos == length) yields an empty
    // Optional instead of indexing past the end of the string.
    Optional<char> peek_next() const
    {
        if (pos+1 < str.length())
            return str[pos+1];
        return {};
    }

    StringView str;   // text being read
    ByteCount pos;    // byte offset of the current position in str
    CharCoord coord;  // line/column matching pos, maintained by operator++
};
bool is_command_separator(char c) bool is_command_separator(char c)
{ {
return c == ';' or c == '\n'; return c == ';' or c == '\n';
} }
String get_until_delimiter(StringView base, ByteCount& pos, char delimiter) template<typename Func>
String get_until_delimiter(Reader& reader, Func is_delimiter)
{ {
const ByteCount length = base.length(); auto beg = reader.pos;
ByteCount beg = pos;
String str; String str;
while (pos < length) bool was_antislash = false;
while (reader)
{ {
char c = base[pos]; const char c = *reader;
if (c == delimiter) if (is_delimiter(c))
{ {
str += base.substr(beg, pos - beg); str += reader.substr_from(beg);
if (pos != 0 and base[pos-1] == '\\') if (was_antislash)
{ {
str.back() = delimiter; str.back() = c;
beg = pos+1; beg = reader.pos+1;
} }
else else
return str; return str;
} }
++pos; was_antislash = c == '\\';
++reader;
} }
if (beg < length) if (beg < reader.str.length())
str += base.substr(beg, pos - beg); str += reader.substr_from(beg);
return str; return str;
} }
String get_until_delimiter(StringView base, ByteCount& pos, [[gnu::always_inline]]
char opening_delimiter, char closing_delimiter) inline String get_until_delimiter(Reader& reader, char c)
{ {
kak_assert(base[pos-1] == opening_delimiter); return get_until_delimiter(reader, [c](char ch) { return c == ch; });
const ByteCount length = base.length(); }
StringView get_until_closing_delimiter(Reader& reader, char opening_delimiter,
char closing_delimiter)
{
kak_assert(reader.str[reader.pos-1] == opening_delimiter);
int level = 0; int level = 0;
ByteCount start = pos; auto start = reader.pos;
while (pos != length) while (reader)
{ {
if (base[pos] == opening_delimiter) const char c = *reader;
if (c == opening_delimiter)
++level; ++level;
else if (base[pos] == closing_delimiter) else if (c == closing_delimiter)
{ {
if (level > 0) if (level > 0)
--level; --level;
else else
break; break;
} }
++pos; ++reader;
} }
return base.substr(start, pos - start).str(); return reader.substr_from(start);
} }
struct unknown_expand : parse_error struct unknown_expand : parse_error
@ -153,22 +203,19 @@ Token::Type token_type(StringView type_name)
return Token::Type::Raw; return Token::Type::Raw;
} }
void skip_blanks_and_comments(StringView base, ByteCount& pos) void skip_blanks_and_comments(Reader& reader)
{ {
const ByteCount length = base.length(); while (reader)
while (pos != length)
{ {
if (is_horizontal_blank(base[pos])) const char c = *reader;
++pos; if (is_horizontal_blank(c))
else if (base[pos] == '\\' and pos+1 < length and base[pos+1] == '\n') ++reader;
pos += 2; else if (c == '\\' and reader.peek_next().value_or('\0') == '\n')
else if (base[pos] == '#') ++(++reader);
else if (c == '#')
{ {
while (pos != length) for (bool eol = false; reader and not eol; ++reader)
{ eol = *reader == '\n';
if (base[pos++] == '\n')
break;
}
} }
else else
break; break;
@ -176,15 +223,15 @@ void skip_blanks_and_comments(StringView base, ByteCount& pos)
} }
template<bool throw_on_unterminated> template<bool throw_on_unterminated>
Token parse_percent_token(StringView line, ByteCount& pos) Token parse_percent_token(Reader& reader)
{ {
const ByteCount length = line.length(); ++reader;
const ByteCount type_start = ++pos; const ByteCount type_start = reader.pos;
while (pos < length and isalpha(line[pos])) while (reader and isalpha(*reader))
++pos; ++reader;
StringView type_name = line.substr(type_start, pos - type_start); StringView type_name = reader.substr_from(type_start);
if (throw_on_unterminated and pos == length) if (throw_on_unterminated and not reader)
throw parse_error{format("expected a string delimiter after '%{}'", throw parse_error{format("expected a string delimiter after '%{}'",
type_name)}; type_name)};
@ -193,25 +240,34 @@ Token parse_percent_token(StringView line, ByteCount& pos)
{ '(', ')' }, { '[', ']' }, { '{', '}' }, { '<', '>' } { '(', ')' }, { '[', ']' }, { '{', '}' }, { '<', '>' }
}; };
char opening_delimiter = line[pos]; char opening_delimiter = *reader;
ByteCount token_start = ++pos; ++reader;
auto start = reader.pos;
auto coord = reader.coord;
auto delim_it = matching_delimiters.find(opening_delimiter); auto delim_it = matching_delimiters.find(opening_delimiter);
if (delim_it != matching_delimiters.end()) if (delim_it != matching_delimiters.end())
{ {
char closing_delimiter = delim_it->second; const char closing_delimiter = delim_it->second;
String token = get_until_delimiter(line, pos, opening_delimiter, auto token = get_until_closing_delimiter(reader, opening_delimiter,
closing_delimiter); closing_delimiter);
if (throw_on_unterminated and pos == length) if (throw_on_unterminated and not reader)
throw parse_error{format("unterminated string '%{}{}...{}'", throw parse_error{format("{}:{}: unterminated string '%{}{}...{}'",
type_name, opening_delimiter, coord.line, coord.column, type_name,
closing_delimiter)}; opening_delimiter, closing_delimiter)};
return {type, token_start, pos, std::move(token)};
return {type, start, reader.pos, coord, token.str()};
} }
else else
{ {
String token = get_until_delimiter(line, pos, opening_delimiter); String token = get_until_delimiter(reader, opening_delimiter);
return {type, token_start, pos, std::move(token)};
if (throw_on_unterminated and not reader)
throw parse_error{format("{}:{}: unterminated string '%{}{}...{}'",
coord.line, coord.column, type_name,
opening_delimiter, opening_delimiter)};
return {type, start, reader.pos, coord, std::move(token)};
} }
} }
@ -220,50 +276,44 @@ TokenList parse(StringView line)
{ {
TokenList result; TokenList result;
const ByteCount length = line.length(); Reader reader{line};
ByteCount pos = 0; while (reader)
while (pos < length)
{ {
skip_blanks_and_comments(line, pos); skip_blanks_and_comments(reader);
ByteCount token_start = pos; ByteCount start = reader.pos;
ByteCount start_pos = pos; auto coord = reader.coord;
if (line[pos] == '"' or line[pos] == '\'') const char c = *reader;
if (c == '"' or c == '\'')
{ {
char delimiter = line[pos]; start = (++reader).pos;
String token = get_until_delimiter(reader, c);
token_start = ++pos; if (throw_on_unterminated and not reader)
String token = get_until_delimiter(line, pos, delimiter); throw parse_error{format("unterminated string {0}...{0}", c)};
if (throw_on_unterminated and pos == length) result.emplace_back(c == '"' ? Token::Type::RawEval
throw parse_error{format("unterminated string {0}...{0}", delimiter)}; : Token::Type::Raw,
result.emplace_back(delimiter == '"' ? Token::Type::RawEval start, reader.pos, coord, std::move(token));
: Token::Type::Raw,
token_start, pos, std::move(token));
} }
else if (line[pos] == '%') else if (c == '%')
result.push_back( result.push_back(
parse_percent_token<throw_on_unterminated>(line, pos)); parse_percent_token<throw_on_unterminated>(reader));
else else
{ {
while (pos != length and String str = get_until_delimiter(reader, [](char c) {
((not is_command_separator(line[pos]) and return is_command_separator(c) or is_horizontal_blank(c);
not is_horizontal_blank(line[pos])) });
or (pos != 0 and line[pos-1] == '\\')))
++pos; if (not str.empty())
if (start_pos != pos) result.emplace_back(Token::Type::Raw, start, reader.pos,
{ coord, std::move(str));
result.emplace_back(
Token::Type::Raw, token_start, pos,
unescape(line.substr(token_start, pos - token_start),
" \t;\n", '\\'));
}
} }
if (is_command_separator(line[pos])) if (is_command_separator(*reader))
result.emplace_back(Token::Type::CommandSeparator, pos, pos+1); result.emplace_back(Token::Type::CommandSeparator,
reader.pos, reader.pos+1, coord);
++pos; ++reader;
} }
return result; return result;
} }
@ -305,32 +355,33 @@ String expand(StringView str, const Context& context,
ConstArrayView<String> shell_params, ConstArrayView<String> shell_params,
const EnvVarMap& env_vars) const EnvVarMap& env_vars)
{ {
Reader reader{str};
String res; String res;
auto pos = 0_byte, beg = 0_byte; auto beg = 0_byte;
auto length = str.length(); while (reader)
while (pos < length)
{ {
if (str[pos] == '\\') char c = *reader;
if (c == '\\')
{ {
char c = str[++pos]; c = *++reader;
if (c == '%' or c == '\\') if (c == '%' or c == '\\')
{ {
res += str.substr(beg, pos - beg); res += reader.substr_from(beg);
res.back() = c; res.back() = c;
beg = ++pos; beg = (++reader).pos;
} }
} }
else if (str[pos] == '%') else if (c == '%')
{ {
res += str.substr(beg, pos - beg); res += reader.substr_from(beg);
Token token = parse_percent_token<true>(str, pos); Token token = parse_percent_token<true>(reader);
res += expand_token(token, context, shell_params, env_vars); res += expand_token(token, context, shell_params, env_vars);
beg = ++pos; beg = (++reader).pos;
} }
else else
++pos; ++reader;
} }
res += str.substr(beg, pos - beg); res += reader.substr_from(beg);
return res; return res;
} }
@ -374,25 +425,6 @@ void CommandManager::execute_single_command(CommandParameters params,
} }
} }
// Walks at most offset bytes into str (stopping early at the end of str)
// and returns the coordinate reached: line is the number of '\n' bytes
// stepped over, column is utf8::distance() from the start of the current
// line to the stopping point.
static CharCoord find_coord(StringView str, ByteCount offset)
{
    CharCoord coord;
    auto line_begin = str.begin();
    auto cur = str.begin();
    for (; cur != str.end() and offset > 0; ++cur, --offset)
    {
        if (*cur == '\n')
        {
            // The next line starts right after the newline.
            line_begin = cur + 1;
            ++coord.line;
        }
    }
    coord.column = utf8::distance(line_begin, cur);
    return coord;
}
void CommandManager::execute(StringView command_line, void CommandManager::execute(StringView command_line,
Context& context, Context& context,
ConstArrayView<String> shell_params, ConstArrayView<String> shell_params,
@ -407,7 +439,7 @@ void CommandManager::execute(StringView command_line,
for (auto it = tokens.begin(); it != tokens.end(); ++it) for (auto it = tokens.begin(); it != tokens.end(); ++it)
{ {
if (params.empty()) if (params.empty())
command_coord = find_coord(command_line, it->begin()); command_coord = it->coord();
if (it->type() == Token::Type::CommandSeparator) if (it->type() == Token::Type::CommandSeparator)
{ {
@ -545,9 +577,9 @@ Completions CommandManager::complete(const Context& context,
tokens[tok_idx].begin() : cursor_pos; tokens[tok_idx].begin() : cursor_pos;
ByteCount cursor_pos_in_token = cursor_pos - start; ByteCount cursor_pos_in_token = cursor_pos - start;
const Token::Type token_type = tok_idx < tokens.size() ? const Token::Type type = tok_idx < tokens.size() ?
tokens[tok_idx].type() : Token::Type::Raw; tokens[tok_idx].type() : Token::Type::Raw;
switch (token_type) switch (type)
{ {
case Token::Type::OptionExpand: case Token::Type::OptionExpand:
return {start , cursor_pos, return {start , cursor_pos,
@ -571,9 +603,8 @@ Completions CommandManager::complete(const Context& context,
return Completions(); return Completions();
Vector<String> params; Vector<String> params;
for (auto token_it = tokens.begin() + cmd_idx + 1; for (auto it = tokens.begin() + cmd_idx + 1; it != tokens.end(); ++it)
token_it != tokens.end(); ++token_it) params.push_back(it->content());
params.push_back(token_it->content());
if (tok_idx == tokens.size()) if (tok_idx == tokens.size())
params.push_back(""); params.push_back("");
Completions completions = command_it->second.completer( Completions completions = command_it->second.completer(