Refactor command parsing, maintain coordinates while reading the string
This commit is contained in:
parent
b5bdae8271
commit
36b82c42e5
|
@ -6,6 +6,7 @@
|
||||||
#include "register_manager.hh"
|
#include "register_manager.hh"
|
||||||
#include "shell_manager.hh"
|
#include "shell_manager.hh"
|
||||||
#include "utils.hh"
|
#include "utils.hh"
|
||||||
|
#include "optional.hh"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
|
@ -56,76 +57,125 @@ struct Token
|
||||||
};
|
};
|
||||||
Token() : m_type(Type::Raw) {}
|
Token() : m_type(Type::Raw) {}
|
||||||
|
|
||||||
Token(Type type, ByteCount b, ByteCount e, String str = "")
|
Token(Type type, ByteCount b, ByteCount e, CharCoord coord, String str = "")
|
||||||
: m_type(type), m_begin(b), m_end(e), m_content(str) {}
|
: m_type(type), m_begin(b), m_end(e), m_coord(coord), m_content(str) {}
|
||||||
|
|
||||||
Type type() const { return m_type; }
|
Type type() const { return m_type; }
|
||||||
ByteCount begin() const { return m_begin; }
|
ByteCount begin() const { return m_begin; }
|
||||||
ByteCount end() const { return m_end; }
|
ByteCount end() const { return m_end; }
|
||||||
|
CharCoord coord() const { return m_coord; }
|
||||||
const String& content() const { return m_content; }
|
const String& content() const { return m_content; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Type m_type;
|
Type m_type;
|
||||||
ByteCount m_begin;
|
ByteCount m_begin;
|
||||||
ByteCount m_end;
|
ByteCount m_end;
|
||||||
|
CharCoord m_coord;
|
||||||
String m_content;
|
String m_content;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
using TokenList = Vector<Token>;
|
using TokenList = Vector<Token>;
|
||||||
|
|
||||||
|
struct Reader
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
[[gnu::always_inline]]
|
||||||
|
char operator*() const { return str[pos]; }
|
||||||
|
|
||||||
|
Reader& operator++()
|
||||||
|
{
|
||||||
|
if (str[pos++] == '\n')
|
||||||
|
{
|
||||||
|
++coord.line;
|
||||||
|
coord.column = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
++coord.column;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[gnu::always_inline]]
|
||||||
|
explicit operator bool() const { return pos < str.length(); }
|
||||||
|
|
||||||
|
[[gnu::always_inline]]
|
||||||
|
StringView substr_from(ByteCount start) const
|
||||||
|
{
|
||||||
|
return str.substr(start, pos - start);
|
||||||
|
}
|
||||||
|
|
||||||
|
Optional<char> peek_next() const
|
||||||
|
{
|
||||||
|
if (pos+1 != str.length())
|
||||||
|
return str[pos+1];
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
StringView str;
|
||||||
|
ByteCount pos;
|
||||||
|
CharCoord coord;
|
||||||
|
};
|
||||||
|
|
||||||
// True for the two characters that end a command: ';' and newline.
bool is_command_separator(char c)
{
    return c == ';' or c == '\n';
}
|
||||||
|
|
||||||
// Read from the reader's current position up to the first unescaped
// delimiter, i.e. the first byte for which is_delimiter returns true and that
// is not directly preceded by a backslash read during this call.
//
// An escaped delimiter is kept in the returned string in place of its
// backslash. The reader is left on the terminating delimiter; if the input
// ends first, the reader is exhausted and everything read is returned.
template<typename Func>
String get_until_delimiter(Reader& reader, Func is_delimiter)
{
    auto beg = reader.pos; // start of the text not yet copied into str
    String str;
    bool was_antislash = false;

    while (reader)
    {
        const char c = *reader;
        if (is_delimiter(c))
        {
            str += reader.substr_from(beg);
            if (was_antislash)
            {
                // The text just appended ends with the escaping backslash:
                // overwrite it with the delimiter and resume after it.
                str.back() = c;
                beg = reader.pos+1;
            }
            else
                return str;
        }
        was_antislash = c == '\\';
        ++reader;
    }
    // No terminating delimiter: flush whatever is left, if anything.
    if (beg < reader.str.length())
        str += reader.substr_from(beg);
    return str;
}
|
||||||
|
|
||||||
// Overload for the common case of a single delimiter character: reads until
// the first unescaped occurrence of c.
[[gnu::always_inline]]
inline String get_until_delimiter(Reader& reader, char c)
{
    return get_until_delimiter(reader, [c](char ch) { return c == ch; });
}
|
||||||
|
|
||||||
|
// Read the content of a delimited block whose opening delimiter has just been
// consumed (asserted below). Nested opening/closing pairs are balanced, so
// the scan stops on the closing delimiter matching that first opener.
//
// Returns a view of the text between the delimiters. The reader is left on
// the matching closing delimiter, or exhausted if the block is unterminated.
StringView get_until_closing_delimiter(Reader& reader, char opening_delimiter,
                                       char closing_delimiter)
{
    kak_assert(reader.str[reader.pos-1] == opening_delimiter);
    int level = 0; // nesting depth of openers seen inside the block
    auto start = reader.pos;
    while (reader)
    {
        const char c = *reader;
        if (c == opening_delimiter)
            ++level;
        else if (c == closing_delimiter)
        {
            if (level > 0)
                --level;
            else
                break; // this closer matches the block's own opener
        }
        ++reader;
    }
    return reader.substr_from(start);
}
|
||||||
|
|
||||||
struct unknown_expand : parse_error
|
struct unknown_expand : parse_error
|
||||||
|
@ -153,22 +203,19 @@ Token::Type token_type(StringView type_name)
|
||||||
return Token::Type::Raw;
|
return Token::Type::Raw;
|
||||||
}
|
}
|
||||||
|
|
||||||
void skip_blanks_and_comments(StringView base, ByteCount& pos)
|
void skip_blanks_and_comments(Reader& reader)
|
||||||
{
|
{
|
||||||
const ByteCount length = base.length();
|
while (reader)
|
||||||
while (pos != length)
|
|
||||||
{
|
{
|
||||||
if (is_horizontal_blank(base[pos]))
|
const char c = *reader;
|
||||||
++pos;
|
if (is_horizontal_blank(c))
|
||||||
else if (base[pos] == '\\' and pos+1 < length and base[pos+1] == '\n')
|
++reader;
|
||||||
pos += 2;
|
else if (c == '\\' and reader.peek_next().value_or('\0') == '\n')
|
||||||
else if (base[pos] == '#')
|
++(++reader);
|
||||||
|
else if (c == '#')
|
||||||
{
|
{
|
||||||
while (pos != length)
|
for (bool eol = false; reader and not eol; ++reader)
|
||||||
{
|
eol = *reader == '\n';
|
||||||
if (base[pos++] == '\n')
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
|
@ -176,15 +223,15 @@ void skip_blanks_and_comments(StringView base, ByteCount& pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool throw_on_unterminated>
|
template<bool throw_on_unterminated>
|
||||||
Token parse_percent_token(StringView line, ByteCount& pos)
|
Token parse_percent_token(Reader& reader)
|
||||||
{
|
{
|
||||||
const ByteCount length = line.length();
|
++reader;
|
||||||
const ByteCount type_start = ++pos;
|
const ByteCount type_start = reader.pos;
|
||||||
while (pos < length and isalpha(line[pos]))
|
while (reader and isalpha(*reader))
|
||||||
++pos;
|
++reader;
|
||||||
StringView type_name = line.substr(type_start, pos - type_start);
|
StringView type_name = reader.substr_from(type_start);
|
||||||
|
|
||||||
if (throw_on_unterminated and pos == length)
|
if (throw_on_unterminated and not reader)
|
||||||
throw parse_error{format("expected a string delimiter after '%{}'",
|
throw parse_error{format("expected a string delimiter after '%{}'",
|
||||||
type_name)};
|
type_name)};
|
||||||
|
|
||||||
|
@ -193,25 +240,34 @@ Token parse_percent_token(StringView line, ByteCount& pos)
|
||||||
{ '(', ')' }, { '[', ']' }, { '{', '}' }, { '<', '>' }
|
{ '(', ')' }, { '[', ']' }, { '{', '}' }, { '<', '>' }
|
||||||
};
|
};
|
||||||
|
|
||||||
char opening_delimiter = line[pos];
|
char opening_delimiter = *reader;
|
||||||
ByteCount token_start = ++pos;
|
++reader;
|
||||||
|
auto start = reader.pos;
|
||||||
|
auto coord = reader.coord;
|
||||||
|
|
||||||
auto delim_it = matching_delimiters.find(opening_delimiter);
|
auto delim_it = matching_delimiters.find(opening_delimiter);
|
||||||
if (delim_it != matching_delimiters.end())
|
if (delim_it != matching_delimiters.end())
|
||||||
{
|
{
|
||||||
char closing_delimiter = delim_it->second;
|
const char closing_delimiter = delim_it->second;
|
||||||
String token = get_until_delimiter(line, pos, opening_delimiter,
|
auto token = get_until_closing_delimiter(reader, opening_delimiter,
|
||||||
closing_delimiter);
|
closing_delimiter);
|
||||||
if (throw_on_unterminated and pos == length)
|
if (throw_on_unterminated and not reader)
|
||||||
throw parse_error{format("unterminated string '%{}{}...{}'",
|
throw parse_error{format("{}:{}: unterminated string '%{}{}...{}'",
|
||||||
type_name, opening_delimiter,
|
coord.line, coord.column, type_name,
|
||||||
closing_delimiter)};
|
opening_delimiter, closing_delimiter)};
|
||||||
return {type, token_start, pos, std::move(token)};
|
|
||||||
|
return {type, start, reader.pos, coord, token.str()};
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
String token = get_until_delimiter(line, pos, opening_delimiter);
|
String token = get_until_delimiter(reader, opening_delimiter);
|
||||||
return {type, token_start, pos, std::move(token)};
|
|
||||||
|
if (throw_on_unterminated and not reader)
|
||||||
|
throw parse_error{format("{}:{}: unterminated string '%{}{}...{}'",
|
||||||
|
coord.line, coord.column, type_name,
|
||||||
|
opening_delimiter, opening_delimiter)};
|
||||||
|
|
||||||
|
return {type, start, reader.pos, coord, std::move(token)};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -220,50 +276,44 @@ TokenList parse(StringView line)
|
||||||
{
|
{
|
||||||
TokenList result;
|
TokenList result;
|
||||||
|
|
||||||
const ByteCount length = line.length();
|
Reader reader{line};
|
||||||
ByteCount pos = 0;
|
while (reader)
|
||||||
while (pos < length)
|
|
||||||
{
|
{
|
||||||
skip_blanks_and_comments(line, pos);
|
skip_blanks_and_comments(reader);
|
||||||
|
|
||||||
ByteCount token_start = pos;
|
ByteCount start = reader.pos;
|
||||||
ByteCount start_pos = pos;
|
auto coord = reader.coord;
|
||||||
|
|
||||||
if (line[pos] == '"' or line[pos] == '\'')
|
const char c = *reader;
|
||||||
|
if (c == '"' or c == '\'')
|
||||||
{
|
{
|
||||||
char delimiter = line[pos];
|
start = (++reader).pos;
|
||||||
|
String token = get_until_delimiter(reader, c);
|
||||||
token_start = ++pos;
|
if (throw_on_unterminated and not reader)
|
||||||
String token = get_until_delimiter(line, pos, delimiter);
|
throw parse_error{format("unterminated string {0}...{0}", c)};
|
||||||
if (throw_on_unterminated and pos == length)
|
result.emplace_back(c == '"' ? Token::Type::RawEval
|
||||||
throw parse_error{format("unterminated string {0}...{0}", delimiter)};
|
: Token::Type::Raw,
|
||||||
result.emplace_back(delimiter == '"' ? Token::Type::RawEval
|
start, reader.pos, coord, std::move(token));
|
||||||
: Token::Type::Raw,
|
|
||||||
token_start, pos, std::move(token));
|
|
||||||
}
|
}
|
||||||
else if (line[pos] == '%')
|
else if (c == '%')
|
||||||
result.push_back(
|
result.push_back(
|
||||||
parse_percent_token<throw_on_unterminated>(line, pos));
|
parse_percent_token<throw_on_unterminated>(reader));
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
while (pos != length and
|
String str = get_until_delimiter(reader, [](char c) {
|
||||||
((not is_command_separator(line[pos]) and
|
return is_command_separator(c) or is_horizontal_blank(c);
|
||||||
not is_horizontal_blank(line[pos]))
|
});
|
||||||
or (pos != 0 and line[pos-1] == '\\')))
|
|
||||||
++pos;
|
if (not str.empty())
|
||||||
if (start_pos != pos)
|
result.emplace_back(Token::Type::Raw, start, reader.pos,
|
||||||
{
|
coord, std::move(str));
|
||||||
result.emplace_back(
|
|
||||||
Token::Type::Raw, token_start, pos,
|
|
||||||
unescape(line.substr(token_start, pos - token_start),
|
|
||||||
" \t;\n", '\\'));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_command_separator(line[pos]))
|
if (is_command_separator(*reader))
|
||||||
result.emplace_back(Token::Type::CommandSeparator, pos, pos+1);
|
result.emplace_back(Token::Type::CommandSeparator,
|
||||||
|
reader.pos, reader.pos+1, coord);
|
||||||
|
|
||||||
++pos;
|
++reader;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -305,32 +355,33 @@ String expand(StringView str, const Context& context,
|
||||||
ConstArrayView<String> shell_params,
|
ConstArrayView<String> shell_params,
|
||||||
const EnvVarMap& env_vars)
|
const EnvVarMap& env_vars)
|
||||||
{
|
{
|
||||||
|
Reader reader{str};
|
||||||
String res;
|
String res;
|
||||||
auto pos = 0_byte, beg = 0_byte;
|
auto beg = 0_byte;
|
||||||
auto length = str.length();
|
while (reader)
|
||||||
while (pos < length)
|
|
||||||
{
|
{
|
||||||
if (str[pos] == '\\')
|
char c = *reader;
|
||||||
|
if (c == '\\')
|
||||||
{
|
{
|
||||||
char c = str[++pos];
|
c = *++reader;
|
||||||
if (c == '%' or c == '\\')
|
if (c == '%' or c == '\\')
|
||||||
{
|
{
|
||||||
res += str.substr(beg, pos - beg);
|
res += reader.substr_from(beg);
|
||||||
res.back() = c;
|
res.back() = c;
|
||||||
beg = ++pos;
|
beg = (++reader).pos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (str[pos] == '%')
|
else if (c == '%')
|
||||||
{
|
{
|
||||||
res += str.substr(beg, pos - beg);
|
res += reader.substr_from(beg);
|
||||||
Token token = parse_percent_token<true>(str, pos);
|
Token token = parse_percent_token<true>(reader);
|
||||||
res += expand_token(token, context, shell_params, env_vars);
|
res += expand_token(token, context, shell_params, env_vars);
|
||||||
beg = ++pos;
|
beg = (++reader).pos;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
++pos;
|
++reader;
|
||||||
}
|
}
|
||||||
res += str.substr(beg, pos - beg);
|
res += reader.substr_from(beg);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -374,25 +425,6 @@ void CommandManager::execute_single_command(CommandParameters params,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute the coordinate of the byte at the given offset in str by scanning
// from the beginning: line is the number of '\n' found before the offset,
// column is the utf8::distance from the start of that line.
// The scan stops at the end of str if offset lies beyond it.
static CharCoord find_coord(StringView str, ByteCount offset)
{
    CharCoord res;
    auto it = str.begin();
    auto line_start = it;
    while (it != str.end() and offset > 0)
    {
        if (*it == '\n')
        {
            line_start = it + 1;
            ++res.line;
        }
        ++it;
        --offset;
    }
    res.column = utf8::distance(line_start, it);
    return res;
}
|
|
||||||
|
|
||||||
void CommandManager::execute(StringView command_line,
|
void CommandManager::execute(StringView command_line,
|
||||||
Context& context,
|
Context& context,
|
||||||
ConstArrayView<String> shell_params,
|
ConstArrayView<String> shell_params,
|
||||||
|
@ -407,7 +439,7 @@ void CommandManager::execute(StringView command_line,
|
||||||
for (auto it = tokens.begin(); it != tokens.end(); ++it)
|
for (auto it = tokens.begin(); it != tokens.end(); ++it)
|
||||||
{
|
{
|
||||||
if (params.empty())
|
if (params.empty())
|
||||||
command_coord = find_coord(command_line, it->begin());
|
command_coord = it->coord();
|
||||||
|
|
||||||
if (it->type() == Token::Type::CommandSeparator)
|
if (it->type() == Token::Type::CommandSeparator)
|
||||||
{
|
{
|
||||||
|
@ -545,9 +577,9 @@ Completions CommandManager::complete(const Context& context,
|
||||||
tokens[tok_idx].begin() : cursor_pos;
|
tokens[tok_idx].begin() : cursor_pos;
|
||||||
ByteCount cursor_pos_in_token = cursor_pos - start;
|
ByteCount cursor_pos_in_token = cursor_pos - start;
|
||||||
|
|
||||||
const Token::Type token_type = tok_idx < tokens.size() ?
|
const Token::Type type = tok_idx < tokens.size() ?
|
||||||
tokens[tok_idx].type() : Token::Type::Raw;
|
tokens[tok_idx].type() : Token::Type::Raw;
|
||||||
switch (token_type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case Token::Type::OptionExpand:
|
case Token::Type::OptionExpand:
|
||||||
return {start , cursor_pos,
|
return {start , cursor_pos,
|
||||||
|
@ -571,9 +603,8 @@ Completions CommandManager::complete(const Context& context,
|
||||||
return Completions();
|
return Completions();
|
||||||
|
|
||||||
Vector<String> params;
|
Vector<String> params;
|
||||||
for (auto token_it = tokens.begin() + cmd_idx + 1;
|
for (auto it = tokens.begin() + cmd_idx + 1; it != tokens.end(); ++it)
|
||||||
token_it != tokens.end(); ++token_it)
|
params.push_back(it->content());
|
||||||
params.push_back(token_it->content());
|
|
||||||
if (tok_idx == tokens.size())
|
if (tok_idx == tokens.size())
|
||||||
params.push_back("");
|
params.push_back("");
|
||||||
Completions completions = command_it->second.completer(
|
Completions completions = command_it->second.completer(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user