Handle invalid UTF-8 in the command line a bit better

Reduce the amount of UTF-8 decoding done by the command parser by
working directly on bytes.

Fixes #3388
This commit is contained in:
Maxime Coste 2020-03-12 20:30:55 +11:00
parent aad4612387
commit 149da2064d
6 changed files with 16 additions and 11 deletions

View File

@ -101,7 +101,7 @@ Reader& Reader::operator++()
return *this; return *this;
} }
void Reader::next_byte() Reader& Reader::next_byte()
{ {
kak_assert(pos < str.end()); kak_assert(pos < str.end());
if (*pos++ == '\n') if (*pos++ == '\n')
@ -109,6 +109,7 @@ void Reader::next_byte()
++line; ++line;
line_start = pos; line_start = pos;
} }
return *this;
} }
namespace namespace
@ -226,16 +227,16 @@ void skip_blanks_and_comments(Reader& reader)
{ {
while (reader) while (reader)
{ {
const Codepoint c = *reader; const Codepoint c = *reader.pos;
if (is_horizontal_blank(c)) if (is_horizontal_blank(c))
++reader; reader.next_byte();
else if (c == '\\' and reader.pos + 1 != reader.str.end() and else if (c == '\\' and reader.pos + 1 != reader.str.end() and
*(reader.pos + 1) == '\n') *(reader.pos + 1) == '\n')
++(++reader); reader.next_byte().next_byte();
else if (c == '#') else if (c == '#')
{ {
while (reader and *reader != '\n') while (reader and *reader != '\n')
++reader; reader.next_byte();
} }
else else
break; break;
@ -404,10 +405,10 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
const char* start = m_reader.pos; const char* start = m_reader.pos;
auto coord = m_reader.coord(); auto coord = m_reader.coord();
const Codepoint c = *m_reader; const char c = *m_reader.pos;
if (c == '"' or c == '\'') if (c == '"' or c == '\'')
{ {
start = (++m_reader).pos; start = m_reader.next_byte().pos;
QuotedResult quoted = parse_quoted(m_reader, c); QuotedResult quoted = parse_quoted(m_reader, c);
if (throw_on_unterminated and not quoted.terminated) if (throw_on_unterminated and not quoted.terminated)
throw parse_error{format("unterminated string {0}...{0}", c)}; throw parse_error{format("unterminated string {0}...{0}", c)};
@ -420,9 +421,9 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
auto token = parse_percent_token(m_reader, throw_on_unterminated); auto token = parse_percent_token(m_reader, throw_on_unterminated);
return token; return token;
} }
else if (is_command_separator(*m_reader)) else if (is_command_separator(c))
{ {
++m_reader; m_reader.next_byte();
return Token{Token::Type::CommandSeparator, return Token{Token::Type::CommandSeparator,
m_reader.pos - line.begin(), coord, {}}; m_reader.pos - line.begin(), coord, {}};
} }
@ -432,7 +433,7 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
{ {
auto next = m_reader.peek_next(); auto next = m_reader.peek_next();
if (next == '%' or next == '\'' or next == '"') if (next == '%' or next == '\'' or next == '"')
++m_reader; m_reader.next_byte();
} }
return Token{Token::Type::Raw, start - line.begin(), return Token{Token::Type::Raw, start - line.begin(),
coord, parse_unquoted(m_reader)}; coord, parse_unquoted(m_reader)};

View File

@ -69,7 +69,7 @@ public:
Codepoint operator*() const; Codepoint operator*() const;
Codepoint peek_next() const; Codepoint peek_next() const;
Reader& operator++(); Reader& operator++();
void next_byte(); Reader& next_byte();
explicit operator bool() const { return pos < str.end(); } explicit operator bool() const { return pos < str.end(); }
StringView substr_from(const char* start) const { return {start, pos}; } StringView substr_from(const char* start) const { return {start, pos}; }

View File

@ -0,0 +1 @@
<EFBFBD><EFBFBD>=<3D>

View File

@ -0,0 +1 @@
evaluate-commands %sh{ printf 'set-register a "\xbd\xb2\x3d\xbc\x20\xe2\x8c\x98"' }