Handle invalid UTF-8 in the command line a bit better

Reduce the amount of UTF-8 decoding in the command parser by working
directly on bytes wherever only ASCII characters are being tested.

Fixes #3388
This commit is contained in:
Maxime Coste 2020-03-12 20:30:55 +11:00
parent aad4612387
commit 149da2064d
6 changed files with 16 additions and 11 deletions

View File

@ -101,7 +101,7 @@ Reader& Reader::operator++()
return *this;
}
void Reader::next_byte()
Reader& Reader::next_byte()
{
kak_assert(pos < str.end());
if (*pos++ == '\n')
@ -109,6 +109,7 @@ void Reader::next_byte()
++line;
line_start = pos;
}
return *this;
}
namespace
@ -226,16 +227,16 @@ void skip_blanks_and_comments(Reader& reader)
{
while (reader)
{
const Codepoint c = *reader;
const Codepoint c = *reader.pos;
if (is_horizontal_blank(c))
++reader;
reader.next_byte();
else if (c == '\\' and reader.pos + 1 != reader.str.end() and
*(reader.pos + 1) == '\n')
++(++reader);
reader.next_byte().next_byte();
else if (c == '#')
{
while (reader and *reader != '\n')
++reader;
reader.next_byte();
}
else
break;
@ -404,10 +405,10 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
const char* start = m_reader.pos;
auto coord = m_reader.coord();
const Codepoint c = *m_reader;
const char c = *m_reader.pos;
if (c == '"' or c == '\'')
{
start = (++m_reader).pos;
start = m_reader.next_byte().pos;
QuotedResult quoted = parse_quoted(m_reader, c);
if (throw_on_unterminated and not quoted.terminated)
throw parse_error{format("unterminated string {0}...{0}", c)};
@ -420,9 +421,9 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
auto token = parse_percent_token(m_reader, throw_on_unterminated);
return token;
}
else if (is_command_separator(*m_reader))
else if (is_command_separator(c))
{
++m_reader;
m_reader.next_byte();
return Token{Token::Type::CommandSeparator,
m_reader.pos - line.begin(), coord, {}};
}
@ -432,7 +433,7 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
{
auto next = m_reader.peek_next();
if (next == '%' or next == '\'' or next == '"')
++m_reader;
m_reader.next_byte();
}
return Token{Token::Type::Raw, start - line.begin(),
coord, parse_unquoted(m_reader)};

View File

@ -69,7 +69,7 @@ public:
Codepoint operator*() const;
Codepoint peek_next() const;
Reader& operator++();
void next_byte();
Reader& next_byte();
explicit operator bool() const { return pos < str.end(); }
StringView substr_from(const char* start) const { return {start, pos}; }

View File

@ -0,0 +1 @@
<EFBFBD><EFBFBD>=<3D>

View File

@ -0,0 +1 @@
evaluate-commands %sh{ printf 'set-register a "\xbd\xb2\x3d\xbc\x20\xe2\x8c\x98"' }