Handle invalid utf8 in command line a bit better
Reduce the amount of decoding by working directly on bytes. Fixes #3388
This commit is contained in:
parent
aad4612387
commit
149da2064d
|
@ -101,7 +101,7 @@ Reader& Reader::operator++()
|
|||
return *this;
|
||||
}
|
||||
|
||||
void Reader::next_byte()
|
||||
Reader& Reader::next_byte()
|
||||
{
|
||||
kak_assert(pos < str.end());
|
||||
if (*pos++ == '\n')
|
||||
|
@ -109,6 +109,7 @@ void Reader::next_byte()
|
|||
++line;
|
||||
line_start = pos;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
namespace
|
||||
|
@ -226,16 +227,16 @@ void skip_blanks_and_comments(Reader& reader)
|
|||
{
|
||||
while (reader)
|
||||
{
|
||||
const Codepoint c = *reader;
|
||||
const Codepoint c = *reader.pos;
|
||||
if (is_horizontal_blank(c))
|
||||
++reader;
|
||||
reader.next_byte();
|
||||
else if (c == '\\' and reader.pos + 1 != reader.str.end() and
|
||||
*(reader.pos + 1) == '\n')
|
||||
++(++reader);
|
||||
reader.next_byte().next_byte();
|
||||
else if (c == '#')
|
||||
{
|
||||
while (reader and *reader != '\n')
|
||||
++reader;
|
||||
reader.next_byte();
|
||||
}
|
||||
else
|
||||
break;
|
||||
|
@ -404,10 +405,10 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
|
|||
const char* start = m_reader.pos;
|
||||
auto coord = m_reader.coord();
|
||||
|
||||
const Codepoint c = *m_reader;
|
||||
const char c = *m_reader.pos;
|
||||
if (c == '"' or c == '\'')
|
||||
{
|
||||
start = (++m_reader).pos;
|
||||
start = m_reader.next_byte().pos;
|
||||
QuotedResult quoted = parse_quoted(m_reader, c);
|
||||
if (throw_on_unterminated and not quoted.terminated)
|
||||
throw parse_error{format("unterminated string {0}...{0}", c)};
|
||||
|
@ -420,9 +421,9 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
|
|||
auto token = parse_percent_token(m_reader, throw_on_unterminated);
|
||||
return token;
|
||||
}
|
||||
else if (is_command_separator(*m_reader))
|
||||
else if (is_command_separator(c))
|
||||
{
|
||||
++m_reader;
|
||||
m_reader.next_byte();
|
||||
return Token{Token::Type::CommandSeparator,
|
||||
m_reader.pos - line.begin(), coord, {}};
|
||||
}
|
||||
|
@ -432,7 +433,7 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
|
|||
{
|
||||
auto next = m_reader.peek_next();
|
||||
if (next == '%' or next == '\'' or next == '"')
|
||||
++m_reader;
|
||||
m_reader.next_byte();
|
||||
}
|
||||
return Token{Token::Type::Raw, start - line.begin(),
|
||||
coord, parse_unquoted(m_reader)};
|
||||
|
|
|
@ -69,7 +69,7 @@ public:
|
|||
Codepoint operator*() const;
|
||||
Codepoint peek_next() const;
|
||||
Reader& operator++();
|
||||
void next_byte();
|
||||
Reader& next_byte();
|
||||
|
||||
explicit operator bool() const { return pos < str.end(); }
|
||||
StringView substr_from(const char* start) const { return {start, pos}; }
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
"aR
|
|
@ -0,0 +1 @@
|
|||
|
|
@ -0,0 +1 @@
|
|||
<EFBFBD><EFBFBD>=<3D> ⌘
|
|
@ -0,0 +1 @@
|
|||
evaluate-commands %sh{ printf 'set-register a "\xbd\xb2\x3d\xbc\x20\xe2\x8c\x98"' }
|
Loading…
Reference in New Issue
Block a user