Handle invalid UTF-8 in the command line a bit better
Reduce the amount of decoding by working directly on bytes. Fixes #3388
This commit is contained in:
parent
aad4612387
commit
149da2064d
|
@ -101,7 +101,7 @@ Reader& Reader::operator++()
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Reader::next_byte()
|
Reader& Reader::next_byte()
|
||||||
{
|
{
|
||||||
kak_assert(pos < str.end());
|
kak_assert(pos < str.end());
|
||||||
if (*pos++ == '\n')
|
if (*pos++ == '\n')
|
||||||
|
@ -109,6 +109,7 @@ void Reader::next_byte()
|
||||||
++line;
|
++line;
|
||||||
line_start = pos;
|
line_start = pos;
|
||||||
}
|
}
|
||||||
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
|
@ -226,16 +227,16 @@ void skip_blanks_and_comments(Reader& reader)
|
||||||
{
|
{
|
||||||
while (reader)
|
while (reader)
|
||||||
{
|
{
|
||||||
const Codepoint c = *reader;
|
const Codepoint c = *reader.pos;
|
||||||
if (is_horizontal_blank(c))
|
if (is_horizontal_blank(c))
|
||||||
++reader;
|
reader.next_byte();
|
||||||
else if (c == '\\' and reader.pos + 1 != reader.str.end() and
|
else if (c == '\\' and reader.pos + 1 != reader.str.end() and
|
||||||
*(reader.pos + 1) == '\n')
|
*(reader.pos + 1) == '\n')
|
||||||
++(++reader);
|
reader.next_byte().next_byte();
|
||||||
else if (c == '#')
|
else if (c == '#')
|
||||||
{
|
{
|
||||||
while (reader and *reader != '\n')
|
while (reader and *reader != '\n')
|
||||||
++reader;
|
reader.next_byte();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
|
@ -404,10 +405,10 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
|
||||||
const char* start = m_reader.pos;
|
const char* start = m_reader.pos;
|
||||||
auto coord = m_reader.coord();
|
auto coord = m_reader.coord();
|
||||||
|
|
||||||
const Codepoint c = *m_reader;
|
const char c = *m_reader.pos;
|
||||||
if (c == '"' or c == '\'')
|
if (c == '"' or c == '\'')
|
||||||
{
|
{
|
||||||
start = (++m_reader).pos;
|
start = m_reader.next_byte().pos;
|
||||||
QuotedResult quoted = parse_quoted(m_reader, c);
|
QuotedResult quoted = parse_quoted(m_reader, c);
|
||||||
if (throw_on_unterminated and not quoted.terminated)
|
if (throw_on_unterminated and not quoted.terminated)
|
||||||
throw parse_error{format("unterminated string {0}...{0}", c)};
|
throw parse_error{format("unterminated string {0}...{0}", c)};
|
||||||
|
@ -420,9 +421,9 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
|
||||||
auto token = parse_percent_token(m_reader, throw_on_unterminated);
|
auto token = parse_percent_token(m_reader, throw_on_unterminated);
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
else if (is_command_separator(*m_reader))
|
else if (is_command_separator(c))
|
||||||
{
|
{
|
||||||
++m_reader;
|
m_reader.next_byte();
|
||||||
return Token{Token::Type::CommandSeparator,
|
return Token{Token::Type::CommandSeparator,
|
||||||
m_reader.pos - line.begin(), coord, {}};
|
m_reader.pos - line.begin(), coord, {}};
|
||||||
}
|
}
|
||||||
|
@ -432,7 +433,7 @@ Optional<Token> CommandParser::read_token(bool throw_on_unterminated)
|
||||||
{
|
{
|
||||||
auto next = m_reader.peek_next();
|
auto next = m_reader.peek_next();
|
||||||
if (next == '%' or next == '\'' or next == '"')
|
if (next == '%' or next == '\'' or next == '"')
|
||||||
++m_reader;
|
m_reader.next_byte();
|
||||||
}
|
}
|
||||||
return Token{Token::Type::Raw, start - line.begin(),
|
return Token{Token::Type::Raw, start - line.begin(),
|
||||||
coord, parse_unquoted(m_reader)};
|
coord, parse_unquoted(m_reader)};
|
||||||
|
|
|
@ -69,7 +69,7 @@ public:
|
||||||
Codepoint operator*() const;
|
Codepoint operator*() const;
|
||||||
Codepoint peek_next() const;
|
Codepoint peek_next() const;
|
||||||
Reader& operator++();
|
Reader& operator++();
|
||||||
void next_byte();
|
Reader& next_byte();
|
||||||
|
|
||||||
explicit operator bool() const { return pos < str.end(); }
|
explicit operator bool() const { return pos < str.end(); }
|
||||||
StringView substr_from(const char* start) const { return {start, pos}; }
|
StringView substr_from(const char* start) const { return {start, pos}; }
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
"aR
|
|
@ -0,0 +1 @@
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
<EFBFBD><EFBFBD>=<3D> ⌘
|
|
@ -0,0 +1 @@
|
||||||
|
evaluate-commands %sh{ printf 'set-register a "\xbd\xb2\x3d\xbc\x20\xe2\x8c\x98"' }
|
Loading…
Reference in New Issue
Block a user