diff --git a/src/command_manager.cc b/src/command_manager.cc index e4b57de6..cd9f2879 100644 --- a/src/command_manager.cc +++ b/src/command_manager.cc @@ -101,7 +101,7 @@ Reader& Reader::operator++() return *this; } -void Reader::next_byte() +Reader& Reader::next_byte() { kak_assert(pos < str.end()); if (*pos++ == '\n') @@ -109,6 +109,7 @@ void Reader::next_byte() ++line; line_start = pos; } + return *this; } namespace @@ -226,16 +227,16 @@ void skip_blanks_and_comments(Reader& reader) { while (reader) { - const Codepoint c = *reader; + const Codepoint c = *reader.pos; if (is_horizontal_blank(c)) - ++reader; + reader.next_byte(); else if (c == '\\' and reader.pos + 1 != reader.str.end() and *(reader.pos + 1) == '\n') - ++(++reader); + reader.next_byte().next_byte(); else if (c == '#') { while (reader and *reader != '\n') - ++reader; + reader.next_byte(); } else break; @@ -404,10 +405,10 @@ Optional CommandParser::read_token(bool throw_on_unterminated) const char* start = m_reader.pos; auto coord = m_reader.coord(); - const Codepoint c = *m_reader; + const char c = *m_reader.pos; if (c == '"' or c == '\'') { - start = (++m_reader).pos; + start = m_reader.next_byte().pos; QuotedResult quoted = parse_quoted(m_reader, c); if (throw_on_unterminated and not quoted.terminated) throw parse_error{format("unterminated string {0}...{0}", c)}; @@ -420,9 +421,9 @@ Optional CommandParser::read_token(bool throw_on_unterminated) auto token = parse_percent_token(m_reader, throw_on_unterminated); return token; } - else if (is_command_separator(*m_reader)) + else if (is_command_separator(c)) { - ++m_reader; + m_reader.next_byte(); return Token{Token::Type::CommandSeparator, m_reader.pos - line.begin(), coord, {}}; } @@ -432,7 +433,7 @@ Optional CommandParser::read_token(bool throw_on_unterminated) { auto next = m_reader.peek_next(); if (next == '%' or next == '\'' or next == '"') - ++m_reader; + m_reader.next_byte(); } return Token{Token::Type::Raw, start - line.begin(), coord, parse_unquoted(m_reader)}; diff --git a/src/command_manager.hh b/src/command_manager.hh index affec4e3..35d73ba1 100644 --- a/src/command_manager.hh +++ b/src/command_manager.hh @@ -69,7 +69,7 @@ public: Codepoint operator*() const; Codepoint peek_next() const; Reader& operator++(); - void next_byte(); + Reader& next_byte(); explicit operator bool() const { return pos < str.end(); } StringView substr_from(const char* start) const { return {start, pos}; } diff --git a/test/regression/3388-command-line-parsing-does-not-preserve-invalid-utf8/cmd b/test/regression/3388-command-line-parsing-does-not-preserve-invalid-utf8/cmd new file mode 100644 index 00000000..4cad3ccb --- /dev/null +++ b/test/regression/3388-command-line-parsing-does-not-preserve-invalid-utf8/cmd @@ -0,0 +1 @@ +"aR diff --git a/test/regression/3388-command-line-parsing-does-not-preserve-invalid-utf8/in b/test/regression/3388-command-line-parsing-does-not-preserve-invalid-utf8/in new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/test/regression/3388-command-line-parsing-does-not-preserve-invalid-utf8/in @@ -0,0 +1 @@ + diff --git a/test/regression/3388-command-line-parsing-does-not-preserve-invalid-utf8/out b/test/regression/3388-command-line-parsing-does-not-preserve-invalid-utf8/out new file mode 100644 index 00000000..9b136df9 --- /dev/null +++ b/test/regression/3388-command-line-parsing-does-not-preserve-invalid-utf8/out @@ -0,0 +1 @@ +½²=¼ ⌘ diff --git a/test/regression/3388-command-line-parsing-does-not-preserve-invalid-utf8/rc b/test/regression/3388-command-line-parsing-does-not-preserve-invalid-utf8/rc new file mode 100644 index 00000000..735fe182 --- /dev/null +++ b/test/regression/3388-command-line-parsing-does-not-preserve-invalid-utf8/rc @@ -0,0 +1 @@ +evaluate-commands %sh{ printf 'set-register a "\xbd\xb2\x3d\xbc\x20\xe2\x8c\x98"' }