From 892c3647e44b98d5edf8ba0e92a67b68aa2e57c6 Mon Sep 17 00:00:00 2001 From: Maxime Coste Date: Wed, 11 Nov 2015 00:21:20 +0000 Subject: [PATCH] Fix to_lower/to_upper handling to correctly support non unicode chars require a proper unicode locale setup on the system Fixes #94 --- src/keys.cc | 4 +-- src/normal.cc | 56 +++++++++++++++++++++++------------------ src/ranked_match.cc | 4 +-- src/register_manager.cc | 2 +- src/unicode.hh | 6 +++++ 5 files changed, 42 insertions(+), 30 deletions(-) diff --git a/src/keys.cc b/src/keys.cc index 4b9a83ec..3e0277e0 100644 --- a/src/keys.cc +++ b/src/keys.cc @@ -78,7 +78,7 @@ KeyList parse_keys(StringView str) StringView desc{it.base()+1, end_it.base()}; if (desc.length() > 2 and desc[1_byte] == '-') { - switch(tolower(desc[0_byte])) + switch(to_lower(desc[0_byte])) { case 'c': modifier = Key::Modifiers::Control; break; case 'a': modifier = Key::Modifiers::Alt; break; @@ -94,7 +94,7 @@ KeyList parse_keys(StringView str) result.push_back(canonicalize_ifn({ modifier, name_it->key })); else if (desc.char_length() == 1) result.push_back(Key{ modifier, desc[0_char] }); - else if (tolower(desc[0_byte]) == 'f' and desc.length() <= 3) + else if (to_lower(desc[0_byte]) == 'f' and desc.length() <= 3) { int val = str_to_int(desc.substr(1_byte)); if (val >= 1 and val <= 12) diff --git a/src/normal.cc b/src/normal.cc index 9164342d..8d557dbf 100644 --- a/src/normal.cc +++ b/src/normal.cc @@ -114,7 +114,7 @@ void goto_commands(Context& context, NormalParams params) if (not cp) return; auto& buffer = context.buffer(); - switch (tolower(*cp)) + switch (to_lower(*cp)) { case 'g': case 'k': @@ -246,7 +246,7 @@ void view_commands(Context& context, NormalParams params) const ByteCoord cursor = context.selections().main().cursor(); Window& window = context.window(); - switch (tolower(*cp)) + switch (to_lower(*cp)) { case 'v': case 'c': @@ -306,26 +306,32 @@ void replace_with_char(Context& context, NormalParams) }, "replace with char", "enter char to replace with\n"); } -Codepoint to_lower(Codepoint cp) { return tolower(cp); } -Codepoint to_upper(Codepoint cp) { return toupper(cp); } - Codepoint swap_case(Codepoint cp) { - Codepoint res = std::tolower(cp); - return res == cp ? std::toupper(cp) : res; + Codepoint res = to_lower(cp); + return res == cp ? to_upper(cp) : res; } template -void for_each_char(Context& context, NormalParams) +void for_each_codepoint(Context& context, NormalParams) { + using Utf8It = utf8::iterator; + ScopedEdition edition(context); - Vector sels = context.selections_content(); - for (auto& sel : sels) + Buffer& buffer = context.buffer(); + SelectionList& selections = context.selections(); + Vector strings; + for (auto& sel : selections) { - for (auto& c : sel) - c = func(c); + String str; + for (auto begin = Utf8It{buffer.iterator_at(sel.min()), buffer}, + end = Utf8It{buffer.iterator_at(sel.max()), buffer}+1; + begin != end; ++begin) + utf8::dump(std::back_inserter(str), func(*begin)); + + strings.push_back(std::move(str)); } - context.selections().insert(sels, InsertMode::Replace); + selections.insert(strings, InsertMode::Replace); } void command(Context& context, NormalParams) @@ -612,7 +618,7 @@ void regex_prompt(Context& context, const String prompt, T func) template void search(Context& context, NormalParams params) { - const char reg = tolower(params.reg ? params.reg : '/'); + const char reg = to_lower(params.reg ? params.reg : '/'); regex_prompt(context, direction == Forward ? "search:" : "reverse search:", [reg](Regex ex, PromptEvent event, Context& context) { if (ex.empty()) @@ -627,7 +633,7 @@ void search(Context& context, NormalParams params) template void search_next(Context& context, NormalParams params) { - const char reg = tolower(params.reg ? params.reg : '/'); + const char reg = to_lower(params.reg ? params.reg : '/'); StringView str = context.main_sel_register_value(reg); if (not str.empty()) { @@ -664,7 +670,7 @@ void use_selection_as_search_pattern(Context& context, NormalParams params) patterns.push_back(std::move(content)); } - const char reg = tolower(params.reg ? params.reg : '/'); + const char reg = to_lower(params.reg ? params.reg : '/'); context.print_status({ format("register '{}' set to '{}'", reg, patterns[sels.main_index()]), @@ -679,7 +685,7 @@ void use_selection_as_search_pattern(Context& context, NormalParams params) void select_regex(Context& context, NormalParams params) { - const char reg = tolower(params.reg ? params.reg : '/'); + const char reg = to_lower(params.reg ? params.reg : '/'); regex_prompt(context, "select:", [reg](Regex ex, PromptEvent event, Context& context) { if (ex.empty()) ex = Regex{context.main_sel_register_value(reg)}; @@ -692,7 +698,7 @@ void select_regex(Context& context, NormalParams params) void split_regex(Context& context, NormalParams params) { - const char reg = tolower(params.reg ? params.reg : '/'); + const char reg = to_lower(params.reg ? params.reg : '/'); regex_prompt(context, "split:", [reg](Regex ex, PromptEvent event, Context& context) { if (ex.empty()) ex = Regex{context.main_sel_register_value(reg)}; @@ -1074,7 +1080,7 @@ void start_or_end_macro_recording(Context& context, NormalParams params) context.input_handler().stop_recording(); else { - const char reg = tolower(params.reg ? params.reg : '@'); + const char reg = to_lower(params.reg ? params.reg : '@'); if (not is_basic_alpha(reg) and reg != '@') throw runtime_error("Macros can only use the '@' and alphabetic registers"); context.input_handler().start_recording(reg); @@ -1089,7 +1095,7 @@ void end_macro_recording(Context& context, NormalParams) void replay_macro(Context& context, NormalParams params) { - const char reg = tolower(params.reg ? params.reg : '@'); + const char reg = to_lower(params.reg ? params.reg : '@'); if (not is_basic_alpha(reg) and reg != '@') throw runtime_error("Macros can only use the '@' and alphabetic registers"); @@ -1280,7 +1286,7 @@ void spaces_to_tabs(Context& context, NormalParams params) void save_selections(Context& context, NormalParams params) { - const char reg = tolower(params.reg ? params.reg : '^'); + const char reg = to_lower(params.reg ? params.reg : '^'); if (not is_basic_alpha(reg) and reg != '^') throw runtime_error("selections can only be saved to the '^' and alphabetic registers"); @@ -1296,7 +1302,7 @@ void save_selections(Context& context, NormalParams params) void restore_selections(Context& context, NormalParams params) { - const char reg = tolower(params.reg ? params.reg : '^'); + const char reg = to_lower(params.reg ? params.reg : '^'); if (not is_basic_alpha(reg) and reg != '^') throw runtime_error("selections can only be saved to the '^' and alphabetic registers"); @@ -1617,9 +1623,9 @@ static NormalCmdDesc cmds[] = { Key::Escape, "end macro recording", end_macro_recording }, - { '`', "convert to lower case in selections", for_each_char }, - { '~', "convert to upper case in selections", for_each_char }, - { alt('`'), "swap case in selections", for_each_char }, + { '`', "convert to lower case in selections", for_each_codepoint }, + { '~', "convert to upper case in selections", for_each_codepoint }, + { alt('`'), "swap case in selections", for_each_codepoint }, { '&', "align selection cursors", align }, { alt('&'), "copy indentation", copy_indent }, diff --git a/src/ranked_match.cc b/src/ranked_match.cc index a55df314..a7fd4fb3 100644 --- a/src/ranked_match.cc +++ b/src/ranked_match.cc @@ -24,7 +24,7 @@ static int count_word_boundaries_match(StringView candidate, StringView query) if (not is_word_boundary) continue; - const Codepoint lc = tolower(c); + const Codepoint lc = to_lower(c); for (; qit != query.end(); ++qit) { const Codepoint qc = *qit; @@ -43,7 +43,7 @@ static int count_word_boundaries_match(StringView candidate, StringView query) static bool smartcase_eq(Codepoint query, Codepoint candidate) { - return query == (islower(query) ? tolower(candidate) : candidate); + return query == (islower(query) ? to_lower(candidate) : candidate); } static bool subsequence_match_smart_case(StringView str, StringView subseq) diff --git a/src/register_manager.cc b/src/register_manager.cc index e3f52bcb..64e56926 100644 --- a/src/register_manager.cc +++ b/src/register_manager.cc @@ -76,7 +76,7 @@ Register& RegisterManager::operator[](StringView reg) Register& RegisterManager::operator[](Codepoint c) { - c = tolower(c); + c = to_lower(c); if (c < 32 or c > 127) throw runtime_error(format("invalid register name: '{}'", c)); diff --git a/src/unicode.hh b/src/unicode.hh index b74ebcfc..37f3541b 100644 --- a/src/unicode.hh +++ b/src/unicode.hh @@ -63,6 +63,12 @@ inline CharCategories categorize(Codepoint c) : CharCategories::Punctuation; } +inline Codepoint to_lower(Codepoint cp) { return towlower((wchar_t)cp); } +inline Codepoint to_upper(Codepoint cp) { return towupper((wchar_t)cp); } + +inline char to_lower(char c) { return c >= 'A' and c <= 'Z' ? c - 'A' + 'a' : c; } +inline char to_upper(char c) { return c >= 'a' and c <= 'z' ? c - 'a' + 'A' : c; } + } #endif // unicode_hh_INCLUDED