Fix to_lower/to_upper handling to correctly support non-ASCII (Unicode) chars

Requires a proper Unicode locale to be set up on the system.
Fixes #94
This commit is contained in:
Maxime Coste 2015-11-11 00:21:20 +00:00
parent 7bd3f4306d
commit 892c3647e4
5 changed files with 42 additions and 30 deletions

View File

@ -78,7 +78,7 @@ KeyList parse_keys(StringView str)
StringView desc{it.base()+1, end_it.base()}; StringView desc{it.base()+1, end_it.base()};
if (desc.length() > 2 and desc[1_byte] == '-') if (desc.length() > 2 and desc[1_byte] == '-')
{ {
switch(tolower(desc[0_byte])) switch(to_lower(desc[0_byte]))
{ {
case 'c': modifier = Key::Modifiers::Control; break; case 'c': modifier = Key::Modifiers::Control; break;
case 'a': modifier = Key::Modifiers::Alt; break; case 'a': modifier = Key::Modifiers::Alt; break;
@ -94,7 +94,7 @@ KeyList parse_keys(StringView str)
result.push_back(canonicalize_ifn({ modifier, name_it->key })); result.push_back(canonicalize_ifn({ modifier, name_it->key }));
else if (desc.char_length() == 1) else if (desc.char_length() == 1)
result.push_back(Key{ modifier, desc[0_char] }); result.push_back(Key{ modifier, desc[0_char] });
else if (tolower(desc[0_byte]) == 'f' and desc.length() <= 3) else if (to_lower(desc[0_byte]) == 'f' and desc.length() <= 3)
{ {
int val = str_to_int(desc.substr(1_byte)); int val = str_to_int(desc.substr(1_byte));
if (val >= 1 and val <= 12) if (val >= 1 and val <= 12)

View File

@ -114,7 +114,7 @@ void goto_commands(Context& context, NormalParams params)
if (not cp) if (not cp)
return; return;
auto& buffer = context.buffer(); auto& buffer = context.buffer();
switch (tolower(*cp)) switch (to_lower(*cp))
{ {
case 'g': case 'g':
case 'k': case 'k':
@ -246,7 +246,7 @@ void view_commands(Context& context, NormalParams params)
const ByteCoord cursor = context.selections().main().cursor(); const ByteCoord cursor = context.selections().main().cursor();
Window& window = context.window(); Window& window = context.window();
switch (tolower(*cp)) switch (to_lower(*cp))
{ {
case 'v': case 'v':
case 'c': case 'c':
@ -306,26 +306,32 @@ void replace_with_char(Context& context, NormalParams)
}, "replace with char", "enter char to replace with\n"); }, "replace with char", "enter char to replace with\n");
} }
Codepoint to_lower(Codepoint cp) { return tolower(cp); }
Codepoint to_upper(Codepoint cp) { return toupper(cp); }
Codepoint swap_case(Codepoint cp) Codepoint swap_case(Codepoint cp)
{ {
Codepoint res = std::tolower(cp); Codepoint res = to_lower(cp);
return res == cp ? std::toupper(cp) : res; return res == cp ? to_upper(cp) : res;
} }
template<Codepoint (*func)(Codepoint)> template<Codepoint (*func)(Codepoint)>
void for_each_char(Context& context, NormalParams) void for_each_codepoint(Context& context, NormalParams)
{ {
using Utf8It = utf8::iterator<BufferIterator, utf8::InvalidPolicy::Pass>;
ScopedEdition edition(context); ScopedEdition edition(context);
Vector<String> sels = context.selections_content(); Buffer& buffer = context.buffer();
for (auto& sel : sels) SelectionList& selections = context.selections();
Vector<String> strings;
for (auto& sel : selections)
{ {
for (auto& c : sel) String str;
c = func(c); for (auto begin = Utf8It{buffer.iterator_at(sel.min()), buffer},
end = Utf8It{buffer.iterator_at(sel.max()), buffer}+1;
begin != end; ++begin)
utf8::dump(std::back_inserter(str), func(*begin));
strings.push_back(std::move(str));
} }
context.selections().insert(sels, InsertMode::Replace); selections.insert(strings, InsertMode::Replace);
} }
void command(Context& context, NormalParams) void command(Context& context, NormalParams)
@ -612,7 +618,7 @@ void regex_prompt(Context& context, const String prompt, T func)
template<SelectMode mode, Direction direction> template<SelectMode mode, Direction direction>
void search(Context& context, NormalParams params) void search(Context& context, NormalParams params)
{ {
const char reg = tolower(params.reg ? params.reg : '/'); const char reg = to_lower(params.reg ? params.reg : '/');
regex_prompt(context, direction == Forward ? "search:" : "reverse search:", regex_prompt(context, direction == Forward ? "search:" : "reverse search:",
[reg](Regex ex, PromptEvent event, Context& context) { [reg](Regex ex, PromptEvent event, Context& context) {
if (ex.empty()) if (ex.empty())
@ -627,7 +633,7 @@ void search(Context& context, NormalParams params)
template<SelectMode mode, Direction direction> template<SelectMode mode, Direction direction>
void search_next(Context& context, NormalParams params) void search_next(Context& context, NormalParams params)
{ {
const char reg = tolower(params.reg ? params.reg : '/'); const char reg = to_lower(params.reg ? params.reg : '/');
StringView str = context.main_sel_register_value(reg); StringView str = context.main_sel_register_value(reg);
if (not str.empty()) if (not str.empty())
{ {
@ -664,7 +670,7 @@ void use_selection_as_search_pattern(Context& context, NormalParams params)
patterns.push_back(std::move(content)); patterns.push_back(std::move(content));
} }
const char reg = tolower(params.reg ? params.reg : '/'); const char reg = to_lower(params.reg ? params.reg : '/');
context.print_status({ context.print_status({
format("register '{}' set to '{}'", reg, patterns[sels.main_index()]), format("register '{}' set to '{}'", reg, patterns[sels.main_index()]),
@ -679,7 +685,7 @@ void use_selection_as_search_pattern(Context& context, NormalParams params)
void select_regex(Context& context, NormalParams params) void select_regex(Context& context, NormalParams params)
{ {
const char reg = tolower(params.reg ? params.reg : '/'); const char reg = to_lower(params.reg ? params.reg : '/');
regex_prompt(context, "select:", [reg](Regex ex, PromptEvent event, Context& context) { regex_prompt(context, "select:", [reg](Regex ex, PromptEvent event, Context& context) {
if (ex.empty()) if (ex.empty())
ex = Regex{context.main_sel_register_value(reg)}; ex = Regex{context.main_sel_register_value(reg)};
@ -692,7 +698,7 @@ void select_regex(Context& context, NormalParams params)
void split_regex(Context& context, NormalParams params) void split_regex(Context& context, NormalParams params)
{ {
const char reg = tolower(params.reg ? params.reg : '/'); const char reg = to_lower(params.reg ? params.reg : '/');
regex_prompt(context, "split:", [reg](Regex ex, PromptEvent event, Context& context) { regex_prompt(context, "split:", [reg](Regex ex, PromptEvent event, Context& context) {
if (ex.empty()) if (ex.empty())
ex = Regex{context.main_sel_register_value(reg)}; ex = Regex{context.main_sel_register_value(reg)};
@ -1074,7 +1080,7 @@ void start_or_end_macro_recording(Context& context, NormalParams params)
context.input_handler().stop_recording(); context.input_handler().stop_recording();
else else
{ {
const char reg = tolower(params.reg ? params.reg : '@'); const char reg = to_lower(params.reg ? params.reg : '@');
if (not is_basic_alpha(reg) and reg != '@') if (not is_basic_alpha(reg) and reg != '@')
throw runtime_error("Macros can only use the '@' and alphabetic registers"); throw runtime_error("Macros can only use the '@' and alphabetic registers");
context.input_handler().start_recording(reg); context.input_handler().start_recording(reg);
@ -1089,7 +1095,7 @@ void end_macro_recording(Context& context, NormalParams)
void replay_macro(Context& context, NormalParams params) void replay_macro(Context& context, NormalParams params)
{ {
const char reg = tolower(params.reg ? params.reg : '@'); const char reg = to_lower(params.reg ? params.reg : '@');
if (not is_basic_alpha(reg) and reg != '@') if (not is_basic_alpha(reg) and reg != '@')
throw runtime_error("Macros can only use the '@' and alphabetic registers"); throw runtime_error("Macros can only use the '@' and alphabetic registers");
@ -1280,7 +1286,7 @@ void spaces_to_tabs(Context& context, NormalParams params)
void save_selections(Context& context, NormalParams params) void save_selections(Context& context, NormalParams params)
{ {
const char reg = tolower(params.reg ? params.reg : '^'); const char reg = to_lower(params.reg ? params.reg : '^');
if (not is_basic_alpha(reg) and reg != '^') if (not is_basic_alpha(reg) and reg != '^')
throw runtime_error("selections can only be saved to the '^' and alphabetic registers"); throw runtime_error("selections can only be saved to the '^' and alphabetic registers");
@ -1296,7 +1302,7 @@ void save_selections(Context& context, NormalParams params)
void restore_selections(Context& context, NormalParams params) void restore_selections(Context& context, NormalParams params)
{ {
const char reg = tolower(params.reg ? params.reg : '^'); const char reg = to_lower(params.reg ? params.reg : '^');
if (not is_basic_alpha(reg) and reg != '^') if (not is_basic_alpha(reg) and reg != '^')
throw runtime_error("selections can only be saved to the '^' and alphabetic registers"); throw runtime_error("selections can only be saved to the '^' and alphabetic registers");
@ -1617,9 +1623,9 @@ static NormalCmdDesc cmds[] =
{ Key::Escape, "end macro recording", end_macro_recording }, { Key::Escape, "end macro recording", end_macro_recording },
{ '`', "convert to lower case in selections", for_each_char<to_lower> }, { '`', "convert to lower case in selections", for_each_codepoint<to_lower> },
{ '~', "convert to upper case in selections", for_each_char<to_upper> }, { '~', "convert to upper case in selections", for_each_codepoint<to_upper> },
{ alt('`'), "swap case in selections", for_each_char<swap_case> }, { alt('`'), "swap case in selections", for_each_codepoint<swap_case> },
{ '&', "align selection cursors", align }, { '&', "align selection cursors", align },
{ alt('&'), "copy indentation", copy_indent }, { alt('&'), "copy indentation", copy_indent },

View File

@ -24,7 +24,7 @@ static int count_word_boundaries_match(StringView candidate, StringView query)
if (not is_word_boundary) if (not is_word_boundary)
continue; continue;
const Codepoint lc = tolower(c); const Codepoint lc = to_lower(c);
for (; qit != query.end(); ++qit) for (; qit != query.end(); ++qit)
{ {
const Codepoint qc = *qit; const Codepoint qc = *qit;
@ -43,7 +43,7 @@ static int count_word_boundaries_match(StringView candidate, StringView query)
static bool smartcase_eq(Codepoint query, Codepoint candidate) static bool smartcase_eq(Codepoint query, Codepoint candidate)
{ {
return query == (islower(query) ? tolower(candidate) : candidate); return query == (islower(query) ? to_lower(candidate) : candidate);
} }
static bool subsequence_match_smart_case(StringView str, StringView subseq) static bool subsequence_match_smart_case(StringView str, StringView subseq)

View File

@ -76,7 +76,7 @@ Register& RegisterManager::operator[](StringView reg)
Register& RegisterManager::operator[](Codepoint c) Register& RegisterManager::operator[](Codepoint c)
{ {
c = tolower(c); c = to_lower(c);
if (c < 32 or c > 127) if (c < 32 or c > 127)
throw runtime_error(format("invalid register name: '{}'", c)); throw runtime_error(format("invalid register name: '{}'", c));

View File

@ -63,6 +63,12 @@ inline CharCategories categorize(Codepoint c)
: CharCategories::Punctuation; : CharCategories::Punctuation;
} }
inline Codepoint to_lower(Codepoint cp) { return towlower((wchar_t)cp); }
inline Codepoint to_upper(Codepoint cp) { return towupper((wchar_t)cp); }
inline char to_lower(char c) { return c >= 'A' and c <= 'Z' ? c - 'A' + 'a' : c; }
inline char to_upper(char c) { return c >= 'a' and c <= 'z' ? c - 'a' + 'A' : c; }
} }
#endif // unicode_hh_INCLUDED #endif // unicode_hh_INCLUDED