widelands-dev team mailing list archive
-
widelands-dev team
-
Mailing list archive
-
Message #07453
[Merge] lp:~widelands-dev/widelands/bug-1572879-broken_bidi into lp:widelands
Miroslav Remák has proposed merging lp:~widelands-dev/widelands/bug-1572879-broken_bidi into lp:widelands.
Commit message:
Specify encoding of icu::UnicodeString input
Requested reviews:
Widelands Developers (widelands-dev)
Related bugs:
Bug #1572879 in widelands: "Font selection and BiDi broken in Windows"
https://bugs.launchpad.net/widelands/+bug/1572879
For more details, see:
https://code.launchpad.net/~widelands-dev/widelands/bug-1572879-broken_bidi/+merge/293491
Not specifying the encoding of the input passed to icu::UnicodeString has been causing all sorts of issues, from incorrect RTL and script detection to garbling texts beyond recognition.
--
Your team Widelands Developers is requested to review the proposed merge of lp:~widelands-dev/widelands/bug-1572879-broken_bidi into lp:widelands.
=== modified file 'src/graphic/text/bidi.cc'
--- src/graphic/text/bidi.cc 2016-02-03 22:42:34 +0000
+++ src/graphic/text/bidi.cc 2016-05-02 02:37:45 +0000
@@ -572,7 +572,7 @@
// True if a string does not contain Latin characters.
// Checks for the first 'limit' characters maximum.
bool has_rtl_character(const char* input, int32_t limit) {
- const icu::UnicodeString parseme(input);
+ const icu::UnicodeString parseme(input, "UTF-8");
for (int32_t i = 0; i < parseme.length() && i < limit; ++i) {
if (is_rtl_character(parseme.char32At(i))) {
return true;
@@ -598,7 +598,7 @@
if (!has_script_character(input, UI::FontSets::Selector::kArabic)) {
return input;
}
- const icu::UnicodeString parseme(input);
+ const icu::UnicodeString parseme(input, "UTF-8");
icu::UnicodeString queue;
UChar not_a_character = 0xFFFF;
UChar next = not_a_character;
@@ -678,7 +678,7 @@
// BiDi support for RTL languages
// This turns the logical order of the glyphs into the display order.
std::string line2bidi(const char* input) {
- const icu::UnicodeString parseme(input);
+ const icu::UnicodeString parseme(input, "UTF-8");
icu::UnicodeString stack;
icu::UnicodeString temp_stack;
UChar not_a_character = 0xFFFF;
@@ -734,7 +734,7 @@
// True if a string contains a character from the script's code blocks
bool has_script_character(const char* input, UI::FontSets::Selector script) {
- const icu::UnicodeString parseme(input);
+ const icu::UnicodeString parseme(input, "UTF-8");
for (int32_t i = 0; i < parseme.length(); ++i) {
if (is_script_character(parseme.char32At(i), script)) {
return true;
@@ -766,7 +766,7 @@
// Split a string of CJK characters into units that can have line breaks between them.
std::vector<std::string> split_cjk_word(const char* input) {
- const icu::UnicodeString parseme(input);
+ const icu::UnicodeString parseme(input, "UTF-8");
std::vector<std::string> result;
for (int i = 0; i < parseme.length(); ++i) {
icu::UnicodeString temp;
=== modified file 'src/graphic/text_layout.cc'
--- src/graphic/text_layout.cc 2016-03-30 08:38:59 +0000
+++ src/graphic/text_layout.cc 2016-05-02 02:37:45 +0000
@@ -179,7 +179,7 @@
uint32_t TextStyle::calc_width_for_wrapping(const std::string & text) const
{
int result = 0;
- const icu::UnicodeString parseme(text.c_str());
+ const icu::UnicodeString parseme(text.c_str(), "UTF-8");
for (int i = 0; i < parseme.length(); ++i) {
UChar c = parseme.charAt(i);
if (!i18n::is_diacritic(c)) {
=== modified file 'src/graphic/wordwrap.cc'
--- src/graphic/wordwrap.cc 2016-03-14 19:49:52 +0000
+++ src/graphic/wordwrap.cc 2016-05-02 02:37:45 +0000
@@ -191,7 +191,7 @@
// The line didn't fit.
// We just do a linear search ahead until we hit the max.
- const icu::UnicodeString unicode_word(text.substr(line_start, orig_end).c_str());
+ const icu::UnicodeString unicode_word(text.substr(line_start, orig_end).c_str(), "UTF-8");
uint32_t line_width = 0;
int32_t end = -1;
icu::UnicodeString unicode_line;
Follow ups