← Back to team overview

widelands-dev team mailing list archive

[Merge] lp:~widelands-dev/widelands/bug-1572879-broken_bidi into lp:widelands

 

Miroslav Remák has proposed merging lp:~widelands-dev/widelands/bug-1572879-broken_bidi into lp:widelands.

Commit message:
Specify encoding of icu::UnicodeString input

Requested reviews:
  Widelands Developers (widelands-dev)
Related bugs:
  Bug #1572879 in widelands: "Font selection and BiDi broken in Windows"
  https://bugs.launchpad.net/widelands/+bug/1572879

For more details, see:
https://code.launchpad.net/~widelands-dev/widelands/bug-1572879-broken_bidi/+merge/293491

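Without an explicit encoding, icu::UnicodeString interprets char* input using the platform's default codepage instead of UTF-8. This has been causing all sorts of issues, from incorrect RTL and script detection to messing up texts beyond recognition.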
-- 
Your team Widelands Developers is requested to review the proposed merge of lp:~widelands-dev/widelands/bug-1572879-broken_bidi into lp:widelands.
=== modified file 'src/graphic/text/bidi.cc'
--- src/graphic/text/bidi.cc	2016-02-03 22:42:34 +0000
+++ src/graphic/text/bidi.cc	2016-05-02 02:37:45 +0000
@@ -572,7 +572,7 @@
 // True if a string does not contain Latin characters.
 // Checks for the first 'limit' characters maximum.
 bool has_rtl_character(const char* input, int32_t limit) {
-	const icu::UnicodeString parseme(input);
+	const icu::UnicodeString parseme(input, "UTF-8");
 	for (int32_t i = 0; i < parseme.length() && i < limit; ++i) {
 		if (is_rtl_character(parseme.char32At(i))) {
 			return true;
@@ -598,7 +598,7 @@
 	if (!has_script_character(input, UI::FontSets::Selector::kArabic)) {
 		return input;
 	}
-	const icu::UnicodeString parseme(input);
+	const icu::UnicodeString parseme(input, "UTF-8");
 	icu::UnicodeString queue;
 	UChar not_a_character = 0xFFFF;
 	UChar next = not_a_character;
@@ -678,7 +678,7 @@
 // BiDi support for RTL languages
 // This turns the logical order of the glyphs into the display order.
 std::string line2bidi(const char* input) {
-	const icu::UnicodeString parseme(input);
+	const icu::UnicodeString parseme(input, "UTF-8");
 	icu::UnicodeString stack;
 	icu::UnicodeString temp_stack;
 	UChar not_a_character = 0xFFFF;
@@ -734,7 +734,7 @@
 
 // True if a string contains a character from the script's code blocks
 bool has_script_character(const char* input, UI::FontSets::Selector script) {
-	const icu::UnicodeString parseme(input);
+	const icu::UnicodeString parseme(input, "UTF-8");
 	for (int32_t i = 0; i < parseme.length(); ++i) {
 		if (is_script_character(parseme.char32At(i), script)) {
 			return true;
@@ -766,7 +766,7 @@
 
 //  Split a string of CJK characters into units that can have line breaks between them.
 std::vector<std::string> split_cjk_word(const char* input) {
-	const icu::UnicodeString parseme(input);
+	const icu::UnicodeString parseme(input, "UTF-8");
 	std::vector<std::string> result;
 	for (int i = 0; i < parseme.length(); ++i) {
 		icu::UnicodeString temp;

=== modified file 'src/graphic/text_layout.cc'
--- src/graphic/text_layout.cc	2016-03-30 08:38:59 +0000
+++ src/graphic/text_layout.cc	2016-05-02 02:37:45 +0000
@@ -179,7 +179,7 @@
 uint32_t TextStyle::calc_width_for_wrapping(const std::string & text) const
 {
 	int result = 0;
-	const icu::UnicodeString parseme(text.c_str());
+	const icu::UnicodeString parseme(text.c_str(), "UTF-8");
 	for (int i = 0; i < parseme.length(); ++i) {
 		UChar c = parseme.charAt(i);
 		if (!i18n::is_diacritic(c)) {

=== modified file 'src/graphic/wordwrap.cc'
--- src/graphic/wordwrap.cc	2016-03-14 19:49:52 +0000
+++ src/graphic/wordwrap.cc	2016-05-02 02:37:45 +0000
@@ -191,7 +191,7 @@
 
 	// The line didn't fit.
 	// We just do a linear search ahead until we hit the max.
-	const icu::UnicodeString unicode_word(text.substr(line_start, orig_end).c_str());
+	const icu::UnicodeString unicode_word(text.substr(line_start, orig_end).c_str(), "UTF-8");
 	uint32_t line_width = 0;
 	int32_t end = -1;
 	icu::UnicodeString unicode_line;


Follow ups