← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~paul-lucas/zorba/bug-892532 into lp:zorba

 

Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/bug-892532 into lp:zorba.

Requested reviews:
  Paul J. Lucas (paul-lucas)
  Matthias Brantner (matthias-brantner)
Related bugs:
  Bug #892532 in Zorba: "Strict-aliasing warnings"
  https://bugs.launchpad.net/zorba/+bug/892532

For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/bug-892532/+merge/82828

Removed compiler warnings (strict-aliasing and unused-variable).
-- 
https://code.launchpad.net/~paul-lucas/zorba/bug-892532/+merge/82828
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/runtime/full_text/icu_tokenizer.cpp'
--- src/runtime/full_text/icu_tokenizer.cpp	2011-10-03 09:18:49 +0000
+++ src/runtime/full_text/icu_tokenizer.cpp	2011-11-20 19:05:29 +0000
@@ -196,14 +196,14 @@
       *c = L' ';
   }
 
-  // This UnicodeString wraps the existing buffer: no copy is made.
-  UnicodeString const utf16_s( false, utf16_buf, utf16_len );
+  // This unicode::string wraps the existing buffer: no copy is made.
+  unicode::string const utf16_s( false, utf16_buf, utf16_len );
 
   word_->setText( utf16_s );
-  int32_t word_start = word_->first(), word_end = word_->next();
+  unicode::size_type word_start = word_->first(), word_end = word_->next();
 
   sent_->setText( utf16_s );
-  int32_t sent_start = sent_->first(), sent_end = sent_->next();
+  unicode::size_type sent_end = sent_->first(); sent_end = sent_->next();
 
   temp_token t;
 
@@ -230,7 +230,7 @@
     zstring_b utf8_word;
     utf8_word.wrap_memory( utf8_buf, utf8_len );
 
-    int32_t const rule_status = word_->getRuleStatus();
+    unicode::size_type const rule_status = word_->getRuleStatus();
 
     //
     // "Junk" tokens are whitespace and punctuation -- except some punctuation
@@ -364,7 +364,7 @@
 next:
     word_start = word_end, word_end = word_->next();
     if ( word_end >= sent_end && sent_end != BreakIterator::DONE ) {
-      sent_start = sent_end, sent_end = sent_->next();
+      sent_end = sent_->next();
       if ( sent_end != BreakIterator::DONE )
         ++numbers().sent;
     }

=== modified file 'src/runtime/full_text/thesauri/wn_synset.cpp'
--- src/runtime/full_text/thesauri/wn_synset.cpp	2011-06-24 19:58:33 +0000
+++ src/runtime/full_text/thesauri/wn_synset.cpp	2011-11-20 19:05:29 +0000
@@ -84,9 +84,11 @@
 }
 
 synset::mem_ptr_type* synset::skip_lemmas( size_type n, mem_ptr_type *pptr ) {
-  unsigned char const *&u = *reinterpret_cast<unsigned char const**>( pptr );
+  //
+  // This custom code is faster than using decode_base128() here.
+  //
   while ( n-- > 0 )
-    while ( *u++ & 0x80 )               // faster than decode_base128()
+    while ( static_cast<unsigned char>( *(*pptr)++ ) & 0x80 )
       ;
   return pptr;
 }

=== modified file 'src/util/unicode_util.h'
--- src/util/unicode_util.h	2011-07-18 14:25:21 +0000
+++ src/util/unicode_util.h	2011-11-20 19:05:29 +0000
@@ -45,6 +45,10 @@
  */
 typedef UChar32 code_point;
 
+/**
+ * The type that represents the size of a string.  Do not assume that this is
+ * an unsigned type.
+ */
 typedef int32_t size_type;
 
 /**


Follow ups