zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #04241
[Merge] lp:~paul-lucas/zorba/bug-924063 into lp:zorba
Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/bug-924063 into lp:zorba.
Requested reviews:
Paul J. Lucas (paul-lucas)
Matthias Brantner (matthias-brantner)
Related bugs:
Bug #924063 in Zorba: "Sentence is incorrectly incremented when token characters end without sentence terminator, take 2"
https://bugs.launchpad.net/zorba/+bug/924063
For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/bug-924063/+merge/90887
Real fix this time.
--
https://code.launchpad.net/~paul-lucas/zorba/bug-924063/+merge/90887
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/runtime/full_text/icu_tokenizer.cpp'
--- src/runtime/full_text/icu_tokenizer.cpp 2011-12-21 14:40:33 +0000
+++ src/runtime/full_text/icu_tokenizer.cpp 2012-01-31 15:33:31 +0000
@@ -357,7 +357,7 @@
t.append( utf8_buf, utf8_len );
else {
# if DEBUG_TOKENIZER
- cout << "setting token" << endl;
+ cout << " setting token" << endl;
# endif
t.set(
utf8_buf, utf8_len, numbers().token, numbers().sent, numbers().para
@@ -369,15 +369,24 @@
next:
# if DEBUG_TOKENIZER
cout << "at next" << endl;
+ cout << " word_start = " << word_start << endl;
+ cout << " word_end = " << word_end << endl;
+ cout << " sent_end = " << sent_end << endl;
# endif
word_start = word_end, word_end = word_it_->next();
+# if DEBUG_TOKENIZER
+ cout << " word_start = " << word_start << endl;
+ cout << " word_end = " << word_end << endl;
+# endif
+
if ( word_end >= sent_end && sent_end != BreakIterator::DONE ) {
sent_end = sent_it_->next();
+# if DEBUG_TOKENIZER
+ cout << " sent_end = " << sent_end << endl;
+# endif
// The addition of the "if" fixes:
// https://bugs.launchpad.net/bugs/863320
-#if 0
if ( sent_end != BreakIterator::DONE )
-#endif
++numbers().sent;
}
} // while
@@ -389,9 +398,7 @@
t.send( payload, callback );
// Incrementing "sent" here fixes:
// https://bugs.launchpad.net/bugs/897800
-#if 0
++numbers().sent;
-#endif
#if DEBUG_TOKENIZER
cout << "--------------------\n";
#endif /* DEBUG_TOKENIZER */
=== modified file 'test/rbkt/Queries/CMakeLists.txt'
--- test/rbkt/Queries/CMakeLists.txt 2012-01-25 11:57:01 +0000
+++ test/rbkt/Queries/CMakeLists.txt 2012-01-31 15:33:31 +0000
@@ -539,7 +539,3 @@
EXPECTED_FAILURE(test/rbkt/zorba/windowing/tumbling20 899364)
EXPECTED_FAILURE(test/rbkt/zorba/windowing/tumbling17 899364)
EXPECTED_FAILURE(test/rbkt/zorba/windowing/tumbling21 899364)
-
-IF (NOT ZORBA_NO_FULL_TEXT)
- EXPECTED_FAILURE(test/rbkt/zorba/fulltext/ft-same-sentence-true-4 900552)
-ENDIF (NOT ZORBA_NO_FULL_TEXT)
=== modified file 'test/rbkt/Queries/zorba/fulltext/ft-same-sentence-false-2.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-same-sentence-false-2.xq 2011-12-21 14:40:33 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-same-sentence-false-2.xq 2012-01-31 15:33:31 +0000
@@ -1,2 +1,2 @@
-let $x := <msg>hello. world</msg>
+let $x := <msg>Hello. World</msg>
return $x contains text "hello" ftand "world" same sentence
=== modified file 'test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-2.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-2.xq 2011-06-14 14:21:49 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-2.xq 2012-01-31 15:33:31 +0000
@@ -1,2 +1,2 @@
-let $x := <msg>hello world.</msg>
+let $x := <msg>Hello world.</msg>
return $x contains text "hello" ftand "world" same sentence
=== modified file 'test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-3.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-3.xq 2011-06-14 14:21:49 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-3.xq 2012-01-31 15:33:31 +0000
@@ -1,3 +1,3 @@
-let $x := <msg>hello
+let $x := <msg>Hello
world.</msg>
return $x contains text "hello" ftand "world" same sentence
=== modified file 'test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-4.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-4.xq 2011-09-30 14:31:41 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-4.xq 2012-01-31 15:33:31 +0000
@@ -1,2 +1,2 @@
-let $x := <msg>hello world</msg>
+let $x := <msg>Hello world</msg>
return $x contains text "hello" ftand "world" same sentence
Follow ups