← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~paul-lucas/zorba/bug-1131984 into lp:zorba

 

Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/bug-1131984 into lp:zorba.

Commit message:
Fixed bug 1131984.

Requested reviews:
  Paul J. Lucas (paul-lucas)
Related bugs:
  Bug #1131984 in Zorba: "apparently invalid regex in queries"
  https://bugs.launchpad.net/zorba/+bug/1131984

For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/bug-1131984/+merge/159750

Fixed bug 1131984.
-- 
https://code.launchpad.net/~paul-lucas/zorba/bug-1131984/+merge/159750
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'ChangeLog'
--- ChangeLog	2013-04-18 04:19:56 +0000
+++ ChangeLog	2013-04-19 04:11:29 +0000
@@ -48,6 +48,7 @@
   * Fixed bug #1085408 (xs:date(): casting large year values)
   * Fixed bug #867027 (XQST0059 error messages inconsistent)
   * Fixed bug #1095889 (Improve error message for xml-parsing error).
+  * Fixed bug #1131984 (apparently invalid regex in queries)
   * Fixed bug #1123163 (fn:format-integer failures)
   * Fixed bug in index join rule (no index join if inner clause has positional var).
   * Fixed bug in index join rule (copy var ids after cloning index domain expr).

=== modified file 'src/util/icu_regex.cpp'
--- src/util/icu_regex.cpp	2013-04-18 03:26:36 +0000
+++ src/util/icu_regex.cpp	2013-04-19 04:11:29 +0000
@@ -57,12 +57,20 @@
 #define bs_i "\\p{L}_:"                 /* \i equivalent contents */
 #define bs_W "\\p{P}\\p{Z}\\p{C}"       /* \W equivalent contents */
 
+/**
+ * Decrements an integer, but no lower than a certain limit (usually zero).
+ *
+ * @tparam IntegralType The integral type.
+ * @param i A pointer to the integer to decrement.
+ * @param limit The limit not to go lower than.
+ * @return Returns \c true only when the limit has been reached for the first
+ * time.
+ */
 template<typename IntegralType> inline
 typename std::enable_if<ZORBA_TR1_NS::is_integral<IntegralType>::value,
-                        void>::type
+                        bool>::type
 dec_limit( IntegralType *i, IntegralType limit = 0 ) {
-  if ( *i > limit )
-    --*i;
+  return *i > limit && --*i == limit;
 }
 
 static unsigned digits( long n ) {
@@ -108,13 +116,21 @@
   return icu_flags;
 }
 
+/**
+ * Checks whether the given iterator is positioned at the first character in a
+ * character range, e.g., the 'a' in "[a-z]" (assuming we're already within a
+ * character class [...]).
+ *
+ * @param s The string on which \a i is iterating.
+ * @param i The iterator marking the position of the character to check.
+ */
 inline bool is_char_range_begin( zstring const &s,
-                                 zstring::const_iterator i ) {
+  /* intentionally not const& */ zstring::const_iterator i ) {
   return ztd::peek( s, &i ) == '-' && ztd::peek( s, &i ) != '[';
 }
 
 inline bool is_non_capturing_begin( zstring const &s,
-                                    zstring::const_iterator i ) {
+  /* intentionally not const& */    zstring::const_iterator i ) {
   return ztd::peek_behind( s, &i ) == '?' && ztd::peek_behind( s, &i ) == '(';
 }
 
@@ -138,6 +154,7 @@
   char c_cooked;                        // current cooked XQuery char
   char prev_c_cooked = 0;               // previous c_cooked
   char char_range_begin_cooked = 0;     // the 'a' in [a-b]
+  bool char_range_possible = true;      // handles cases like [a-h-o-z]
 
   bool got_backslash = false;
   int  got_quantifier = 0;
@@ -437,11 +454,15 @@
               // XQuery [A-Z-[OI]] becomes ICU [A-Z--[OI]].
               //
               *icu_re += '-';
-            } else if ( prev_c_cooked != '[' && next_c != ']' ) {
+            } else if ( char_range_possible &&
+                        prev_c_cooked != '[' && next_c != ']' ) {
               //
               // The '-' is neither the first or last character within a
               // character range (i.e., a literal '-') so therefore it's
-              // indicating a character range.
+              // indicating a character range -- except if we just completed a
+              // character range.  For example, in "[a-h-o-z]", there are two
+              // ranges: a-h and o-z.  The '-' between the 'h' and the 'o' is a
+              // literal '-' and NOT a range h-o.
               //
               char_range_begin_cooked = prev_c_cooked;
               in_char_range = 2;
@@ -536,7 +557,7 @@
     *icu_re += c;
 
 next:
-    dec_limit( &in_char_range );
+    char_range_possible = !dec_limit( &in_char_range );
     dec_limit( &got_quantifier );
     dec_limit( &is_first_char );
     prev_c_cooked = c_cooked;

=== modified file 'test/fots/CMakeLists.txt'
--- test/fots/CMakeLists.txt	2013-04-19 00:21:18 +0000
+++ test/fots/CMakeLists.txt	2013-04-19 04:11:29 +0000
@@ -125,8 +125,6 @@
 EXPECTED_FOTS_FAILURE (DISPUTED fn-format-date format-date-en132 21423)
 EXPECTED_FOTS_FAILURE (DISPUTED fn-format-date format-date-en133 21423)
 EXPECTED_FOTS_FAILURE (DISPUTED fn-format-date format-date-en134 21423)
-EXPECTED_FOTS_FAILURE (DISPUTED fn-matches.re re00056 21425)
-EXPECTED_FOTS_FAILURE (DISPUTED fn-matches.re re00086 21425)
 EXPECTED_FOTS_FAILURE (DISPUTED fn-format-integer format-integer-044 21448)
 EXPECTED_FOTS_FAILURE (DISPUTED fn-format-date format-date-en152 21558)
 EXPECTED_FOTS_FAILURE (DISPUTED fn-format-dateTime format-dateTime-en152 21558)


Follow ups