← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~paul-lucas/zorba/bug-994610 into lp:zorba

 

Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/bug-994610 into lp:zorba.

Requested reviews:
  Ghislain Fourny (gislenius)
  Paul J. Lucas (paul-lucas)
Related bugs:
  Bug #974477 in Zorba: "Invalid regex character class not reported with ICU"
  https://bugs.launchpad.net/zorba/+bug/974477
  Bug #994610 in Zorba: "Regex tests "fail" using latest ICU version"
  https://bugs.launchpad.net/zorba/+bug/994610

For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/bug-994610/+merge/104769

1. Added a fix for [\s]: a multi-character escape inside a character class should now always throw FORX0002.
2. I think all regex tests now "pass" with both pre-4.0 and post-4.0 versions of ICU.
-- 
https://code.launchpad.net/~paul-lucas/zorba/bug-994610/+merge/104769
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/util/regex.cpp'
--- src/util/regex.cpp	2012-05-03 12:31:51 +0000
+++ src/util/regex.cpp	2012-05-04 16:10:25 +0000
@@ -123,74 +123,104 @@
         continue;
       }
       got_backslash = false;
-      switch ( *xq_c ) {
-        case 'c': // NameChar
-          *icu_re += "[" bs_c "]";
-          continue;
-        case 'C': // [^\c]
-          *icu_re += "[^" bs_c "]";
-          continue;
-        case 'i': // initial NameChar
-          *icu_re += "[" bs_i "]";
-          continue;
-        case 'I': // [^\i]
-          *icu_re += "[^" bs_i "]";
-          continue;
-        case '0':
-        case '1':
-        case '2':
-        case '3':
-        case '4':
-        case '5':
-        case '6':
-        case '7':
-        case '8':
-        case '9':
-          backref_no = *xq_c - '0';
-          if ( !backref_no )          // \0 is illegal
-            throw INVALID_RE_EXCEPTION( xq_re, ZED( BackRef0Illegal ) );
-          if ( in_char_class ) {
-            //
-            // XQuery 3.0 F&O 5.6.1: Within a character class expression,
-            // \ followed by a digit is invalid.
-            //
-            throw INVALID_RE_EXCEPTION(
-              xq_re, ZED( BackRefIllegalInCharClass )
-            );
-          }
-          in_backref = true;
-          // no break;
-        case '$':
-        case '(':
-        case ')':
-        case '*':
-        case '+':
-        case '-':
-        case '.':
-        case '?':
-        case 'd': // [0-9]
-        case 'D': // [^\d]
-        case 'n': // newline
-        case 'p': // category escape
-        case 'P': // [^\p]
-        case 'r': // carriage return
-        case 's': // whitespace
-        case 'S': // [^\s]
-        case 't': // tab
-        case 'w': // word char
-        case 'W': // [^\w]
-        case '[':
-        case '\\':
-        case ']':
-        case '^':
-        case '{':
-        case '|':
-        case '}':
-          *icu_re += '\\';
-          break;
-        default:
-          throw INVALID_RE_EXCEPTION( xq_re, ZED( BadRegexEscape_3 ), *xq_c );
-      }
+
+      if ( in_char_class ) {
+        //
+        // When in a character class, only single-character escapes are
+        // permitted.
+        //
+        switch ( *xq_c ) {
+          case '(':
+          case ')':
+          case '*':
+          case '+':
+          case '-':
+          case '.':
+          case 'n': // newline
+          case 'r': // carriage return
+          case 't': // tab
+          case '[':
+          case '\\':
+          case ']':
+          case '^':
+          case '{':
+          case '|':
+          case '}':
+            *icu_re += '\\';
+            break;
+          default:
+            throw INVALID_RE_EXCEPTION( xq_re, ZED( BadRegexEscape_3 ), *xq_c );
+        }
+      } else {
+        switch ( *xq_c ) {
+          case 'c': // NameChar
+            *icu_re += "[" bs_c "]";
+            continue;
+          case 'C': // [^\c]
+            *icu_re += "[^" bs_c "]";
+            continue;
+          case 'i': // initial NameChar
+            *icu_re += "[" bs_i "]";
+            continue;
+          case 'I': // [^\i]
+            *icu_re += "[^" bs_i "]";
+            continue;
+          case '0':
+          case '1':
+          case '2':
+          case '3':
+          case '4':
+          case '5':
+          case '6':
+          case '7':
+          case '8':
+          case '9':
+            backref_no = *xq_c - '0';
+            if ( !backref_no )          // \0 is illegal
+              throw INVALID_RE_EXCEPTION( xq_re, ZED( BackRef0Illegal ) );
+            if ( in_char_class ) {
+              //
+              // XQuery 3.0 F&O 5.6.1: Within a character class expression,
+              // \ followed by a digit is invalid.
+              //
+              throw INVALID_RE_EXCEPTION(
+                xq_re, ZED( BackRefIllegalInCharClass )
+              );
+            }
+            in_backref = true;
+            // no break;
+          case '$':
+          case '(':
+          case ')':
+          case '*':
+          case '+':
+          case '-':
+          case '.':
+          case '?':
+          case 'd': // [0-9]
+          case 'D': // [^\d]
+          case 'n': // newline
+          case 'p': // category escape
+          case 'P': // [^\p]
+          case 'r': // carriage return
+          case 's': // whitespace
+          case 'S': // [^\s]
+          case 't': // tab
+          case 'w': // word char
+          case 'W': // [^\w]
+          case '[':
+          case '\\':
+          case ']':
+          case '^':
+          case '{':
+          case '|':
+          case '}':
+            *icu_re += '\\';
+            break;
+          default:
+            throw INVALID_RE_EXCEPTION( xq_re, ZED( BadRegexEscape_3 ), *xq_c );
+        }
+      } // if ( in_char_class )
     } else {
       if ( in_backref ) {
         //

=== modified file 'test/rbkt/Queries/CMakeLists.txt'
--- test/rbkt/Queries/CMakeLists.txt	2012-05-03 12:31:51 +0000
+++ test/rbkt/Queries/CMakeLists.txt	2012-05-04 16:10:25 +0000
@@ -534,11 +534,13 @@
 EXPECTED_FAILURE(test/rbkt/zorba/http-client/post/post3_binary_element 3391756)
 IF(NOT ZORBA_NO_ICU)
   IF ( ${ICU_VERSION} VERSION_LESS 4.0.0 )
-    EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err17 974477)
+    EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m40 866874)
+    EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m41 866874)
+  ELSE ( ${ICU_VERSION} VERSION_LESS 4.0.0 )
+    EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err10 994610)
+    EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err15 866874)
   ENDIF ( ${ICU_VERSION} VERSION_LESS 4.0.0 )
   EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m11 866874)
-  EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m40 866874)
-  EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m41 866874)
 ENDIF(NOT ZORBA_NO_ICU)
 
 IF(ZORBA_NO_ICU)


Follow ups