zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #09170
[Merge] lp:~paul-lucas/zorba/bug-994610 into lp:zorba
Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/bug-994610 into lp:zorba.
Requested reviews:
Ghislain Fourny (gislenius)
Paul J. Lucas (paul-lucas)
Related bugs:
Bug #974477 in Zorba: "Invalid regex character class not reported with ICU"
https://bugs.launchpad.net/zorba/+bug/974477
Bug #994610 in Zorba: "Regex tests "fail" using latest ICU version"
https://bugs.launchpad.net/zorba/+bug/994610
For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/bug-994610/+merge/104769
1. Added a fix for [\s] -- it should now always throw FORX0002.
2. I think all regex tests now "pass" with both pre-4.0 and post-4.0 versions of ICU.
--
https://code.launchpad.net/~paul-lucas/zorba/bug-994610/+merge/104769
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/util/regex.cpp'
--- src/util/regex.cpp 2012-05-03 12:31:51 +0000
+++ src/util/regex.cpp 2012-05-04 16:10:25 +0000
@@ -123,74 +123,104 @@
continue;
}
got_backslash = false;
- switch ( *xq_c ) {
- case 'c': // NameChar
- *icu_re += "[" bs_c "]";
- continue;
- case 'C': // [^\c]
- *icu_re += "[^" bs_c "]";
- continue;
- case 'i': // initial NameChar
- *icu_re += "[" bs_i "]";
- continue;
- case 'I': // [^\i]
- *icu_re += "[^" bs_i "]";
- continue;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- backref_no = *xq_c - '0';
- if ( !backref_no ) // \0 is illegal
- throw INVALID_RE_EXCEPTION( xq_re, ZED( BackRef0Illegal ) );
- if ( in_char_class ) {
- //
- // XQuery 3.0 F&O 5.6.1: Within a character class expression,
- // \ followed by a digit is invalid.
- //
- throw INVALID_RE_EXCEPTION(
- xq_re, ZED( BackRefIllegalInCharClass )
- );
- }
- in_backref = true;
- // no break;
- case '$':
- case '(':
- case ')':
- case '*':
- case '+':
- case '-':
- case '.':
- case '?':
- case 'd': // [0-9]
- case 'D': // [^\d]
- case 'n': // newline
- case 'p': // category escape
- case 'P': // [^\p]
- case 'r': // carriage return
- case 's': // whitespace
- case 'S': // [^\s]
- case 't': // tab
- case 'w': // word char
- case 'W': // [^\w]
- case '[':
- case '\\':
- case ']':
- case '^':
- case '{':
- case '|':
- case '}':
- *icu_re += '\\';
- break;
- default:
- throw INVALID_RE_EXCEPTION( xq_re, ZED( BadRegexEscape_3 ), *xq_c );
- }
+
+ if ( in_char_class ) {
+ //
+ // When in a character class, only single-character escapes are
+ // permitted.
+ //
+ switch ( *xq_c ) {
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case '-':
+ case '.':
+ case 'n': // newline
+ case 'r': // carriage return
+ case 't': // tab
+ case '[':
+ case '\\':
+ case ']':
+ case '^':
+ case '{':
+ case '|':
+ case '}':
+ *icu_re += '\\';
+ break;
+ default:
+ throw INVALID_RE_EXCEPTION( xq_re, ZED( BadRegexEscape_3 ), *xq_c );
+ }
+ } else {
+ switch ( *xq_c ) {
+ case 'c': // NameChar
+ *icu_re += "[" bs_c "]";
+ continue;
+ case 'C': // [^\c]
+ *icu_re += "[^" bs_c "]";
+ continue;
+ case 'i': // initial NameChar
+ *icu_re += "[" bs_i "]";
+ continue;
+ case 'I': // [^\i]
+ *icu_re += "[^" bs_i "]";
+ continue;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ backref_no = *xq_c - '0';
+ if ( !backref_no ) // \0 is illegal
+ throw INVALID_RE_EXCEPTION( xq_re, ZED( BackRef0Illegal ) );
+ if ( in_char_class ) {
+ //
+ // XQuery 3.0 F&O 5.6.1: Within a character class expression,
+ // \ followed by a digit is invalid.
+ //
+ throw INVALID_RE_EXCEPTION(
+ xq_re, ZED( BackRefIllegalInCharClass )
+ );
+ }
+ in_backref = true;
+ // no break;
+ case '$':
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case '-':
+ case '.':
+ case '?':
+ case 'd': // [0-9]
+ case 'D': // [^\d]
+ case 'n': // newline
+ case 'p': // category escape
+ case 'P': // [^\p]
+ case 'r': // carriage return
+ case 's': // whitespace
+ case 'S': // [^\s]
+ case 't': // tab
+ case 'w': // word char
+ case 'W': // [^\w]
+ case '[':
+ case '\\':
+ case ']':
+ case '^':
+ case '{':
+ case '|':
+ case '}':
+ *icu_re += '\\';
+ break;
+ default:
+ throw INVALID_RE_EXCEPTION( xq_re, ZED( BadRegexEscape_3 ), *xq_c );
+ }
+ } // if ( in_char_class )
} else {
if ( in_backref ) {
//
=== modified file 'test/rbkt/Queries/CMakeLists.txt'
--- test/rbkt/Queries/CMakeLists.txt 2012-05-03 12:31:51 +0000
+++ test/rbkt/Queries/CMakeLists.txt 2012-05-04 16:10:25 +0000
@@ -534,11 +534,13 @@
EXPECTED_FAILURE(test/rbkt/zorba/http-client/post/post3_binary_element 3391756)
IF(NOT ZORBA_NO_ICU)
IF ( ${ICU_VERSION} VERSION_LESS 4.0.0 )
- EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err17 974477)
+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m40 866874)
+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m41 866874)
+ ELSE ( ${ICU_VERSION} VERSION_LESS 4.0.0 )
+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err10 994610)
+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err15 866874)
ENDIF ( ${ICU_VERSION} VERSION_LESS 4.0.0 )
EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m11 866874)
- EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m40 866874)
- EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m41 866874)
ENDIF(NOT ZORBA_NO_ICU)
IF(ZORBA_NO_ICU)
Follow ups
-
[Merge] lp:~paul-lucas/zorba/bug-994610 into lp:zorba
From: noreply, 2012-05-08
-
[Merge] lp:~paul-lucas/zorba/bug-994610 into lp:zorba
From: Zorba Build Bot, 2012-05-08
-
[Merge] lp:~paul-lucas/zorba/bug-994610 into lp:zorba
From: Zorba Build Bot, 2012-05-08
-
[Merge] lp:~paul-lucas/zorba/bug-994610 into lp:zorba
From: Paul J. Lucas, 2012-05-08
-
Re: [Merge] lp:~paul-lucas/zorba/bug-994610 into lp:zorba
From: Ghislain Fourny, 2012-05-08
-
[Merge] lp:~paul-lucas/zorba/bug-994610 into lp:zorba
From: Zorba Build Bot, 2012-05-05
-
Re: [Merge] lp:~paul-lucas/zorba/bug-994610 into lp:zorba
From: Zorba Build Bot, 2012-05-05
-
[Merge] lp:~paul-lucas/zorba/bug-994610 into lp:zorba
From: Zorba Build Bot, 2012-05-05
-
[Merge] lp:~paul-lucas/zorba/bug-994610 into lp:zorba
From: Zorba Build Bot, 2012-05-04
-
[Merge] lp:~paul-lucas/zorba/bug-994610 into lp:zorba
From: Paul J. Lucas, 2012-05-04
-
Re: [Merge] lp:~paul-lucas/zorba/bug-994610 into lp:zorba
From: Paul J. Lucas, 2012-05-04