← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~zorba-coders/zorba/no_unicode into lp:zorba

 

Paul J. Lucas has proposed merging lp:~zorba-coders/zorba/no_unicode into lp:zorba.

Requested reviews:
  Markos Zaharioudakis (markos-za)
  Matthias Brantner (matthias-brantner)

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/no_unicode/+merge/80644

"No Unicode" is now "No ICU."
-- 
https://code.launchpad.net/~zorba-coders/zorba/no_unicode/+merge/80644
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'CMakeConfiguration.txt'
--- CMakeConfiguration.txt	2011-09-16 19:55:59 +0000
+++ CMakeConfiguration.txt	2011-10-28 02:34:26 +0000
@@ -139,14 +139,14 @@
 SET (ZORBA_DEBUG_STRING ${ZORBA_DEBUG_STRING} CACHE BOOL "debug strings")
 MESSAGE (STATUS "ZORBA_DEBUG_STRING:                   " ${ZORBA_DEBUG_STRING})
 
-SET(ZORBA_NO_UNICODE OFF CACHE BOOL "disable ICU")
-MESSAGE(STATUS "ZORBA_NO_UNICODE:                     " ${ZORBA_NO_UNICODE})
+SET(ZORBA_NO_ICU OFF CACHE BOOL "disable ICU")
+MESSAGE(STATUS "ZORBA_NO_ICU:                         " ${ZORBA_NO_ICU})
 
-IF (ZORBA_NO_UNICODE)
+IF (ZORBA_NO_ICU)
   SET (no_full_text ON)
-ELSE (ZORBA_NO_UNICODE)
+ELSE (ZORBA_NO_ICU)
   SET (no_full_text OFF)
-ENDIF (ZORBA_NO_UNICODE)
+ENDIF (ZORBA_NO_ICU)
 SET (ZORBA_NO_FULL_TEXT ${no_full_text} CACHE BOOL "disable XQuery Full-Text support")
 MESSAGE(STATUS "ZORBA_NO_FULL_TEXT:                   " ${ZORBA_NO_FULL_TEXT})
 

=== modified file 'CMakeLists.txt'
--- CMakeLists.txt	2011-10-09 13:56:39 +0000
+++ CMakeLists.txt	2011-10-28 02:34:26 +0000
@@ -123,10 +123,14 @@
 CHECK_TYPE_SIZE("int64_t" ZORBA_HAVE_INT64_T) 
 
 CHECK_CXX_SOURCE_COMPILES ("#include <type_traits>\nint main() { std::enable_if<true,int> x; }" ZORBA_CXX_ENABLE_IF)
-CHECK_CXX_SOURCE_COMPILES ("int main() { int *p = nullptr; }" ZORBA_CXX_NULLPTR)
-CHECK_CXX_SOURCE_COMPILES ("int main() { static_assert(1,\"\"); }" ZORBA_CXX_STATIC_ASSERT)
+SET(CMAKE_EXTRA_INCLUDE_FILES wchar.h)
+CHECK_TYPE_SIZE("wchar_t" ZORBA_SIZEOF_WCHAR_T)
+SET(CMAKE_EXTRA_INCLUDE_FILES)
 CHECK_CXX_SOURCE_COMPILES ("#include <memory>\nint main() { std::unique_ptr<int> p; }" ZORBA_CXX_UNIQUE_PTR)
 
+CHECK_CXX_SOURCE_COMPILES("int main() { int *p = nullptr; }" ZORBA_CXX_NULLPTR)
+CHECK_CXX_SOURCE_COMPILES("int main() { static_assert(1,\"\"); }" ZORBA_CXX_STATIC_ASSERT)
+
 ################################################################################
 # Various cmake macros
 

=== modified file 'ChangeLog'
--- ChangeLog	2011-10-20 23:05:55 +0000
+++ ChangeLog	2011-10-28 02:34:26 +0000
@@ -54,6 +54,7 @@
   * Fixed bug #872796  (validate-in-place can interfere with other update primitives)
   * Fixed bug #872799 (validate-in-place can set incorrect types)
   * Fixed bug #855715 (Invalid escaped characters in regex not caught)
+  * Fixed bug #868325 (fn:analyze-string fails with some recursive subgroups)
 
 version 2.0.1
 

=== modified file 'KNOWN_ISSUES.txt'
--- KNOWN_ISSUES.txt	2011-10-07 08:28:43 +0000
+++ KNOWN_ISSUES.txt	2011-10-28 02:34:26 +0000
@@ -37,7 +37,7 @@
 * The serializer currently doesn't implement character maps as specified
   (http://www.w3.org/TR/xslt-xquery-serialization/#character-maps)
 
-* In the 2.0 release, setting the CMake variables ZORBA_NO_UNICODE to
+* In the 2.0 release, setting the CMake variables ZORBA_NO_ICU to
   ON is not supported.
 
 * The PHP language binding is not supported on Mac OS X. For details,

=== modified file 'doc/cxx/examples/context.cpp'
--- doc/cxx/examples/context.cpp	2011-07-22 08:12:31 +0000
+++ doc/cxx/examples/context.cpp	2011-10-28 02:34:26 +0000
@@ -149,7 +149,11 @@
     outStream2 << lQuery << std::endl;
     std::cout << outStream2.str() << std::endl;
 
+#ifndef ZORBA_NO_ICU
     if (outStream2.str() != "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\nBook 1.1\n")
+#else
+    if (outStream2.str() != "<?xml version=\"1.0\"?>\nBook 1.1\n")
+#endif /* ZORBA_NO_ICU */
     {
       std::cerr << "Test 4 failed with a wrong result : " << std::endl
                 << outStream2.str() << std::endl;

=== modified file 'include/zorba/config.h.cmake'
--- include/zorba/config.h.cmake	2011-08-22 15:14:14 +0000
+++ include/zorba/config.h.cmake	2011-10-28 02:34:26 +0000
@@ -93,6 +93,8 @@
 typedef __int64 int64_t;
 #endif /* ZORBA_HAVE_INT64_T */
 
+#cmakedefine ZORBA_SIZEOF_WCHAR_T @ZORBA_SIZEOF_WCHAR_T@
+
 // Compiler
 #cmakedefine CLANG
 #cmakedefine MSVC
@@ -142,7 +144,7 @@
 
 // Zorba features
 #cmakedefine ZORBA_NO_FULL_TEXT
-#cmakedefine ZORBA_NO_UNICODE
+#cmakedefine ZORBA_NO_ICU
 #cmakedefine ZORBA_NO_XMLSCHEMA
 #cmakedefine ZORBA_NUMERIC_OPTIMIZATION
 #cmakedefine ZORBA_VERIFY_PEER_SSL_CERTIFICATE

=== modified file 'src/api/collectionimpl.cpp'
--- src/api/collectionimpl.cpp	2011-09-15 13:11:51 +0000
+++ src/api/collectionimpl.cpp	2011-10-28 02:34:26 +0000
@@ -45,6 +45,7 @@
 
 #include "context/static_context.h"
 
+#include "types/typeimpl.h"
 #include "types/typeops.h"
 
 #include "compiler/xqddf/collection_decl.h"

=== modified file 'src/api/serialization/serializer.cpp'
--- src/api/serialization/serializer.cpp	2011-06-20 15:38:42 +0000
+++ src/api/serialization/serializer.cpp	2011-10-28 02:34:26 +0000
@@ -180,7 +180,6 @@
   for (; chars < chars_end; chars++ )
   {
 
-#ifndef ZORBA_NO_UNICODE
     // the input string is UTF-8
     int char_length = utf8::char_length(*chars);
     if (char_length == 0)
@@ -217,7 +216,6 @@
 
       continue;
     }
-#endif//ZORBA_NO_UNICODE
 
     // raise an error iff (1) the serialization format is XML 1.0 and (2) the given character is an invalid XML 1.0 character
     if (ser && ser->method == PARAMETER_VALUE_XML &&
@@ -332,14 +330,12 @@
     {
       tr << (char)0xEF << (char)0xBB << (char)0xBF;
     }
-#ifndef ZORBA_NO_UNICODE
     else if (ser->encoding == PARAMETER_VALUE_UTF_16)
     {
       // Little-endian
       tr.verbatim((char)0xFF);
       tr.verbatim((char)0xFE);
     }
-#endif
   }
 }
 
@@ -792,13 +788,17 @@
   emitter::emit_declaration();
 
   if (ser->omit_xml_declaration == PARAMETER_VALUE_NO) {
-    tr << "<?xml version=\"" << ser->version << "\" encoding=\"";
-    if (ser->encoding == PARAMETER_VALUE_UTF_8) {
-      tr << "UTF-8";
-#ifndef ZORBA_NO_UNICODE
-    } else if (ser->encoding == PARAMETER_VALUE_UTF_16) {
-      tr << "UTF-16";
-#endif
+    tr << "<?xml version=\"" << ser->version;
+    switch (ser->encoding) {
+      case PARAMETER_VALUE_UTF_8:
+      case PARAMETER_VALUE_UTF_16:
+        tr << "\" encoding=\"";
+        switch (ser->encoding) {
+          case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break;
+          case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break;
+          default                    : ZORBA_ASSERT(false);
+        }
+        break;
     }
     tr << "\"";
 
@@ -1104,14 +1104,18 @@
       }
 
       tr << "<meta http-equiv=\"content-type\" content=\""
-         << ser->media_type << "; charset=";
-
-      if (ser->encoding == PARAMETER_VALUE_UTF_8)
-        tr << "UTF-8";
-#ifndef ZORBA_NO_UNICODE
-      else if (ser->encoding == PARAMETER_VALUE_UTF_16)
-        tr << "UTF-16";
-#endif
+         << ser->media_type;
+      switch (ser->encoding) {
+        case PARAMETER_VALUE_UTF_8:
+        case PARAMETER_VALUE_UTF_16:
+          tr << "\" charset=\"";
+          switch (ser->encoding) {
+            case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break;
+            case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break;
+            default                    : ZORBA_ASSERT(false);
+          }
+          break;
+      }
       tr << "\"";
       // closed_parent_tag = 1;
     }
@@ -1301,14 +1305,18 @@
         }
 
         tr << "<meta http-equiv=\"content-type\" content=\""
-           << ser->media_type << "; charset=";
-
-        if (ser->encoding == PARAMETER_VALUE_UTF_8)
-          tr << "UTF-8";
-#ifndef ZORBA_NO_UNICODE
-        else if (ser->encoding == PARAMETER_VALUE_UTF_16)
-          tr << "UTF-16";
-#endif
+           << ser->media_type;
+        switch (ser->encoding) {
+          case PARAMETER_VALUE_UTF_8:
+          case PARAMETER_VALUE_UTF_16:
+            tr << "\" charset=\"";
+            switch (ser->encoding) {
+              case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break;
+              case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break;
+              default                    : ZORBA_ASSERT(false);
+            }
+            break;
+        }
         tr << "\"/";
         //closed_parent_tag = 1;
       }
@@ -1999,10 +2007,8 @@
   {
     if (!strcmp(aValue, "UTF-8"))
       encoding = PARAMETER_VALUE_UTF_8;
-#ifndef ZORBA_NO_UNICODE
     else if (!strcmp(aValue, "UTF-16"))
       encoding = PARAMETER_VALUE_UTF_16;
-#endif
     else
       throw XQUERY_EXCEPTION(
         err::SEPM0016, ERROR_PARAMS( aValue, aName, ZED( GoodValuesAreUTF8 ) )
@@ -2103,16 +2109,13 @@
   {
     tr = new transcoder(os, false);
   }
-#ifndef ZORBA_NO_UNICODE
   else if (encoding == PARAMETER_VALUE_UTF_16)
   {
     tr = new transcoder(os, true);
   }
-#endif
   else
   {
-    ZORBA_ASSERT(0);
-    return false;
+    ZORBA_ASSERT(false);
   }
 
   if (method == PARAMETER_VALUE_XML)

=== modified file 'src/api/serialization/serializer.h'
--- src/api/serialization/serializer.h	2011-06-14 17:26:33 +0000
+++ src/api/serialization/serializer.h	2011-10-28 02:34:26 +0000
@@ -70,10 +70,8 @@
     PARAMETER_VALUE_TEXT,
     PARAMETER_VALUE_BINARY,
 
-    PARAMETER_VALUE_UTF_8
-#ifndef ZORBA_NO_UNICODE
-    ,PARAMETER_VALUE_UTF_16
-#endif
+    PARAMETER_VALUE_UTF_8,
+    PARAMETER_VALUE_UTF_16
   } PARAMETER_VALUE_TYPE;
 
 protected:

=== modified file 'src/diagnostics/diagnostic_en.xml'
--- src/diagnostics/diagnostic_en.xml	2011-10-26 21:32:57 +0000
+++ src/diagnostics/diagnostic_en.xml	2011-10-28 02:34:26 +0000
@@ -3025,85 +3025,167 @@
       <value>item type is not a subtype of "$3"</value>
     </entry>
 
-    <entry key="U_REGEX_BAD_ESCAPE_SEQUENCE" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_BAD_ESCAPE_SEQUENCE" if="!defined(ZORBA_NO_ICU)">
       <value>unrecognized backslash escape sequence</value>
     </entry>
 
-    <entry key="U_REGEX_BAD_INTERVAL" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_BAD_INTERVAL" if="!defined(ZORBA_NO_ICU)">
       <value>error in {min,max} interval</value>
     </entry>
 
-    <entry key="U_REGEX_INTERNAL_ERROR" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_INTERNAL_ERROR" if="!defined(ZORBA_NO_ICU)">
       <value>an internal ICU error (bug) was detected</value>
     </entry>
 
-    <entry key="U_REGEX_INVALID_BACK_REF" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_INVALID_BACK_REF" if="!defined(ZORBA_NO_ICU)">
       <value>backreference to a non-existent capture group</value>
     </entry>
 
-    <entry key="U_REGEX_INVALID_FLAG" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_INVALID_FLAG" if="!defined(ZORBA_NO_ICU)">
       <value>invalid value for match mode flags</value>
     </entry>
 
-    <entry key="U_REGEX_INVALID_RANGE" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_INVALID_RANGE" if="!defined(ZORBA_NO_ICU)">
       <value>in character range [x-y], x is greater than y</value>
     </entry>
 
-    <entry key="U_REGEX_INVALID_STATE" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_INVALID_STATE" if="!defined(ZORBA_NO_ICU)">
       <value>RegexMatcher in invalid state for requested operation</value>
     </entry>
 
-    <entry key="U_REGEX_LOOK_BEHIND_LIMIT" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_LOOK_BEHIND_LIMIT" if="!defined(ZORBA_NO_ICU)">
       <value>look-behind pattern matches must have a bounded maximum length</value>
     </entry>
 
-    <entry key="U_REGEX_MAX_LT_MIN" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_MAX_LT_MIN" if="!defined(ZORBA_NO_ICU)">
       <value>in {min,max}, max is less than min</value>
     </entry>
 
-    <entry key="U_REGEX_MISMATCHED_PAREN" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_MISMATCHED_PAREN" if="!defined(ZORBA_NO_ICU)">
       <value>incorrectly nested parentheses</value>
     </entry>
 
-    <entry key="U_REGEX_MISSING_CLOSE_BRACKET" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_MISSING_CLOSE_BRACKET" if="!defined(ZORBA_NO_ICU)">
       <value>missing ']'</value>
     </entry>
 
-    <entry key="U_REGEX_NUMBER_TOO_BIG" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_NUMBER_TOO_BIG" if="!defined(ZORBA_NO_ICU)">
       <value>decimal number is too large</value>
     </entry>
 
-    <entry key="U_REGEX_OCTAL_TOO_BIG" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_OCTAL_TOO_BIG" if="!defined(ZORBA_NO_ICU)">
       <value>octal character constants must be &lt;= 0377</value>
     </entry>
 
-    <entry key="U_REGEX_PROPERTY_SYNTAX" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_PROPERTY_SYNTAX" if="!defined(ZORBA_NO_ICU)">
       <value>incorrect Unicode property</value>
     </entry>
 
-    <entry key="U_REGEX_RULE_SYNTAX" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_RULE_SYNTAX" if="!defined(ZORBA_NO_ICU)">
       <value>syntax error</value>
     </entry>
 
-    <entry key="U_REGEX_SET_CONTAINS_STRING" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_SET_CONTAINS_STRING" if="!defined(ZORBA_NO_ICU)">
       <value>can not have UnicodeSets containing strings</value>
     </entry>
 
-    <entry key="U_REGEX_STACK_OVERFLOW" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_STACK_OVERFLOW" if="!defined(ZORBA_NO_ICU)">
       <value>backtrack stack overflow</value>
     </entry>
 
-    <entry key="U_REGEX_STOPPED_BY_CALLER" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_STOPPED_BY_CALLER" if="!defined(ZORBA_NO_ICU)">
       <value>matching operation aborted by user callback fn</value>
     </entry>
 
-    <entry key="U_REGEX_TIME_OUT" if="!defined(ZORBA_NO_UNICODE)">
+    <entry key="U_REGEX_TIME_OUT" if="!defined(ZORBA_NO_ICU)">
       <value>maximum allowed match time exceeded</value>
     </entry>
 
-    <entry key="U_REGEX_UNIMPLEMENTED" if="!defined(ZORBA_NO_UNICODE)">
-      <value>use of regular expression feature that is not yet implemented</value>
-    </entry>
+    <entry key="U_REGEX_UNIMPLEMENTED" if="!defined(ZORBA_NO_ICU)">
+      <value>use of regular expression feature that is not yet implemented</value>
+    </entry>
+
+    <!-- Regex Ascii error messages-->
+    <entry key="REGEX_UNIMPLEMENTED" if="defined(ZORBA_NO_ICU)">
+      <value>use of regular expression feature that is not yet implemented</value>
+    </entry>
+
+    <entry key="REGEX_MISMATCHED_PAREN" if="defined(ZORBA_NO_ICU)">
+      <value>incorrectly nested parentheses</value>
+    </entry>
+
+    <entry key="REGEX_BROKEN_P_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+      <value>broken \\p construct</value>
+    </entry>
+
+    <entry key="REGEX_UNKNOWN_PL_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+      <value>unknown \\p{L?} category; supported categories: L, Lu, Ll, Lt, Lm, Lo</value>
+    </entry>
+
+    <entry key="REGEX_UNKNOWN_PM_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+      <value>unknown \\p{M?} category; supported categories: M, Mn, Mc, Me</value>
+    </entry>
+
+    <entry key="REGEX_UNKNOWN_PN_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+      <value>unknown \\p{N?} category; supported categories: N, Nd, Nl, No</value>
+    </entry>
+
+    <entry key="REGEX_UNKNOWN_PP_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+      <value>unknown \\p{P?} category; supported categories: P, Pc, Pd, Ps, Pe, Pi, Pf, Po</value>
+    </entry>
+
+    <entry key="REGEX_UNKNOWN_PZ_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+      <value>unknown \\p{Z?} category; supported categories: Z, Zs, Zl, Zp</value>
+    </entry>
+
+    <entry key="REGEX_UNKNOWN_PS_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+      <value>unknown \\p{S?} category; supported categories: S, Sm, Sc, Sk, So</value>
+    </entry>
+
+    <entry key="REGEX_UNKNOWN_PC_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+      <value>unknown \\p{C?} category; supported categories: C, Cc, Cf, Co, Cn(for not assigned)</value>
+    </entry>
+
+    <entry key="REGEX_BROKEN_PIs_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+      <value>broken \\p{Is} construct; valid characters are [a-zA-Z0-9-]</value>
+    </entry>
+
+    <entry key="REGEX_UNKNOWN_PIs_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+      <value>unknown \\p{Is} category block; see supported block escapes here: http://www.w3.org/TR/xmlschema-2/#charcter-classes</value>
+    </entry>
+
+    <entry key="REGEX_UNKNOWN_ESC_CHAR" if="defined(ZORBA_NO_ICU)">
+      <value>unknown \\? escape char; supported escapes are: \\[nrt\\|.?*+(){}[]-^$] for char escapes, \\[pP] for categories and \\[sSiIcCdDwW] for multichar groups</value>
+    </entry>
+
+    <entry key="REGEX_INVALID_BACK_REF" if="defined(ZORBA_NO_ICU)">
+      <value>\\$3 backreference to a non-existent capture group ($4 groups so far)</value>
+    </entry>
+
+    <entry key="REGEX_INVALID_ATOM_CHAR" if="defined(ZORBA_NO_ICU)">
+      <value>$3 - invalid character for at atom; forbidden characters are: [{}?*+|^]</value>
+    </entry>
+
+    <entry key="REGEX_INVALID_SUBCLASS" if="defined(ZORBA_NO_ICU)">
+      <value>malformed class subtraction</value>
+    </entry>
+
+    <entry key="REGEX_INVALID_USE_OF_SUBCLASS" if="defined(ZORBA_NO_ICU)">
+      <value>improper use of class subtraction: it must be the last construct in a class group [xxx-[yyy]]</value>
+    </entry>
+
+    <entry key="REGEX_MULTICHAR_IN_CHAR_RANGE" if="defined(ZORBA_NO_ICU)">
+      <value>multichars or char categories cannot be part of a char range</value>
+    </entry>
+
+    <entry key="REGEX_MISSING_CLOSE_BRACKET" if="defined(ZORBA_NO_ICU)">
+      <value>missing close bracket in char group</value>
+    </entry>
+
+    <entry key="REGEX_MAX_LT_MIN" if="defined(ZORBA_NO_ICU)">
+      <value>in {min,max}, max is less than min</value>
+    </entry>
+
 
     <entry key="UnaryArithOp">
       <value>unary arithmetic operator</value>

=== modified file 'src/diagnostics/dict_XX_cpp.xq'
--- src/diagnostics/dict_XX_cpp.xq	2011-08-05 02:21:55 +0000
+++ src/diagnostics/dict_XX_cpp.xq	2011-10-28 02:34:26 +0000
@@ -64,6 +64,7 @@
 return string-join(
   ( util:copyright(), 
     '#include "stdafx.h"',
+    '#include "zorba/config.h"',
     '#include "diagnostics/dict_impl.h"',
     '',
     'namespace zorba {',

=== modified file 'src/diagnostics/pregenerated/dict_en.cpp'
--- src/diagnostics/pregenerated/dict_en.cpp	2011-10-26 21:32:57 +0000
+++ src/diagnostics/pregenerated/dict_en.cpp	2011-10-28 02:34:26 +0000
@@ -20,6 +20,7 @@
   */
  
 #include "stdafx.h"
+#include "zorba/config.h"
 #include "diagnostics/dict_impl.h"
 
 namespace zorba {
@@ -560,6 +561,66 @@
   { "~ParserNoCreateTree", "XML tree creation failed" },
   { "~PromotionImpossible", "promotion not possible" },
   { "~QuotedColon_23", "\"$2\": $3" },
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_BROKEN_PIs_CONSTRUCT", "broken \\p{Is} construct; valid characters are [a-zA-Z0-9-]" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_BROKEN_P_CONSTRUCT", "broken \\p construct" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_INVALID_ATOM_CHAR", "$3 - invalid character for at atom; forbidden characters are: [{}?*+|^]" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_INVALID_BACK_REF", "\\$3 backreference to a non-existent capture group ($4 groups so far)" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_INVALID_SUBCLASS", "malformed class subtraction" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_INVALID_USE_OF_SUBCLASS", "improper use of class subtraction: it must be the last construct in a class group [xxx-[yyy]]" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_MAX_LT_MIN", "in {min,max}, max is less than min" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_MISMATCHED_PAREN", "incorrectly nested parentheses" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_MISSING_CLOSE_BRACKET", "missing close bracket in char group" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_MULTICHAR_IN_CHAR_RANGE", "multichars or char categories cannot be part of a char range" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_UNIMPLEMENTED", "use of regular expression feature that is not yet implemented" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_UNKNOWN_ESC_CHAR", "unknown \\? escape char; supported escapes are: \\[nrt\\|.?*+(){}[]-^$] for char escapes, \\[pP] for categories and \\[sSiIcCdDwW] for multichar groups" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_UNKNOWN_PC_CONSTRUCT", "unknown \\p{C?} category; supported categories: C, Cc, Cf, Co, Cn(for not assigned)" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_UNKNOWN_PIs_CONSTRUCT", "unknown \\p{Is} category block; see supported block escapes here: http://www.w3.org/TR/xmlschema-2/#charcter-classes" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_UNKNOWN_PL_CONSTRUCT", "unknown \\p{L?} category; supported categories: L, Lu, Ll, Lt, Lm, Lo" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_UNKNOWN_PM_CONSTRUCT", "unknown \\p{M?} category; supported categories: M, Mn, Mc, Me" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_UNKNOWN_PN_CONSTRUCT", "unknown \\p{N?} category; supported categories: N, Nd, Nl, No" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_UNKNOWN_PP_CONSTRUCT", "unknown \\p{P?} category; supported categories: P, Pc, Pd, Ps, Pe, Pi, Pf, Po" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_UNKNOWN_PS_CONSTRUCT", "unknown \\p{S?} category; supported categories: S, Sm, Sc, Sk, So" },
+#endif
+#if defined(ZORBA_NO_ICU)
+  { "~REGEX_UNKNOWN_PZ_CONSTRUCT", "unknown \\p{Z?} category; supported categories: Z, Zs, Zl, Zp" },
+#endif
   { "~SEPM0009_Not10", "the version parameter has a value other than \"1.0\" and the doctype-system parameter is specified" },
   { "~SEPM0009_NotOmit", "the standalone attribute has a value other than \"omit\"" },
   { "~SchemaAttributeName", "schema-attribute name" },
@@ -583,64 +644,64 @@
   { "~TwoDecimalFormatsSameName_2", "\"$2\": two decimal formats with this name" },
   { "~TwoDefaultDecimalFormats", "two default decimal formats" },
   { "~TypeIsNotSubtype", "item type is not a subtype of \"$3\"" },
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_BAD_ESCAPE_SEQUENCE", "unrecognized backslash escape sequence" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_BAD_INTERVAL", "error in {min,max} interval" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_INTERNAL_ERROR", "an internal ICU error (bug) was detected" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_INVALID_BACK_REF", "backreference to a non-existent capture group" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_INVALID_FLAG", "invalid value for match mode flags" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_INVALID_RANGE", "in character range [x-y], x is greater than y" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_INVALID_STATE", "RegexMatcher in invalid state for requested operation" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_LOOK_BEHIND_LIMIT", "look-behind pattern matches must have a bounded maximum length" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_MAX_LT_MIN", "in {min,max}, max is less than min" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_MISMATCHED_PAREN", "incorrectly nested parentheses" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_MISSING_CLOSE_BRACKET", "missing ']'" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_NUMBER_TOO_BIG", "decimal number is too large" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_OCTAL_TOO_BIG", "octal character constants must be <= 0377" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_PROPERTY_SYNTAX", "incorrect Unicode property" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_RULE_SYNTAX", "syntax error" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_SET_CONTAINS_STRING", "can not have UnicodeSets containing strings" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_STACK_OVERFLOW", "backtrack stack overflow" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_STOPPED_BY_CALLER", "matching operation aborted by user callback fn" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_TIME_OUT", "maximum allowed match time exceeded" },
 #endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
   { "~U_REGEX_UNIMPLEMENTED", "use of regular expression feature that is not yet implemented" },
 #endif
   { "~UnaryArithOp", "unary arithmetic operator" },

=== modified file 'src/precompiled/stdafx.h'
--- src/precompiled/stdafx.h	2011-09-02 19:58:59 +0000
+++ src/precompiled/stdafx.h	2011-10-28 02:34:26 +0000
@@ -333,7 +333,6 @@
  //#include "zorbatypes/floatimpl.h"
  #include "zorbatypes/ft_token.h"
  //#include "zorbatypes/integer.h"
- #include "zorbatypes/libicu.h"
  #include "zorbatypes/m_apm.h"
  //#include "zorbatypes/rchandle.h"
  #include "zorbatypes/rclock.h"

=== modified file 'src/runtime/full_text/CMakeLists.txt'
--- src/runtime/full_text/CMakeLists.txt	2011-08-31 13:17:59 +0000
+++ src/runtime/full_text/CMakeLists.txt	2011-10-28 02:34:26 +0000
@@ -42,11 +42,11 @@
     default_tokenizer.cpp
     )
 
-IF (ZORBA_NO_UNICODE)
+IF (ZORBA_NO_ICU)
   LIST(APPEND FULLTEXT_SRCS latin_tokenizer.cpp)
-ELSE (ZORBA_NO_UNICODE)
+ELSE (ZORBA_NO_ICU)
   LIST(APPEND FULLTEXT_SRCS icu_tokenizer.cpp)
-ENDIF (ZORBA_NO_UNICODE)
+ENDIF (ZORBA_NO_ICU)
 
 ADD_SRC_SUBFOLDER(FULLTEXT_SRCS stemmer LIBSTEMMER_SRCS)
 

=== modified file 'src/runtime/full_text/default_tokenizer.cpp'
--- src/runtime/full_text/default_tokenizer.cpp	2011-08-31 02:53:07 +0000
+++ src/runtime/full_text/default_tokenizer.cpp	2011-10-28 02:34:26 +0000
@@ -19,22 +19,22 @@
 #include <zorba/config.h>
 
 #include "default_tokenizer.h"
-#ifdef ZORBA_NO_UNICODE
+#ifdef ZORBA_NO_ICU
 # include "latin_tokenizer.h"
 #else
 # include "icu_tokenizer.h"
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
 
 namespace zorba {
 
 ///////////////////////////////////////////////////////////////////////////////
 
 TokenizerProvider const& default_tokenizer_provider() {
-#ifdef ZORBA_NO_UNICODE
+#ifdef ZORBA_NO_ICU
   static LatinTokenizerProvider const instance;
 #else
   static ICU_TokenizerProvider const instance;
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
   return instance;
 };
 

=== modified file 'src/runtime/numerics/format_integer_impl.cpp'
--- src/runtime/numerics/format_integer_impl.cpp	2011-07-07 12:47:14 +0000
+++ src/runtime/numerics/format_integer_impl.cpp	2011-10-28 02:34:26 +0000
@@ -881,7 +881,7 @@
             utf8_result += (*valueit);
           }
           else
-            utf8_result += (0x2080 + *valueit - '0');
+            utf8_result += (unicode::code_point)(0x2080 + *valueit - '0');
         }
       }
       else if((c0 == 0x2460) || //CIRCLED DIGIT ONE  (1-20)

=== modified file 'src/runtime/numerics/numerics_impl.cpp'
--- src/runtime/numerics/numerics_impl.cpp	2011-07-10 14:55:46 +0000
+++ src/runtime/numerics/numerics_impl.cpp	2011-10-28 02:34:26 +0000
@@ -490,7 +490,7 @@
     minus( "-" )
   {
     utf8_string<zstring> u_per_mille( per_mille );
-    u_per_mille = 0x2030;
+    u_per_mille = (unicode::code_point)0x2030;
   }
 
   void readFormat(const DecimalFormat_t& df_t)

=== modified file 'src/runtime/strings/strings_impl.cpp'
--- src/runtime/strings/strings_impl.cpp	2011-08-10 18:58:11 +0000
+++ src/runtime/strings/strings_impl.cpp	2011-10-28 02:34:26 +0000
@@ -607,7 +607,6 @@
   zstring normForm;
   zstring resStr;
   unicode::normalization::type normType;
-  bool success;
 
   PlanIteratorState* state;
   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
@@ -657,10 +656,9 @@
     }
 
     item0->getStringValue2(resStr);
-#ifndef ZORBA_NO_UNICODE
-    success = utf8::normalize(resStr, normType, &resStr);
-    ZORBA_ASSERT(success);
-#endif//#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
+    ZORBA_ASSERT( utf8::normalize( resStr, normType, &resStr ) );
+#endif /* ZORBA_NO_ICU */
     STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state );
   }
   else
@@ -1688,7 +1686,7 @@
     GENV_ITEMFACTORY->createString(strid_item, zstrid);
     store::Item_t id_attrib_item;
     GENV_ITEMFACTORY->createAttributeNode(id_attrib_item, group_elem.getp(), nr_attrib_name, untyped_type_name, strid_item);
-    if(match_startg < 0)
+    if((match_startg < 0) || (match_startg < match_endgood))
       continue;
     match_endgood = match_endg;
     if((i+1)<nr_pattern_groups)

=== modified file 'src/system/globalenv.cpp'
--- src/system/globalenv.cpp	2011-06-26 09:43:12 +0000
+++ src/system/globalenv.cpp	2011-10-28 02:34:26 +0000
@@ -17,11 +17,11 @@
 
 #include "common/common.h"
 
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
 # include <unicode/uclean.h>
 # include <unicode/utypes.h>
 # include <unicode/udata.h>
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
 
 #ifdef ZORBA_WITH_BIG_INTEGER
 # include "zorbatypes/m_apm.h"
@@ -171,7 +171,7 @@
   // from one thread only
   // see http://www.icu-project.org/userguide/design.html#Init_and_Termination
   // and http://www.icu-project.org/apiref/icu4c/uclean_8h.html
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
 #  if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE)
   {
     TCHAR    self_path[1024];
@@ -201,13 +201,13 @@
     udata_setCommonData(icu_appdata, &data_err);
     ZORBA_ASSERT(data_err == U_ZERO_ERROR);
   
-      //  u_setDataDirectory(self_path);
+    // u_setDataDirectory(self_path);
   }
 #  endif
   UErrorCode lICUInitStatus = U_ZERO_ERROR;
   u_init(&lICUInitStatus);
   ZORBA_ASSERT(lICUInitStatus == U_ZERO_ERROR);
-#endif//ifndef ZORBA_NO_UNICODE
+#endif /* ZORBA_NO_ICU */
 }
 
 
@@ -219,12 +219,12 @@
   // releases statically initialized memory and prevents
   // valgrind from reporting those problems at the end
   // see http://www.icu-project.org/apiref/icu4c/uclean_8h.html#93f27d0ddc7c196a1da864763f2d8920
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
   u_cleanup();
 # if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE)
   delete[] icu_appdata;
 # endif
-#endif//ifndef ZORBA_NO_UNICODE
+#endif /* ZORBA_NO_ICU */
 }
 
 

=== modified file 'src/util/CMakeLists.txt'
--- src/util/CMakeLists.txt	2011-07-18 14:25:21 +0000
+++ src/util/CMakeLists.txt	2011-10-28 02:34:26 +0000
@@ -38,9 +38,9 @@
   LIST(APPEND UTIL_SRCS mmap_file.cpp)
 ENDIF(ZORBA_WITH_FILE_ACCESS)
 
-IF(ZORBA_NO_UNICODE)
+IF(ZORBA_NO_ICU)
   LIST(APPEND UTIL_SRCS regex_ascii.cpp)
-ENDIF(ZORBA_NO_UNICODE)
+ENDIF(ZORBA_NO_ICU)
 
 HEADER_GROUP_SUBFOLDER(UTIL_SRCS fx)
 HEADER_GROUP_SUBFOLDER(UTIL_SRCS win32)

=== modified file 'src/util/regex.cpp'
--- src/util/regex.cpp	2011-09-24 00:16:36 +0000
+++ src/util/regex.cpp	2011-10-28 02:34:26 +0000
@@ -21,10 +21,10 @@
 #include <vector>
 
 #include <zorba/diagnostic_list.h>
-#include "diagnostics/xquery_exception.h"
 
 #include "diagnostics/assert.h"
 #include "diagnostics/dict.h"
+#include "diagnostics/xquery_exception.h"
 
 #include "ascii_util.h"
 #include "cxx_util.h"
@@ -33,8 +33,7 @@
 #define INVALID_RE_EXCEPTION(...) \
   XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS( __VA_ARGS__ ) )
 
-
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
 # include <unicode/uversion.h>
 U_NAMESPACE_USE
 
@@ -442,11 +441,11 @@
 }
 
 } // namespace unicode
-
-}//namespace zorba
-
-
-#else /* ZORBA_NO_UNICODE */
+} // namespace zorba
+
+///////////////////////////////////////////////////////////////////////////////
+
+#else /* ZORBA_NO_ICU */
 
 #include "zorbatypes/zstring.h"
 
@@ -470,7 +469,7 @@
     case 'i': flags |= REGEX_ASCII_CASE_INSENSITIVE; break;
     case 's': flags |= REGEX_ASCII_DOTALL; break;
     case 'm': flags |= REGEX_ASCII_MULTILINE; break;
-    case 'x': flags |= REGEX_ASCII_COMMENTS; break;
+    case 'x': flags |= REGEX_ASCII_NO_WHITESPACE; break;
     case 'q': flags |= REGEX_ASCII_LITERAL; break;
     default:
       throw XQUERY_EXCEPTION( err::FORX0001, ERROR_PARAMS( *p ) );
@@ -483,6 +482,7 @@
 void regex::compile( char const *pattern, char const *flags)
 {
   parsed_flags = parse_regex_flags(flags);
+  regex_ascii::CRegexAscii_parser regex_parser;
   regex_matcher = regex_parser.parse(pattern, parsed_flags);
   if(!regex_matcher)
     throw INVALID_RE_EXCEPTION(pattern);
@@ -517,6 +517,8 @@
 bool regex::next_token( char const *s, size_type *pos, zstring *token,
                   bool *matched)
 {
+  if(!s[*pos])
+    return false;
   bool  retval;
   int   match_pos;
   int   matched_len;
@@ -528,14 +530,8 @@
       token->assign(s+*pos, match_pos);
     *pos += match_pos + matched_len;
     if(matched)
-      if(match_pos)
-        *matched = true;
-      else
-        *matched = false;
-    if(match_pos)
-      return true;
-    else
-      return false;
+      *matched = true;
+    return true;
   }
   else
   {
@@ -544,7 +540,7 @@
     *pos += strlen(s+*pos);
     if(matched)
       *matched = false;
-    return s[*pos] != 0;
+    return true;
   }
 }
 
@@ -554,13 +550,9 @@
   int   matched_pos;
   int   matched_len;
 
-  bool prev_align = regex_matcher->set_align_begin(true);
-  retval = regex_matcher->match_from(s, parsed_flags, &matched_pos, &matched_len);
-  regex_matcher->set_align_begin(prev_align);
+  retval = regex_matcher->match_anywhere(s, parsed_flags|REGEX_ASCII_WHOLE_MATCH, &matched_pos, &matched_len);
   if(!retval)
     return false;
-  if(matched_len != strlen(s))
-    return false;
   return true;
 }
 
@@ -587,14 +579,19 @@
       //look for dollars
       if(*temprepl == '\\')
       {
-        temprepl++;
-        if(!*temprepl || (*temprepl != '\\') || (*temprepl != '$'))//Invalid replacement string.
-          throw XQUERY_EXCEPTION( err::FORX0004, ERROR_PARAMS( replacement ) );
+        if(!(parsed_flags & REGEX_ASCII_LITERAL))
+        {
+          temprepl++;
+          if(!*temprepl) 
+            temprepl--;
+          else if((*temprepl != '\\') && (*temprepl != '$'))//Invalid replacement string.
+            throw XQUERY_EXCEPTION( err::FORX0004, ERROR_PARAMS( replacement ) );
+        }
         result->append(1, *temprepl);
         temprepl++;
         continue;
       }
-      if(*temprepl == '$')
+      if((*temprepl == '$') && !(parsed_flags & REGEX_ASCII_LITERAL))
       {
         temprepl++;
         index = 0;
@@ -648,7 +645,7 @@
   if(retval)
   {
     m_match_pos += m_pos;
-    m_pos = m_match_pos = m_matched_len;
+    m_pos = m_match_pos + m_matched_len;
   }
   else
   {
@@ -694,7 +691,7 @@
 
 } // namespace unicode
 } // namespace zorba
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
 
 ///////////////////////////////////////////////////////////////////////////////
 

=== modified file 'src/util/regex.h'
--- src/util/regex.h	2011-07-18 14:25:21 +0000
+++ src/util/regex.h	2011-10-28 02:34:26 +0000
@@ -17,15 +17,13 @@
 #ifndef ZORBA_REGEX_H
 #define ZORBA_REGEX_H
 
-#ifndef ZORBA_NO_UNICODE
-#include <unicode/regex.h>
-#endif
-
 #include "cxx_util.h"
 #include "unicode_util.h"
 #include "zorbatypes/zstring.h"
 
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
+
+#include <unicode/regex.h>
 
 namespace zorba {
 
@@ -44,7 +42,6 @@
 
 namespace unicode {
 
-
 /**
  * The %regex class wraps the underlying Unicode regular expression library.
  */
@@ -423,14 +420,13 @@
     return replace_all( in.c_str(), replacement.c_str(), out );
   }
 
-
   /**
    * Set the string to work on, without doing matching yet.
    *
    * @param in The UTF-8 input string.
    * @param len the size in bytes.
    */
-  void set_string( const char* in, size_type len );
+  void set_string( char const *in, size_type len );
 
   /**
    * Find the next match in string set by set_string().
@@ -496,12 +492,15 @@
 } // namespace unicode
 } // namespace zorba
 
-#else ///ZORBA_NO_UNICODE (ascii part:)
+///////////////////////////////////////////////////////////////////////////////
+
+#else /* ZORBA_NO_ICU (ascii part:) */
 
 #include "util/regex_ascii.h"
 #include <string>
 
-namespace zorba{
+namespace zorba {
+
 /**
  * Converts an XQuery regular expression to the form used by the regular
  * expression library Zorba is using (here regex_ascii).
@@ -513,10 +512,10 @@
 void convert_xquery_re( zstring const &xq_re, zstring *lib_re,
                         char const *flags = "" );
 
-namespace unicode{
+namespace unicode {
+
 ////////// classes ////////////////////////////////////////////////////////////
 
-
 /**
  * The %regex class wraps the underlying Unicode regular expression library.
  */
@@ -525,7 +524,7 @@
   /**
    * Constructs a %regex.
    */
-  regex() : regex_matcher( NULL ) { }
+  regex() : regex_matcher( nullptr ) { }
 
   /**
    * Destroys a %regex.
@@ -858,7 +857,6 @@
   int get_match_end( int groupId = 0 );
 
 private:
-  regex_ascii::CRegexAscii_parser regex_parser;
   regex_ascii::CRegexAscii_regex  *regex_matcher;
   uint32_t    parsed_flags;
 
@@ -873,15 +871,13 @@
   regex( regex const& );
   regex& operator=( regex const& );
 };
+
+///////////////////////////////////////////////////////////////////////////////
+
 } // namespace unicode
 } // namespace zorba
 
-#endif /* ZORBA_NO_UNICODE */
-
-
-///////////////////////////////////////////////////////////////////////////////
-
-
+#endif /* ZORBA_NO_ICU */
 #endif /* ZORBA_REGEX_H */
 /*
  * Local variables:

=== modified file 'src/util/regex_ascii.cpp'
--- src/util/regex_ascii.cpp	2011-08-05 02:21:55 +0000
+++ src/util/regex_ascii.cpp	2011-10-28 02:34:26 +0000
@@ -1,4 +1,4 @@
-a/*
+/*
  * Copyright 2006-2008 The FLWOR Foundation.
  * 
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -21,6 +21,8 @@
 #include "regex_ascii.h"
 #include <string.h>
 #include "zorbatypes/chartype.h"
+#include "util/unicode_categories.h"
+#include "util/ascii_util.h"
 
 namespace zorba {
   namespace regex_ascii{
@@ -62,6 +64,34 @@
 + http://www.w3.org/TR/xquery-operators/#regex-syntax (not implemented)
 */
 
+
+// Case-insensitive (ASCII) equality test for two NUL-terminated strings.
+static bool compare_i(const char *str1, const char *str2)
+{
+  const char *lhs = str1;
+  const char *rhs = str2;
+  for(; *lhs && *rhs; ++lhs, ++rhs)
+  {
+    if(ascii::to_lower(*lhs) != ascii::to_lower(*rhs))
+      return false;  // first differing character
+  }
+  // equal only when both strings ended at the same position
+  return !*lhs && !*rhs;
+}
+// Case-insensitive (ASCII) comparison of at most maxlen characters.
+static bool compare_ni(const char *str1, const char *str2, unsigned int maxlen)
+{
+  unsigned int remaining = maxlen;
+  for(; *str1 && *str2 && remaining; ++str1, ++str2, --remaining)
+  {
+    if(ascii::to_lower(*str1) != ascii::to_lower(*str2))
+      return false;
+  }
+  // limit reached => compared prefixes are equal; else both must end together
+  if(remaining == 0)
+    return true;
+  return !*str1 && !*str2;
+}
 ////////////////////////////////////
 ////Regular expression parsing and building of the tree
 ////////////////////////////////////
@@ -69,17 +99,10 @@
 CRegexAscii_regex* CRegexAscii_parser::parse(const char *pattern, unsigned int flags)
 {
   this->flags = flags;
-  bool align_begin = false;
   
-  if(!(flags & REGEX_ASCII_LITERAL) && (pattern[0] == '^'))
-    align_begin = true;
-
   int   regex_len;
-  CRegexAscii_regex*  regex = parse_regexp(pattern + (align_begin?1:0), &regex_len);
+  CRegexAscii_regex*  regex = parse_regexp(pattern, &regex_len);
   
-  if(regex)
-    regex->set_align_begin(align_begin);
-
   return regex;
 }
 
@@ -90,46 +113,52 @@
   *regex_len = 0;
   int   branch_len;
   regex_depth++;
-  CRegexAscii_regex *regex = new CRegexAscii_regex(current_regex);
+  std::auto_ptr<CRegexAscii_regex>  regex(new CRegexAscii_regex(current_regex));
   if(!current_regex)
-    current_regex = regex;
+    current_regex = regex.get();
   if(regex_depth >= 2)
   {
     //mark this as group if it does not start with ?:
     if(pattern[0] != '?' || pattern[1] != ':')
-      current_regex->subregex.push_back(regex);
+      current_regex->subregex.push_back(regex.get());
     else
       *regex_len = 2;
   }
   CRegexAscii_branch  *branch;
+  bool must_read_another_branch = true;
   while(pattern[*regex_len] && (pattern[*regex_len] != ')'))
   {
     branch = parse_branch(pattern+*regex_len, &branch_len);
     if(!branch)
     {
       regex_depth--;
-      delete regex;
       return NULL;
     }
     regex->add_branch(branch);
     *regex_len += branch_len;
+    if(pattern[*regex_len] == '|')
+      (*regex_len)++;
+    else
+      must_read_another_branch = false;
   }
-  if((current_regex == regex) && (pattern[*regex_len] == ')'))
+  if((current_regex == regex.get()) && (pattern[*regex_len] == ')'))
   {
-    throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_MISMATCHED_PAREN)) );
+    throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MISMATCHED_PAREN)) );
   }
   if(pattern[*regex_len])
     (*regex_len)++;
+  if(must_read_another_branch)
+    regex->add_branch(new CRegexAscii_branch(current_regex));//add empty branch
   regex->flags = 0;//finished initialization
   regex_depth--;
-  return regex;
+  return regex.release();
 }
 
 CRegexAscii_branch* CRegexAscii_parser::parse_branch(const char *pattern, int *branch_len)
 {
   int piece_len;
 
-  CRegexAscii_branch    *branch = new CRegexAscii_branch(current_regex);
+  std::auto_ptr<CRegexAscii_branch>    branch(new CRegexAscii_branch(current_regex));
   CRegexAscii_piece     *piece;
   *branch_len = 0;
   while(pattern[*branch_len] && (pattern[*branch_len] != '|') && (pattern[*branch_len] != ')'))
@@ -137,21 +166,25 @@
     piece = parse_piece(pattern+*branch_len, &piece_len);
     if(!piece)
     {
-      delete branch;
       return NULL;
     }
+    if(branch->piece_list.size() && dynamic_cast<CRegexAscii_pinstart*>(piece->atom))
+    {
+      //found ^ that is not at the beginning of branch
+      throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_ATOM_CHAR), '^') );
+    }
     branch->add_piece(piece);
     *branch_len += piece_len;
   }
-  if(pattern[*branch_len] == '|')
-    (*branch_len)++;
-  return branch;
+  //if(pattern[*branch_len] == '|')
+  //  (*branch_len)++;
+  return branch.release();
 }
 
 //piece = atom + quantifier
 CRegexAscii_piece* CRegexAscii_parser::parse_piece(const char *pattern, int *piece_len)
 {
-  CRegexAscii_piece *piece = new CRegexAscii_piece;
+  std::auto_ptr<CRegexAscii_piece>  piece(new CRegexAscii_piece);
   IRegexAtom  *atom;
   *piece_len = 0;
 
@@ -160,16 +193,15 @@
   atom = read_atom(pattern, &atom_len);
   if(!atom)
   {
-    delete piece;
     return NULL;
   }
   piece->set_atom(atom);
   if(!(flags & REGEX_ASCII_LITERAL))
-    read_quantifier(piece, pattern+atom_len, &quantif_len);
+    read_quantifier(piece.get(), pattern+atom_len, &quantif_len);
 
   *piece_len += atom_len + quantif_len;
 
-  return piece;
+  return piece.release();
 }
 
 char CRegexAscii_parser::myishex(char c)
@@ -185,24 +217,123 @@
 
 bool CRegexAscii_parser::myisdigit(char c)
 {
-  return (c >= '0') || (c <= '9');
-}
-
-char CRegexAscii_parser::readChar(const char *pattern, int *char_len, bool *is_multichar)
+  return (c >= '0') && (c <= '9');
+}
+
+bool CRegexAscii_parser::myisletterAZ(char c)
+{
+  return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z'));  // ASCII letters only
+}
+
+static const unicode::code_point specials_extcp[] = {0xFFF0, 0xFFFD, 0}; // NOTE(review): presumably the extra 0xFFF0-0xFFFD part of "Specials", 0-terminated -- confirm against the matcher
+
+static CRegexAscii_parser::block_escape_t block_escape[] = // named blocks; readChar() looks an entry up by name (case-insensitively) for \p{Is<name>} and returns its index. Layout appears to be {code point range}, optional extra code points, name -- confirm against block_escape_t
+{
+{{0x0000, 0x007F}, NULL, "BasicLatin"},
+{{0x0080, 0x00FF}, NULL, "Latin-1Supplement"},
+{{0x0100, 0x017F}, NULL, "LatinExtended-A"},
+{{0x0180, 0x024F}, NULL, "LatinExtended-B"},
+{{0x0250, 0x02AF}, NULL, "IPAExtensions"},
+{{0x02B0, 0x02FF}, NULL, "SpacingModifierLetters"},
+{{0x0300, 0x036F}, NULL, "CombiningDiacriticalMarks"},
+{{0x0370, 0x03FF}, NULL, "Greek"},
+{{0x0400, 0x04FF}, NULL, "Cyrillic"},
+{{0x0530, 0x058F}, NULL, "Armenian"},
+{{0x0590, 0x05FF}, NULL, "Hebrew"},
+{{0x0600, 0x06FF}, NULL, "Arabic"},
+{{0x0700, 0x074F}, NULL, "Syriac"},
+{{0x0780, 0x07BF}, NULL, "Thaana"},
+{{0x0900, 0x097F}, NULL, "Devanagari"},
+{{0x0980, 0x09FF}, NULL, "Bengali"},
+{{0x0A00, 0x0A7F}, NULL, "Gurmukhi"},
+{{0x0A80, 0x0AFF}, NULL, "Gujarati"},
+{{0x0B00, 0x0B7F}, NULL, "Oriya"},
+{{0x0B80, 0x0BFF}, NULL, "Tamil"},
+{{0x0C00, 0x0C7F}, NULL, "Telugu"},
+{{0x0C80, 0x0CFF}, NULL, "Kannada"},
+{{0x0D00, 0x0D7F}, NULL, "Malayalam"},
+{{0x0D80, 0x0DFF}, NULL, "Sinhala"},
+{{0x0E00, 0x0E7F}, NULL, "Thai"},
+{{0x0E80, 0x0EFF}, NULL, "Lao"},
+{{0x0F00, 0x0FFF}, NULL, "Tibetan"},
+{{0x1000, 0x109F}, NULL, "Myanmar"},
+{{0x10A0, 0x10FF}, NULL, "Georgian"},
+{{0x1100, 0x11FF}, NULL, "HangulJamo"},
+{{0x1200, 0x137F}, NULL, "Ethiopic"},
+{{0x13A0, 0x13FF}, NULL, "Cherokee"},
+{{0x1400, 0x167F}, NULL, "UnifiedCanadianAboriginalSyllabics"},
+{{0x1680, 0x169F}, NULL, "Ogham"},
+{{0x16A0, 0x16FF}, NULL, "Runic"},
+{{0x1780, 0x17FF}, NULL, "Khmer"},
+{{0x1800, 0x18AF}, NULL, "Mongolian"},
+{{0x1E00, 0x1EFF}, NULL, "LatinExtendedAdditional"},
+{{0x1F00, 0x1FFF}, NULL, "GreekExtended"},
+{{0x2000, 0x206F}, NULL, "GeneralPunctuation"},
+{{0x2070, 0x209F}, NULL, "SuperscriptsandSubscripts"},
+{{0x20A0, 0x20CF}, NULL, "CurrencySymbols"},
+{{0x20D0, 0x20FF}, NULL, "CombiningMarksforSymbols"},
+{{0x2100, 0x214F}, NULL, "LetterlikeSymbols"},
+{{0x2150, 0x218F}, NULL, "NumberForms"},
+{{0x2190, 0x21FF}, NULL, "Arrows"},
+{{0x2200, 0x22FF}, NULL, "MathematicalOperators"},
+{{0x2300, 0x23FF}, NULL, "MiscellaneousTechnical"},
+{{0x2400, 0x243F}, NULL, "ControlPictures"},
+{{0x2440, 0x245F}, NULL, "OpticalCharacterRecognition"},
+{{0x2460, 0x24FF}, NULL, "EnclosedAlphanumerics"},
+{{0x2500, 0x257F}, NULL, "BoxDrawing"},
+{{0x2580, 0x259F}, NULL, "BlockElements"},
+{{0x25A0, 0x25FF}, NULL, "GeometricShapes"},
+{{0x2600, 0x26FF}, NULL, "MiscellaneousSymbols"},
+{{0x2700, 0x27BF}, NULL, "Dingbats"},
+{{0x2800, 0x28FF}, NULL, "BraillePatterns"},
+{{0x2E80, 0x2EFF}, NULL, "CJKRadicalsSupplement"},
+{{0x2F00, 0x2FDF}, NULL, "KangxiRadicals"},
+{{0x2FF0, 0x2FFF}, NULL, "IdeographicDescriptionCharacters"},
+{{0x3000, 0x303F}, NULL, "CJKSymbolsandPunctuation"},
+{{0x3040, 0x309F}, NULL, "Hiragana"},
+{{0x30A0, 0x30FF}, NULL, "Katakana"},
+{{0x3100, 0x312F}, NULL, "Bopomofo"},
+{{0x3130, 0x318F}, NULL, "HangulCompatibilityJamo"},
+{{0x3190, 0x319F}, NULL, "Kanbun"},
+{{0x31A0, 0x31BF}, NULL, "BopomofoExtended"},
+{{0x3200, 0x32FF}, NULL, "EnclosedCJKLettersandMonths"},
+{{0x3300, 0x33FF}, NULL, "CJKCompatibility"},
+{{0x3400, 0x4DB5}, NULL, "CJKUnifiedIdeographsExtensionA"},
+{{0x4E00, 0x9FFF}, NULL, "CJKUnifiedIdeographs"},
+{{0xA000, 0xA48F}, NULL, "YiSyllables"},
+{{0xA490, 0xA4CF}, NULL, "YiRadicals"},
+{{0xAC00, 0xD7A3}, NULL, "HangulSyllables"},
+{{0xE000, 0xF8FF}, NULL, "PrivateUse"},
+{{0xF900, 0xFAFF}, NULL, "CJKCompatibilityIdeographs"},
+{{0xFB00, 0xFB4F}, NULL, "AlphabeticPresentationForms"},
+{{0xFB50, 0xFDFF}, NULL, "ArabicPresentationForms-A"},
+{{0xFE20, 0xFE2F}, NULL, "CombiningHalfMarks"},
+{{0xFE30, 0xFE4F}, NULL, "CJKCompatibilityForms"},
+{{0xFE50, 0xFE6F}, NULL, "SmallFormVariants"},
+{{0xFE70, 0xFEFE}, NULL, "ArabicPresentationForms-B"},
+{{0xFEFF, 0xFEFF}, specials_extcp, "Specials"}, // the only entry that carries an extra code point list
+{{0xFF00, 0xFFEF}, NULL, "HalfwidthandFullwidthForms"}
+};
+
+char CRegexAscii_parser::readChar(const char *pattern, 
+                                  bool for_atom, 
+                                  int *char_len, CHARGROUP_t *multichar_type)
 {
   char  c = 0;
   *char_len = 0;
-  *is_multichar = false;
+  *multichar_type = CHARGROUP_NO_MULTICHAR;
   switch(pattern[*char_len])
   {
   case '\\':
-  {  (*char_len)++;
+  {
+    (*char_len)++;
     switch(pattern[*char_len])
     {
     case 'n': c = '\n';break;
     case 'r': c = '\r';break;
     case 't': c = '\t';break;
     case '\\':
+    case '/'://+
     case '|':
     case '.':
     case '?':
@@ -216,18 +347,213 @@
     case '['://#x5B
     case ']'://#x5D
     case '^'://#x5E
+    case '$'://+
        c = pattern[*char_len];
        break;
     case 'p'://catEsc
     case 'P'://complEsc
       //ignore the prop for now
-      c = pattern[*char_len];
-      *is_multichar = true;
-      if(pattern[*char_len+1] == '{')
-      {
-        while(pattern[*char_len] != '}')
+      *multichar_type = (CHARGROUP_t)((pattern[*char_len] == 'P') ? 128 : 0);
+      if(pattern[(*char_len)+1] != '{')
+      {
+        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) );
+      }
+      (*char_len) += 2;
+      switch(pattern[*char_len])
+      {//IsCategory
+      case 'L':
+      {
+        unsigned int temp_int = *multichar_type;
+        temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+        *multichar_type = (CHARGROUP_t)temp_int;
+        switch(pattern[(*char_len)+1])
+        {
+        case '}':
+          c = unicode::UNICODE_Ll + 50;break;
+        case 'u':
+          c = unicode::UNICODE_Lu; (*char_len)++;break;
+        case 'l':
+          c = unicode::UNICODE_Ll; (*char_len)++;break;
+        case 't':
+          c = unicode::UNICODE_Lt; (*char_len)++;break;
+        case 'm':
+          c = unicode::UNICODE_Lm; (*char_len)++;break;
+        case 'o':
+          c = unicode::UNICODE_Lo; (*char_len)++;break;
+        default:
+          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PL_CONSTRUCT)) );
+        }
+      }break;
+      case 'M':
+      {
+        unsigned int temp_int = *multichar_type;
+        temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+        *multichar_type = (CHARGROUP_t)temp_int;
+        switch(pattern[(*char_len)+1])
+        {
+        case '}':
+          c = unicode::UNICODE_Mc + 50;break;
+        case 'n':
+          c = unicode::UNICODE_Mn; (*char_len)++;break;
+        case 'c':
+          c = unicode::UNICODE_Mc; (*char_len)++;break;
+        case 'e':
+          c = unicode::UNICODE_Me; (*char_len)++;break;
+        default:
+          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PM_CONSTRUCT)) );
+        }
+      }break;
+      case 'N':
+      {
+        unsigned int temp_int = *multichar_type;
+        temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+        *multichar_type = (CHARGROUP_t)temp_int;
+        switch(pattern[(*char_len)+1])
+        {
+        case '}':
+          c = unicode::UNICODE_Nd + 50;break;
+        case 'd':
+          c = unicode::UNICODE_Nd; (*char_len)++;break;
+        case 'l':
+          c = unicode::UNICODE_Nl; (*char_len)++;break;
+        case 'o':
+          c = unicode::UNICODE_No; (*char_len)++;break;
+        default:
+          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PN_CONSTRUCT)) );
+        }
+      }break;
+      case 'P':
+      {
+        unsigned int temp_int = *multichar_type;
+        temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+        *multichar_type = (CHARGROUP_t)temp_int;
+        switch(pattern[(*char_len)+1])
+        {
+        case '}':
+          c = unicode::UNICODE_Pc + 50;break;
+        case 'c':
+          c = unicode::UNICODE_Pc; (*char_len)++;break;
+        case 'd':
+          c = unicode::UNICODE_Pd; (*char_len)++;break;
+        case 's':
+          c = unicode::UNICODE_Ps; (*char_len)++;break;
+        case 'e':
+          c = unicode::UNICODE_Pe; (*char_len)++;break;
+        case 'i':
+          c = unicode::UNICODE_Pi; (*char_len)++;break;
+        case 'f':
+          c = unicode::UNICODE_Pf; (*char_len)++;break;
+        case 'o':
+          c = unicode::UNICODE_Po; (*char_len)++;break;
+        default:
+          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PP_CONSTRUCT)) );
+        }
+      }break;
+      case 'Z':
+      {
+        unsigned int temp_int = *multichar_type;
+        temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+        *multichar_type = (CHARGROUP_t)temp_int;
+        switch(pattern[(*char_len)+1])
+        {
+        case '}':
+          c = unicode::UNICODE_Zl + 50;break;
+        case 's':
+          c = unicode::UNICODE_Zs; (*char_len)++;break;
+        case 'l':
+          c = unicode::UNICODE_Zl; (*char_len)++;break;
+        case 'p':
+          c = unicode::UNICODE_Zp; (*char_len)++;break;
+        default:
+          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PZ_CONSTRUCT)) );
+        }
+      }break;
+      case 'S':
+      {
+        unsigned int temp_int = *multichar_type;
+        temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+        *multichar_type = (CHARGROUP_t)temp_int;
+        switch(pattern[(*char_len)+1])
+        {
+        case '}':
+          c = unicode::UNICODE_Sc + 50;break;
+        case 'm':
+          c = unicode::UNICODE_Sm; (*char_len)++;break;
+        case 'c':
+          c = unicode::UNICODE_Sc; (*char_len)++;break;
+        case 'k':
+          c = unicode::UNICODE_Sk; (*char_len)++;break;
+        case 'o':
+          c = unicode::UNICODE_So; (*char_len)++;break;
+        default:
+          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PS_CONSTRUCT)) );
+        }
+      }break;
+      case 'C':
+      {
+        unsigned int temp_int = *multichar_type;
+        temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+        *multichar_type = (CHARGROUP_t)temp_int;
+        switch(pattern[(*char_len)+1])
+        {
+        case '}':
+          c = unicode::UNICODE_Cc + 50;break;
+        case 'c':
+          c = unicode::UNICODE_Cc; (*char_len)++;break;
+        case 'f':
+          c = unicode::UNICODE_Cf; (*char_len)++;break;
+        case 'o':
+          c = unicode::UNICODE_Co; (*char_len)++;break;
+        case 'n':
+          c = unicode::UNICODE_Cn; (*char_len)++;break;
+        default:
+          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PC_CONSTRUCT)) );
+        }
+      }break;
+      case 'I':
+        switch(pattern[(*char_len)+1])
+        {
+        case 's'://IsBlock
+        {
+          unsigned int temp_int = *multichar_type;
+          temp_int |= CHARGROUP_FLAGS_MULTICHAR_Is;
+          *multichar_type = (CHARGROUP_t)temp_int;
           (*char_len)++;
+          zstring block_name;
+          char tempc = pattern[(*char_len)+1];
+          while(tempc && (tempc != '}'))
+          {
+            if(!myisletterAZ(tempc) && !myisdigit(tempc) && (tempc != '-'))
+              throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) );
+            block_name.append(1, tempc);
+            (*char_len)++;
+            tempc = pattern[(*char_len)+1];
+          }
+          if(!pattern[(*char_len)+1])
+            throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) );
+          //search for the block name
+          int i;
+          int nr_blocks = sizeof(block_escape)/sizeof(CRegexAscii_parser::block_escape_t);
+          for(i=0;i<nr_blocks;i++)
+          {
+            if(compare_i(block_name.c_str(), block_escape[i].group_name))
+            {
+              c = i;
+              break;
+            }
+          }
+          if(i==nr_blocks)
+            throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PIs_CONSTRUCT)) );
+        }break;
+        default:
+          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) );
+        }break;
+      default:
+        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) );
       }
+      if(pattern[(*char_len) + 1] != '}')
+          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) );
+      (*char_len)++;
       break;
       //multiCharEsc
     case 's':
@@ -240,9 +566,11 @@
     case 'D':
     case 'w':
     case 'W':
-      *is_multichar = true;
+       *multichar_type = CHARGROUP_FLAGS_MULTICHAR_OTHER;
        c = pattern[*char_len];
        break;
+    default:
+      throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_ESC_CHAR)) );
     }
     break;
   }
@@ -262,6 +590,15 @@
   }
 
   (*char_len)++;
+  if((flags & REGEX_ASCII_NO_WHITESPACE) && for_atom &&
+    ((c == ' ') || (c == '\t') || (c == '\r') || (c == '\n')))
+  {
+    //ignore this whitespace
+    int char_len2;
+    char c2 = readChar(pattern + *char_len, for_atom, &char_len2, multichar_type);
+    *char_len += char_len2;
+    return c2;
+  }
   return c;
 }
 
@@ -281,13 +618,13 @@
       (*atom_len)++;
       if(pattern[*atom_len] == '0')
       {
-        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_INVALID_BACK_REF)) );
+        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_BACK_REF), 0, current_regex->subregex.size()) );
       }
       unsigned int backref = pattern[*atom_len] - '0';
       if((backref > current_regex->subregex.size()) ||
         (current_regex->subregex.at(backref-1)->flags != 0))
       {
-        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_INVALID_BACK_REF)) );
+        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_BACK_REF), backref, current_regex->subregex.size()) );
       }
       while(current_regex->subregex.size() >= backref*10)
       {
@@ -303,9 +640,19 @@
             break;
         }
       }
+      (*atom_len)++;
       return new CRegexAscii_backref(current_regex, backref);
     }
   }
+  if((!(flags & REGEX_ASCII_LITERAL)) && (c == '^'))
+  {
+    (*atom_len)++;
+    return new CRegexAscii_pinstart(current_regex);
+  }
+  if((c == '}') || (c == '{') || (c == '?') || (c == '*') || (c == '+') || (c == '|'))
+  {
+    throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_ATOM_CHAR), c) );
+  }
   switch(c)
   {
   case '[':
@@ -350,23 +697,23 @@
   {  
     char  c;
     int   c_len;
-    bool  is_multichar = false;
+    CHARGROUP_t   multichar_type = CHARGROUP_NO_MULTICHAR;
     if(!(flags & REGEX_ASCII_LITERAL))
-      c = readChar(pattern+*atom_len, &c_len, &is_multichar);
+      c = readChar(pattern+*atom_len, true, &c_len, &multichar_type);
     else
     {
       c = pattern[*atom_len];
       c_len = 1;
     }
-    CRegexAscii_chargroup *chargroup = new CRegexAscii_chargroup(current_regex);
-    if(is_multichar)
-      chargroup->addMultiChar(c);
+    std::auto_ptr<CRegexAscii_chargroup> chargroup(new CRegexAscii_chargroup(current_regex));
+    if(multichar_type)
+      chargroup->addMultiChar(c, multichar_type);
     else if(is_end_line)
       chargroup->addEndLine();
     else
-      chargroup->addCharRange(c, c);
+      chargroup->addOneChar(c);
     *atom_len += c_len;
-    return chargroup;
+    return chargroup.release();
   }
   }
 }
@@ -376,19 +723,19 @@
 //charRange     ::=    seRange | XmlCharIncDash
 CRegexAscii_chargroup* CRegexAscii_parser::readchargroup(const char *pattern, int *chargroup_len)
 {
-  CRegexAscii_chargroup *chargroup = NULL;
+  std::auto_ptr<CRegexAscii_chargroup> chargroup;
   *chargroup_len = 0;
   if(pattern[*chargroup_len] == '^')//negative group
   {
     (*chargroup_len)++;
-    chargroup = new CRegexAscii_negchargroup(current_regex);
+    chargroup.reset(new CRegexAscii_negchargroup(current_regex));
   }
   else
-    chargroup = new CRegexAscii_chargroup(current_regex);
+    chargroup.reset(new CRegexAscii_chargroup(current_regex));
   while(pattern[*chargroup_len] && (pattern[*chargroup_len]!=']'))
   {
     char  c1, c2;
-    bool  is_multichar;
+    CHARGROUP_t  multichar_type = CHARGROUP_NO_MULTICHAR;
     int   c1_len;
     c1 = pattern[*chargroup_len];
     c2 = pattern[*chargroup_len+1];
@@ -398,23 +745,26 @@
       CRegexAscii_chargroup *classsub = readchargroup(pattern + *chargroup_len+1 + 1, &classsub_len);
       if(!classsub)
       {
-        delete chargroup;
-        return NULL;
+        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_SUBCLASS)) );
       }
       chargroup->addClassSub(classsub);
       *chargroup_len += 2 + classsub_len + 1;
       if(pattern[*chargroup_len-1] != ']')
       {
-        delete chargroup;
-        return NULL;
+        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_USE_OF_SUBCLASS)) );
       }
-      return chargroup;
+      return chargroup.release();
     }
 
-    c1 = readChar(pattern+*chargroup_len, &c1_len, &is_multichar);
-    if(is_multichar)//first char is multichar
+    c1 = readChar(pattern+*chargroup_len, false, &c1_len, &multichar_type);
+    if(multichar_type)//first char is multichar
     {
-      chargroup->addMultiChar(c1);
+      if((pattern[*chargroup_len+c1_len] == '-') &&///might be a range
+        (pattern[*chargroup_len+c1_len+1] != ']'))
+      {
+        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MULTICHAR_IN_CHAR_RANGE)) );
+      }
+      chargroup->addMultiChar(c1, multichar_type);
       *chargroup_len += c1_len;
       continue;
     }
@@ -422,30 +772,36 @@
     {
       if(pattern[*chargroup_len+c1_len+1] == ']')//no range, just the last char is '-'
       {
-        chargroup->addCharRange(c1, c1);
-        chargroup->addCharRange('-', '-');
+        chargroup->addOneChar(c1);
+        chargroup->addOneChar('-');
         *chargroup_len += c1_len + 1;
         continue;
       }
-      else
+      else if(pattern[*chargroup_len+c1_len+1] != '[')
       {
         //it is a range
         char c3;
         int  c3_len;
-        c3 = readChar(pattern+*chargroup_len+c1_len+1, &c3_len, &is_multichar);
-        if(is_multichar)
-          return NULL;//error
+        c3 = readChar(pattern+*chargroup_len+c1_len+1, false, &c3_len, &multichar_type);
+        if(multichar_type)
+        {
+          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MULTICHAR_IN_CHAR_RANGE)) );
+        }
         chargroup->addCharRange(c1, c3);
         *chargroup_len += c1_len + 1 + c3_len;
         continue;
       }
     }
-    chargroup->addCharRange(c1, c1);
+    chargroup->addOneChar(c1);
     *chargroup_len += c1_len;
   }
   if(pattern[*chargroup_len])
     (*chargroup_len)++;
-  return chargroup;
+  else
+  {
+    throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MISSING_CLOSE_BRACKET)) );
+  }
+  return chargroup.release();
 }
 
 void CRegexAscii_parser::read_quantifier(CRegexAscii_piece *piece,
@@ -496,6 +852,10 @@
         max = max*10 + pattern[*quantif_len] - '0';
         (*quantif_len)++;
       }
+      if(max < min)
+      {
+        throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MAX_LT_MIN)) );
+      }
       piece->set_quantifier_min_max(min, max, true);
     }
     while(pattern[*quantif_len] && (pattern[*quantif_len] != '}'))
@@ -528,6 +888,8 @@
 {
   matched_source = NULL;
   matched_len = 0;
+//  backup_matched_source = NULL;
+//  backup_matched_len = 0;
   flags = 128;//set to 0 after initialization
 }
 
@@ -548,13 +910,6 @@
 */
 }
 
-bool CRegexAscii_regex::set_align_begin(bool align_begin)
-{
-  bool prev_align = this->align_begin;
-  this->align_begin = align_begin;
-  return prev_align;
-}
-
 void CRegexAscii_regex::add_branch(CRegexAscii_branch *branch)
 {
   branch_list.push_back(branch);
@@ -579,18 +934,19 @@
   return subregex.size();
 }
 
-CRegexAscii_branch::CRegexAscii_branch(CRegexAscii_regex* regex) :
-      IRegexMatcher(regex)
+CRegexAscii_branch::CRegexAscii_branch(CRegexAscii_regex* regex) 
+      //:
+      //IRegexMatcher(regex)
 {
 }
 
 CRegexAscii_branch::~CRegexAscii_branch()
 {
-  std::list<CRegexAscii_piece*>::iterator  piece_it;
+  std::list<RegexAscii_pieceinfo>::iterator  piece_it;
 
   for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++)
   {
-    delete (*piece_it);
+    delete (*piece_it).piece;
   }
 }
 
@@ -601,6 +957,8 @@
 
 CRegexAscii_piece::CRegexAscii_piece()
 {
+  atom = NULL;
+  regex_atom = NULL;
 }
 
 CRegexAscii_piece::~CRegexAscii_piece()
@@ -611,6 +969,7 @@
 void CRegexAscii_piece::set_atom(IRegexAtom *atom)
 {
   this->atom = atom;
+  this->regex_atom = dynamic_cast<CRegexAscii_regex*>(atom);
 }
 
 void CRegexAscii_piece::set_quantifier_min_max(int min, int max, bool strict_max)
@@ -631,6 +990,8 @@
 }
 bool CRegexAscii_piece::get_is_reluctant()
 {
+  if(atom->regex_intern->flags & REGEX_ASCII_MINIMAL_MATCH)
+    return true;
   return is_reluctant;
 }
 
@@ -646,10 +1007,10 @@
   delete classsub;
 }
 
-void CRegexAscii_chargroup::addMultiChar(char c)
+void CRegexAscii_chargroup::addMultiChar(char c, CHARGROUP_t multichar_type)
 {
   chargroup_t cgt;
-  cgt.flags = CHARGROUP_FLAGS_MULTICHAR;
+  cgt.flags = multichar_type;
   cgt.c1 = c;
   cgt.c2 = 0;
   chargroup_list.push_back(cgt);
@@ -667,12 +1028,20 @@
 void CRegexAscii_chargroup::addCharRange(char c1, char c2)
 {
   chargroup_t cgt;
-  cgt.flags = 0;
+  cgt.flags = CHARGROUP_FLAGS_CHAR_RANGE;
   cgt.c1 = c1;
   cgt.c2 = c2;
   chargroup_list.push_back(cgt);
 }
 
+void CRegexAscii_chargroup::addOneChar(char c)
+{
+  chargroup_t cgt;
+  cgt.flags = CHARGROUP_FLAGS_ONECHAR;
+  cgt.c1 = c;
+ chargroup_list.push_back(cgt);
+}
+
 void CRegexAscii_chargroup::addClassSub(CRegexAscii_chargroup* classsub)
 {
   this->classsub = classsub;
@@ -706,6 +1075,11 @@
 {
 }
 
+CRegexAscii_pinstart::CRegexAscii_pinstart(CRegexAscii_regex* regex):
+      IRegexAtom(regex)
+{
+}
+
 CRegexAscii_parser::CRegexAscii_parser()
 {
   current_regex = NULL;
@@ -720,6 +1094,65 @@
 //////////////////////////////////////////
 ////Matching the pattern on a string
 /////////////////////////////////////////
+static std::list<RegexAscii_pieceinfo> empty_pieces;//empty list of pieces
+/*
+std::list<RegexAscii_pieceinfo>::iterator  
+IRegexAtom::choose_next_piece(const char *source, int *matched_len, 
+                              std::list<RegexAscii_pieceinfo>::iterator this_piece,
+                              std::list<RegexAscii_pieceinfo>::iterator end_piece)
+{
+  //if this_piece is a repetition, repeat until max, then go to the next piece
+  int min, max;
+  bool strict_max;
+  while(this_piece != end_piece)
+  {
+    (*this_piece).piece->get_quantifier(&min, &max, &strict_max);
+    if(max <= ((*this_piece).nr_matches))//finished this piece
+    {
+      this_piece++;
+    }
+    else
+      break;
+  }
+  return this_piece;
+}
+*/
+
+bool IRegexAtom::match(const char *source, int *start_from_branch, int *matched_len,
+                  std::list<RegexAscii_pieceinfo>::iterator this_piece,
+                  std::list<RegexAscii_pieceinfo>::iterator end_piece)
+{
+  *start_from_branch = 0;
+  bool retmatch;
+  retmatch = match_internal(source, start_from_branch, matched_len);
+  if(!retmatch)
+    return false;
+
+  if(this_piece == end_piece)
+    return true;
+
+  (*this_piece).nr_matches++;
+  int min,max;
+  bool strict_max;
+  (*this_piece).piece->get_quantifier(&min, &max, &strict_max);
+  std::list<RegexAscii_pieceinfo>::iterator init_piece = this_piece;
+  if(((min == 1) && (max == 1)) || //the simple common case
+    ((*matched_len == 0) && ((*this_piece).nr_matches>=min)))//to avoid infinite loop
+  {
+    this_piece++;
+    if(this_piece == end_piece)
+      return true;
+  }
+  int matched_len2;
+  retmatch = (*this_piece).piece->match_piece(this_piece, end_piece, source + *matched_len, &matched_len2);
+  if(!retmatch)
+  {
+    (*init_piece).nr_matches--;
+    return false;
+  }
+  *matched_len += matched_len2;
+  return true;
+}
 
 //try every position in source to match the pattern
 bool CRegexAscii_regex::match_anywhere(const char *source, unsigned int flags,
@@ -734,6 +1167,7 @@
                                        int *match_pos, int *matched_len)
 {
   this->flags = flags;
+  this->source_start = source;
   reachedEnd = false;
 
   std::vector<CRegexAscii_regex*>::iterator regex_it;
@@ -741,30 +1175,52 @@
   {
     (*regex_it)->matched_source = NULL;
   }
-//  if(!source[0])
-//  {
-//    if(branch_list.empty())
-//      return true;
-//    else
-//      return false;
-//  }
-
-  bool  skip_first_match = false;
-  if(*match_pos && align_begin)
-    skip_first_match = true;
+
+  std::vector<std::pair<const char*, int> >  saved_subregex;
+
+  if(*match_pos && (flags & REGEX_ASCII_WHOLE_MATCH))
+    return false;
+
   do
   {
-    if(!skip_first_match)
-    {
-      if(match(source + *match_pos, matched_len))
-        return true;
-    }
-    skip_first_match = false;
-    if(align_begin)
+    int   start_from_branch = 0;
+    int   longest_match = -1;
+    while(1)
+    {
+      if(!match(source + *match_pos, &start_from_branch, matched_len, empty_pieces.begin(), empty_pieces.end()))
+        break;
+      if(longest_match < *matched_len)
+      {
+        longest_match = *matched_len;
+        if(start_from_branch && (flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+          save_subregex_list(saved_subregex);
+      }
+      if(!start_from_branch || !(flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+        break;
+      //else try the other branches to see which is longer
+    }
+    if(longest_match != -1)
+    {
+      *matched_len = longest_match;
+      if(saved_subregex.size())
+        load_subregex_list(saved_subregex);
+      if(flags & REGEX_ASCII_WHOLE_MATCH)
+      {
+        if(!source[*match_pos+*matched_len])
+          return true;
+        if((flags & REGEX_ASCII_MULTILINE) && 
+          ((source[*match_pos+*matched_len] == '\n') || (source[*match_pos+*matched_len] == '\r')))
+          return true;
+        return false;
+      }
+      return true;
+    }
+
+    if(flags & REGEX_ASCII_WHOLE_MATCH)
     {
       if(flags & REGEX_ASCII_MULTILINE)
       {
-        //goto the next line
+        //go to next line
         while(source[*match_pos] && (source[*match_pos] != '\n') && (source[*match_pos] != '\r'))
           (*match_pos)++;
         if(source[*match_pos] == '\n')
@@ -780,153 +1236,528 @@
             (*match_pos)++;
         }
         if(!source[*match_pos])
-          return false;
+          break;
         continue;
       }
-      return false;
+      break;
     }
     if(!source[*match_pos])
       break;
     (*match_pos)++;
   }
   while(source[*match_pos]);
+  if(!source[*match_pos])
+  {
+    reachedEnd = true;
+  }
   return false;
 }
 
+void CRegexAscii_regex::reset_match()
+{
+//  this->backup_matched_source = this->matched_source;
+//  this->backup_matched_len = this->matched_len;
+  this->matched_source = NULL;
+  this->matched_len = 0;
+  std::list<CRegexAscii_branch*>::iterator  branch_it;
+  for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
+  {
+    (*branch_it)->reset();
+  }
+}
+/*
+void CRegexAscii_regex::restore_match()
+{
+  this->matched_source = this->backup_matched_source;
+  this->matched_len = this->backup_matched_len;
+  std::list<CRegexAscii_branch*>::iterator  branch_it;
+  for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
+  {
+    (*branch_it)->restore();
+  }
+}
+*/
 //match any of the branches
-bool CRegexAscii_regex::match(const char *source, int *matched_len)
+bool CRegexAscii_regex::match(const char *source, int *start_from_branch, int *matched_len,
+                              std::list<RegexAscii_pieceinfo>::iterator next_piece,
+                              std::list<RegexAscii_pieceinfo>::iterator end_piece)
 {
   reachedEnd = false;
+  if(!(flags & REGEX_ASCII_GROUPING_LEN_WHOLE_PIECE) || 
+    (this->matched_source == NULL) || ((this->matched_source + this->matched_len) != source))
+    this->matched_source = source;
+  *matched_len = 0;
   std::list<CRegexAscii_branch*>::iterator  branch_it;
 
-  for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
-  {
-    if((*branch_it)->match(source, matched_len))
-    {
-      matched_source = source;
-      this->matched_len = *matched_len;
+  if(*start_from_branch == 0)
+  {
+    for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
+    {
+      (*branch_it)->reset();
+    }
+  }
+
+  branch_it = branch_list.begin();
+  if(*start_from_branch)
+  {
+    for(int i=0;i<*start_from_branch;i++)
+      branch_it++;
+  }
+  (*start_from_branch)++;
+  for(; branch_it != branch_list.end(); branch_it++,(*start_from_branch)++)
+  {
+    if((*branch_it)->match(source, matched_len, this, next_piece, end_piece))
+    {
+      //matched_source = source;
+      //this->matched_len = *matched_len;
       return true;
     }
   }
-  matched_source = NULL;
-  matched_len = 0;
+  *start_from_branch = 0;
+  if(this->matched_source == source)
+    this->matched_source = NULL;
+  *matched_len = 0;
   return false;
 }
 
+void CRegexAscii_regex::save_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex)
+{
+  saved_subregex.resize(0);
+  saved_subregex.reserve(subregex.size());
+  std::vector<CRegexAscii_regex*>::iterator   it;
+  for(it=subregex.begin(); it != subregex.end(); it++)
+  {
+    saved_subregex.push_back(std::pair<const char*, int>((*it)->matched_source, (*it)->matched_len));
+  }
+}
+
+void CRegexAscii_regex::load_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex)
+{
+  std::vector<std::pair<const char*, int> >::iterator   it;
+  std::vector<CRegexAscii_regex*>::iterator            subit;
+  for(it=saved_subregex.begin(), subit = subregex.begin(); it != saved_subregex.end(); it++, subit++)
+  {
+    (*subit)->matched_source = (*it).first;
+    (*subit)->matched_len = (*it).second;
+  }
+}
+
+void CRegexAscii_branch::reset()
+{
+  std::list<RegexAscii_pieceinfo>::iterator  piece_it;
+  for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++)
+  {
+    (*piece_it).piece->atom->reset_match();
+  }
+}
+/*
+void CRegexAscii_branch::restore()
+{
+  std::list<RegexAscii_pieceinfo>::iterator  piece_it;
+  for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++)
+  {
+    (*piece_it).piece->atom->restore_match();
+  }
+}
+*/
 //match all the pieces
-bool CRegexAscii_branch::match(const char *source, int *matched_len)
+bool CRegexAscii_branch::match(const char *source, int *matched_len,
+                              CRegexAscii_regex* group_regex,
+                              std::list<RegexAscii_pieceinfo>::iterator next_piece,
+                              std::list<RegexAscii_pieceinfo>::iterator end_piece)
 {
-  std::list<CRegexAscii_piece*>::iterator  piece_it;
+  std::list<RegexAscii_pieceinfo>::iterator  piece_it;
 
   piece_it = piece_list.begin(); 
+  //if(piece_it == piece_list.end())
+    //if(!source[0])
+  //    return true;
+    //else
+    //  return false;
   if(piece_it == piece_list.end())
-    if(source[0])
-      return false;
+  {
+    piece_it = next_piece;
+    if(next_piece == end_piece)
+    {
+      group_regex->matched_len = 0;
+      return true;
+    }
+  }
+
+  std::list<RegexAscii_pieceinfo>   temp_pieces(piece_list);
+  temp_pieces.push_back(group_regex);//this will be used to store the group match
+  temp_pieces.insert(temp_pieces.end(), next_piece, end_piece);
+
+  return (*piece_it).piece->match_piece(temp_pieces.begin(), temp_pieces.end(), source, matched_len);
+}
+
+bool CRegexAscii_piece::match_piece(std::list<RegexAscii_pieceinfo>::iterator piece_it,
+                                    std::list<RegexAscii_pieceinfo>::iterator end_it,
+                                    const char *source, int *matched_len)
+{
+  if((*piece_it).nr_matches < 0)
+  {
+    //special case, store the group match
+    (*piece_it).group_regex->matched_len = source - (*piece_it).group_regex->matched_source;
+    piece_it++;
+    if(piece_it == end_it)
+      return true;
     else
-      return true;
-  if(!(*piece_it)->get_is_reluctant())
-    return match_piece_iter_normal(piece_it, source, matched_len);
+      return (*piece_it).piece->match_piece(piece_it, end_it, source, matched_len);
+  }
+
+  if(!get_is_reluctant())
+    return match_piece_iter_normal(piece_it, end_it, source, matched_len);
   else
-    return match_piece_iter_reluctant(piece_it, source, matched_len);
-}
-
-//match as less as possible
-bool CRegexAscii_branch::match_piece_iter_reluctant(
-                                        std::list<CRegexAscii_piece*>::iterator piece_it,
+    return match_piece_iter_reluctant(piece_it, end_it, source, matched_len);
+}
+
+int CRegexAscii_piece::choose_another_branch(std::vector<std::pair<int,int> > &match_lens)
+{
+  int i = match_lens.size()-1;
+  i--;
+  while((i >= 0) && (match_lens.at(i).second == 0))
+    i--;
+  if(i < 0)
+    return -1;//no more branches
+  match_lens.resize(i+1);
+  i++;
+  return i;
+}
+
+bool CRegexAscii_piece::is_regex_atom()
+{
+  return regex_atom != NULL;
+}
+
+//match as little as possible (shortest string)
+bool CRegexAscii_piece::match_piece_iter_reluctant(
+                                        std::list<RegexAscii_pieceinfo>::iterator piece_it,
+                                        std::list<RegexAscii_pieceinfo>::iterator end_it,
                                         const char *source, int *matched_len)
 {
   *matched_len = 0;
-  if(piece_it == piece_list.end())
+  if(piece_it == end_it)
     return true;
 
   int min, max;
   bool  strict_max;
   //std::vector<int>    match_lens;
-  (*piece_it)->get_quantifier(&min, &max, &strict_max);
-  if(strict_max && (max >= 0))
+  (*piece_it).piece->get_quantifier(&min, &max, &strict_max);
+
+  std::vector<std::pair<const char*, int> >  saved_subregex;
+
+  if(is_regex_atom())
   {
-    int   timeslen;
-    //check if the piece doesn't exceed the max match
-    if((*piece_it)->match_piece_times(source, &timeslen, max+1, NULL))
-      return false;///too many matches
+    //recursive
+    bool retmatch;
+    atom->regex_intern->save_subregex_list(saved_subregex);
+    if((*piece_it).nr_matches >= min)
+    {
+      //go to next piece
+      std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
+      next_it++;
+      if(next_it == end_it)
+        return true;
+      retmatch = (*next_it).piece->match_piece(next_it, end_it, source, matched_len);
+      if(retmatch)
+        return true;
+    }
+    if(((max == -1) || ((*piece_it).nr_matches < max)) &&//try further with this piece
+      (((*piece_it).nr_matches < min) || ((*piece_it).nr_matches == 0) || ((*piece_it).piece->regex_atom->matched_len)))//if matched_len is zero, avoid infinite loop
+    {
+      int start_from_branch = 0;
+      int shortest_len = -1;
+      bool branch_saved = false;
+      //try all branches to get the shortest len
+      (*piece_it).nr_matches++;
+      while(atom->match(source, &start_from_branch, matched_len, piece_it, end_it))
+      {
+        if((shortest_len == -1) || (shortest_len > *matched_len))
+        {
+          shortest_len = *matched_len;
+          if(start_from_branch && (atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+          {
+            atom->regex_intern->save_subregex_list(saved_subregex);
+            branch_saved = true;
+          }
+        }
+        if(!start_from_branch || !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+          break;
+      }
+      if(shortest_len != -1)
+      {
+        *matched_len = shortest_len;
+        if(branch_saved)
+          atom->regex_intern->load_subregex_list(saved_subregex);
+        return true;
+      }
+      else
+      {
+        (*piece_it).nr_matches--;
+        atom->regex_intern->load_subregex_list(saved_subregex);
+        return false;
+      }
+    }
+    else
+    {
+      atom->regex_intern->load_subregex_list(saved_subregex);
+      return false;
+    }
   }
 
-  int i=min;
-  std::list<CRegexAscii_piece*>::iterator next_it = piece_it;
+  int i=0;
+  int shortest_len = -1;
+  int otherpieces_shortest = -1;
+  int i_shortest = -1;
+  std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
+  std::vector<std::pair<int,int> >    match_lens;
   next_it++;
   int pieceslen = 0;
   while(1)
   {
-    if((max > 0) && (i>max))
-      break;
-   int piecelen = 0;
-   if((*piece_it)->match_piece_times(source+pieceslen, &piecelen, !pieceslen ? i : 1, NULL))
-   {
-      pieceslen += piecelen;
+    int piecelen = 0;
+    bool retmatch;
+    retmatch = match_piece_times(source, &piecelen, i < min ? min : i, &match_lens);
+    i = match_lens.size()-1;//number of matches
+    if(i<0)
+      i = 0;
+    if((i>=min))
+    {
+      pieceslen = piecelen;
+      if((shortest_len >= 0) && (shortest_len <= pieceslen))//this branch is longer
+      {//try another branch
+        i = choose_another_branch(match_lens);
+        if(i >= 0)
+          continue;//try another branch
+        else
+          break;
+      }
       int   otherpieces = 0;
-      if((next_it == piece_list.end()) ||
-        ((*next_it)->get_is_reluctant() && match_piece_iter_reluctant(next_it, source+pieceslen, &otherpieces)) ||
-        (!(*next_it)->get_is_reluctant() && match_piece_iter_normal(next_it, source+pieceslen, &otherpieces)))
-      {
-        *matched_len = pieceslen + otherpieces;
-        return true;
-      }
+      if((next_it == end_it) ||
+        (*next_it).piece->match_piece(next_it, end_it, source+pieceslen, &otherpieces)
+        )
+      {
+        if((i == pieceslen) || (match_lens.at(0).second == 0) ||//minimum achieved already, cannot go lower than that
+            !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+        {
+          *matched_len = pieceslen + otherpieces;
+          return true;
+        }
+        if((shortest_len < 0) || (shortest_len > pieceslen))
+        {
+          shortest_len = pieceslen;
+          otherpieces_shortest = otherpieces;
+          i_shortest = i;
+          if(match_lens.at(0).second != 0)
+            atom->regex_intern->save_subregex_list(saved_subregex);
+        }
+        i = choose_another_branch(match_lens);
+        if(i >= 0)
+          continue;//try another branch
+        else
+          break;
+      }
+      else
+      {
+        //try further
+        if(retmatch)
+        {
+          i++;
+          if((max < 0) || (i<=max))
+            continue;
+          i--;
+        }
+      }
+    }
+    
+    if(i==0)
+    {
+      break;
     }
     else
-      break;
-    i++;
+    {
+      i = choose_another_branch(match_lens);
+      if(i >= 0)
+        continue;//try another branch
+      else
+        break;
+    }
   }
 
+  if(shortest_len >= 0)
+  {
+    if(strict_max && (max>=0) && (i_shortest > max))
+      return false;
+    *matched_len = shortest_len + otherpieces_shortest;
+    if(saved_subregex.size())
+      atom->regex_intern->load_subregex_list(saved_subregex);
+    return true;
+  }
   return false;
 }
 
 //match as much as possible
-bool CRegexAscii_branch::match_piece_iter_normal(
-                                        std::list<CRegexAscii_piece*>::iterator piece_it,
+bool CRegexAscii_piece::match_piece_iter_normal(
+                                        std::list<RegexAscii_pieceinfo>::iterator piece_it,
+                                        std::list<RegexAscii_pieceinfo>::iterator end_it,
                                         const char *source, int *matched_len)
 {
   *matched_len = 0;
 
   int min, max;
   bool  strict_max;
-  std::vector<int>    match_lens;
-  (*piece_it)->get_quantifier(&min, &max, &strict_max);
-  int   timeslen;
-  if(strict_max && (max >= 0))
+  std::vector<std::pair<int,int> >    match_lens;
+  (*piece_it).piece->get_quantifier(&min, &max, &strict_max);
+  int   timeslen = 0;
+  std::vector<std::pair<const char*, int> >  saved_subregex;
+
+  if(is_regex_atom())
   {
-    //check if the piece doesn't exceed the max match
-    //if((*piece_it)->match_piece_times(source, &timeslen, max+1, &match_lens))
-    //  return false;///too many matches
-    (*piece_it)->match_piece_times(source, &timeslen, max, &match_lens);
+    //recursive
+    bool retmatch;
+    atom->regex_intern->save_subregex_list(saved_subregex);
+    if(((max == -1) || ((*piece_it).nr_matches < max)) && //try further with this piece
+      (((*piece_it).nr_matches < min) || ((*piece_it).nr_matches == 0) || ((*piece_it).piece->regex_atom->matched_len)))//if matched_len is zero, avoid infinite loop
+    {
+      int start_from_branch = 0;
+      int longest_len = -1;
+      bool branch_saved = false;
+      //try all branches to get the longest len
+      (*piece_it).nr_matches++;
+      while(atom->match(source, &start_from_branch, matched_len, piece_it, end_it))
+      {
+        if((longest_len < *matched_len))
+        {
+          longest_len = *matched_len;
+          if(start_from_branch && (atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+          {
+            atom->regex_intern->save_subregex_list(saved_subregex);
+            branch_saved = true;
+          }
+        }
+        if(!start_from_branch || !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+          break;
+      }
+      if(longest_len != -1)
+      {
+        *matched_len = longest_len;
+        if(branch_saved)
+          atom->regex_intern->load_subregex_list(saved_subregex);
+        return true;
+      }
+      else
+      {
+        atom->regex_intern->load_subregex_list(saved_subregex);
+        (*piece_it).nr_matches--;
+      }
+    }
+    if((*piece_it).nr_matches >= min)
+    {
+      //go to next piece
+      std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
+      next_it++;
+      if(next_it == end_it)
+        return true;
+      retmatch = (*next_it).piece->match_piece(next_it, end_it, source, matched_len);
+      if(!retmatch)
+        atom->regex_intern->load_subregex_list(saved_subregex);
+      return retmatch;
+    }
+    else
+    {
+    //  regex_atom->restore_match();
+      atom->regex_intern->load_subregex_list(saved_subregex);
+      return false;
+    }
   }
-  else if(!strict_max && (max >= 0))
-    (*piece_it)->match_piece_times(source, &timeslen, max, &match_lens);
-  else
-    (*piece_it)->match_piece_times(source, &timeslen, -1, &match_lens);
 
-  int i;
-  std::list<CRegexAscii_piece*>::iterator next_it = piece_it;
+  int longest_len = -1;
+  int otherpieces_longest = -1;
+  int i_longest = -1;
+  int i = max;
+  std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
   next_it++;
-  if(next_it == piece_list.end())
+
+  bool retmatch;
+  while(1)
   {
-    if((int)match_lens.size() > min)
-    {
-      *matched_len = timeslen;
-      return true;
+    retmatch = match_piece_times(source, &timeslen, i, &match_lens);
+    i=match_lens.size()-1;//number of matches
+    if((i>=min))
+    {
+      if(timeslen < longest_len)
+      {//this branch is of no use: it is shorter than the longest match found so far
+        i = choose_another_branch(match_lens);
+        if(i >= 0)
+        {
+          i = max;
+          continue;//try another branch
+        }
+        else
+          break;
+      }
+      //int piecelen = 0;
+      int   otherpieces = 0;
+      if((next_it == end_it) ||
+        (*next_it).piece->match_piece(next_it, end_it, source+timeslen, &otherpieces)
+        )
+      {
+        if(timeslen > longest_len)
+        {
+          longest_len = timeslen;
+          otherpieces_longest = otherpieces;
+          i_longest = i;
+          if(!(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+          {
+            *matched_len = longest_len + otherpieces_longest;
+            return true;
+          }
+          else
+          {
+            if(match_lens.at(0).second)
+              atom->regex_intern->save_subregex_list(saved_subregex);
+          }
+        }
+      }
+      else
+      {
+        if(!match_lens.at(0).second)
+        {
+          match_lens.resize(match_lens.size()-1);
+          i--;
+          if(i >= 0)
+            continue;//try smaller 
+          else
+            break;
+        }
+        else
+        {
+          i = choose_another_branch(match_lens);
+          if(i >= 0)
+            continue;//try another branch
+          else
+            break;
+        }
+      }
+    }
+    //now try another branch
+    i = choose_another_branch(match_lens);
+    if(i >= 0)
+    {
+      i = max;
+      continue;//try another branch
     }
     else
-      return false;
-  }
-  for(i=match_lens.size()-1; i>=min; i--)
+      break;
+  }//end while
+
+  if(longest_len >= 0)
   {
-    int piecelen = 0;
-    int   otherpieces = 0;
-    if(((*next_it)->get_is_reluctant() && match_piece_iter_reluctant(next_it, source+match_lens[i]+piecelen, &otherpieces)) ||
-      (!(*next_it)->get_is_reluctant() && match_piece_iter_normal(next_it, source+match_lens[i]+piecelen, &otherpieces)))
-    {
-      *matched_len = match_lens[i] + piecelen + otherpieces;
-      return true;
-    }
+    *matched_len = longest_len + otherpieces_longest;
+    if(saved_subregex.size())
+      atom->regex_intern->load_subregex_list(saved_subregex);
+    return true;
   }
 
   return false;
@@ -935,31 +1766,68 @@
 bool CRegexAscii_piece::match_piece_times(const char *source, 
                                           int *piecelen, 
                                           int times,
-                                          std::vector<int>    *match_lens)
+                                          std::vector<std::pair<int,int> >    *match_lens)
 {
-  *piecelen = 0;
-  for(int i=0;(times < 0) || (i<times);i++)
-  {
+  int i=0;
+  if(match_lens && match_lens->size())
+  {
+    i = match_lens->size()-1;
+  }
+  if(match_lens && match_lens->size())
+    *piecelen = match_lens->at(match_lens->size()-1).first;
+  else
+    *piecelen = 0;
+  if((times >= 0) && (i>=times))
+    return true;
+  for(;(times < 0) || (i<times);i++)
+  {
+    int   atomlen;
+    int   start_from_branch = 0;
+    if(match_lens && (i<(int)match_lens->size()))
+      start_from_branch = match_lens->at(i).second;
+    bool first_branch = (start_from_branch == 0);
+    if(!atom->match(source+*piecelen, &start_from_branch, &atomlen, empty_pieces.begin(), empty_pieces.end()))
+    {
+      if(match_lens)
+      {
+        if(i >= (int)match_lens->size())
+          match_lens->push_back(std::pair<int,int>(*piecelen, 0));
+        else
+          (*match_lens)[i] = std::pair<int,int>(*piecelen, 0);
+      }
+      return false;
+    }
     if(match_lens)
-      match_lens->push_back(*piecelen);
-    int   atomlen;
-    if(!atom->match(source+*piecelen, &atomlen))
-      return false;
+    {
+      if(i >= (int)match_lens->size())
+        match_lens->push_back(std::pair<int,int>(*piecelen, start_from_branch));
+      else
+        (*match_lens)[i] = std::pair<int,int>(*piecelen, start_from_branch);
+    }
     *piecelen += atomlen;
     if(!atomlen && !source[*piecelen])
     {
       atom->regex_intern->reachedEnd = true;
       break;
     }
+    if(first_branch && (atomlen == 0))//avoid infinite loop
+    {
+      break;
+    }
   }
   if(match_lens)
-    match_lens->push_back(*piecelen);
+  {
+  //  if(i >= match_lens->size())
+      match_lens->push_back(std::pair<int,int>(*piecelen, 0));
+  //  else
+  //    (*match_lens)[i] = std::pair<int,int>(*piecelen, 0);
+  }
 
   return true;
 }
 
 //match any of chargroups
-bool CRegexAscii_chargroup::match(const char *source, int *matched_len)
+bool CRegexAscii_chargroup::match_internal(const char *source, int *start_from_branch, int *matched_len)
 {
   *matched_len = 0;
   std::list<chargroup_t>::iterator  cgt_it;
@@ -975,26 +1843,184 @@
       return false;
   }
 
-  if(source[0] == 0x0A)
+  if((source[0] == 0x0A) || ((source[0] == 0x0D) && (source[1] == 0x0A)))
   {
     if((regex_intern->flags & REGEX_ASCII_MULTILINE) &&
         (chargroup_list.size() == 1) && (chargroup_list.begin()->flags == CHARGROUP_FLAGS_ENDLINE))
     {
-      *matched_len = 1;
+      //*matched_len = 1;
       return true;
     }
   }
 
+  bool found = false;
   for(cgt_it = chargroup_list.begin(); cgt_it != chargroup_list.end(); cgt_it++)
   {
-    if(cgt_it->flags == CHARGROUP_FLAGS_MULTICHAR)
-    {
-      switch(cgt_it->c1)
-      {
-        case 'p'://catEsc
-        case 'P'://complEsc
-          //ignore the prop for now
-          throw XQUERY_EXCEPTION( err::FORX0002 );
+    switch(cgt_it->flags&0x7F)
+    {
+    case CHARGROUP_FLAGS_MULTICHAR_p:
+      switch(cgt_it->c1)
+      {
+      case unicode::UNICODE_Ll + 50:
+        if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Ll) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Lm) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Lo) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Lt) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Lu))
+        {
+          if(!(cgt_it->flags & 0x80))
+            found = true;
+        }
+        else
+        {
+          if(cgt_it->flags & 0x80)
+            found = true;
+        }
+        break;
+      case unicode::UNICODE_Mc + 50:
+        if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Mn) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Mc) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Me))
+        {
+          if(!(cgt_it->flags & 0x80))
+            found = true;
+        }
+        else
+        {
+          if(cgt_it->flags & 0x80)
+            found = true;
+        }
+        break;
+      case unicode::UNICODE_Nd + 50:
+        if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Nd) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Nl) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_No))
+        {
+          if(!(cgt_it->flags & 0x80))
+            found = true;
+        }
+        else
+        {
+          if(cgt_it->flags & 0x80)
+            found = true;
+        }
+        break;
+      case unicode::UNICODE_Pc + 50:
+        if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Pc) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Pd) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Ps) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Pe) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Pi) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Pf) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Po))
+        {
+          if(!(cgt_it->flags & 0x80))
+            found = true;
+        }
+        else
+        {
+          if(cgt_it->flags & 0x80)
+            found = true;
+        }
+        break;
+      case unicode::UNICODE_Zl + 50:
+        if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Zs) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Zl) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Zp))
+        {
+          if(!(cgt_it->flags & 0x80))
+            found = true;
+        }
+        else
+        {
+          if(cgt_it->flags & 0x80)
+            found = true;
+        }
+        break;
+      case unicode::UNICODE_Sc + 50:
+        if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Sm) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Sc) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Sk) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_So))
+        {
+          if(!(cgt_it->flags & 0x80))
+            found = true;
+        }
+        else
+        {
+          if(cgt_it->flags & 0x80)
+            found = true;
+        }
+        break;
+      case unicode::UNICODE_Cc + 50:
+        if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Cc) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Cf) ||
+           unicode::check_codepoint_category(source[0], unicode::UNICODE_Co))//ignore unicode::UNICODE_Cn
+        {
+          if(!(cgt_it->flags & 0x80))
+            found = true;
+        }
+        else
+        {
+          if(cgt_it->flags & 0x80)
+            found = true;
+        }
+        break;
+      default:
+        if(unicode::check_codepoint_category(source[0], (unicode::category)cgt_it->c1))
+        {
+          if(!(cgt_it->flags & 0x80))
+            found = true;
+        }
+        else
+        {
+          if(cgt_it->flags & 0x80)
+            found = true;
+        }
+        break;
+      }break;
+    case CHARGROUP_FLAGS_MULTICHAR_Is:
+    {
+      const unicode::code_point *cp = block_escape[cgt_it->c1].cp;
+      if(((unicode::code_point)source[0] >= cp[0]) && 
+        ((unicode::code_point)source[0] <= cp[1]))
+      {
+        if(!(cgt_it->flags & 0x80))
+          found = true;
+      }
+      else if(block_escape[cgt_it->c1].ext_cp)
+      {
+        cp = block_escape[cgt_it->c1].ext_cp;
+        while(*cp)
+        {
+          if(((unicode::code_point)source[0] >= cp[0]) && 
+            ((unicode::code_point)source[0] <= cp[1]))
+            break;
+          cp += 2;
+        }
+        if(*cp)
+        {
+          if(!(cgt_it->flags & 0x80))
+            found = true;
+        }
+        else
+        {
+          if(cgt_it->flags & 0x80)
+            found = true;
+        }
+      }
+      else
+      {
+        if(cgt_it->flags & 0x80)
+          found = true;
+      }
+    }break;
+    case CHARGROUP_FLAGS_MULTICHAR_OTHER:
+    {
+      bool value_true = true;
+      switch(cgt_it->c1)
+      {
+        case 'S':value_true = false;//[^\s]
         case 's'://[#x20\t\n\r]
           switch(source[0])
           {
@@ -1002,86 +2028,100 @@
           case '\r':
           case '\n':
           case ' ':
-            *matched_len = 1;
-            return true;
-          default:
-            return false;
-          }
-        case 'S'://[^\s]
-          switch(source[0])
-          {
-          case 0:
-            regex_intern->reachedEnd = true;
-          case '\t':
-          case '\r':
-          case '\n':
-          case ' ':
-            return false;
-          default:
-            *matched_len = 1;
-            return true;
-          }
+            found = true;
+          default:
+            break;
+          }
+          break;
+        case 'I':value_true = false;//[^\i]
         case 'i'://the set of initial name characters, those matched by Letter | '_' | ':'
           if((source[0] == '_') ||
             (source[0] == ':') ||
             XQCharType::isLetter(source[0]))
           {
-            *matched_len = 1;
-            return true;
-          }
-          return false;
-        case 'I':
-          if((source[0] == '_') ||
-            (source[0] == ':') ||
-            XQCharType::isLetter(source[0]))
-          {
-            return false;
-          }
-          *matched_len = 1;
-          return true;
+            found = true;
+          }
+          break;
+        case 'C':value_true = false;//[^\c]
         case 'c'://the set of name characters, those matched by NameChar
           if(XQCharType::isNameChar(source[0]))
           {
-            *matched_len = 1;
-            return true;
-          }
-          return false;
-        case 'C':
-          if(XQCharType::isNameChar(source[0]))
-          {
-            return false;
-          }
-          *matched_len = 1;
-          return true;
+            found = true;
+          }
+          break;
+        case 'D':value_true = false;//[^\d]
         case 'd':
-        case 'D':
+          if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Nd))
+            found = true;
+          break;
+        case 'W':value_true = false;//[^\w]
         case 'w':
-        case 'W':
+         found = !(unicode::check_codepoint_category(source[0], unicode::UNICODE_Pc) ||
+                   unicode::check_codepoint_category(source[0], unicode::UNICODE_Pd) ||
+                   unicode::check_codepoint_category(source[0], unicode::UNICODE_Ps) ||
+                   unicode::check_codepoint_category(source[0], unicode::UNICODE_Pe) ||
+                   unicode::check_codepoint_category(source[0], unicode::UNICODE_Pi) ||
+                   unicode::check_codepoint_category(source[0], unicode::UNICODE_Pf) ||
+                   unicode::check_codepoint_category(source[0], unicode::UNICODE_Po) ||
+                   unicode::check_codepoint_category(source[0], unicode::UNICODE_Zs) ||
+                   unicode::check_codepoint_category(source[0], unicode::UNICODE_Zl) ||
+                   unicode::check_codepoint_category(source[0], unicode::UNICODE_Zp) ||
+                   unicode::check_codepoint_category(source[0], unicode::UNICODE_Cc) ||
+                   unicode::check_codepoint_category(source[0], unicode::UNICODE_Cf) ||
+                   unicode::check_codepoint_category(source[0], unicode::UNICODE_Co));//ignore unicode::UNICODE_Cn
+          break;
         default:
-          throw XQUERY_EXCEPTION( err::FORX0002 );
-      }
-      return false;
-    }
-    else if(cgt_it->flags == CHARGROUP_FLAGS_ENDLINE)
-    {
-      return false;
-    }
-    else
+          throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(source, ZED(REGEX_UNIMPLEMENTED)) );
+      }
+      if((found && value_true) || (!found && !value_true))
+      {
+        if(!source[0])
+          regex_intern->reachedEnd = true;
+        *matched_len = 1;
+        return true;
+      }
+      else
+        return false;
+    }
+    case CHARGROUP_FLAGS_ENDLINE:
+    {
+      return false;
+    }
+    case CHARGROUP_FLAGS_ONECHAR:
+    {
+      if(regex_intern->flags & REGEX_ASCII_CASE_INSENSITIVE)
+      {
+        char  sup = toupper(source[0]);
+        if(sup == toupper(cgt_it->c1))
+          found = true;
+      }
+      else
+      {
+        if(source[0] == cgt_it->c1)
+          found = true;
+      }
+      break;
+    }
+    default:
     {
       if(regex_intern->flags & REGEX_ASCII_CASE_INSENSITIVE)
       {
         char  sup = toupper(source[0]);
         if((sup >= toupper(cgt_it->c1)) &&
           (sup <= toupper(cgt_it->c2)))
-          break;
+          found = true;
       }
       else
       {
         if((source[0] >= cgt_it->c1) &&
           (source[0] <= cgt_it->c2))
-          break;
+          found = true;
       }
-    }
+      break;
+    }
+    }
+    if(found)
+      break;
   }
   if(cgt_it == chargroup_list.end())
     return false;
@@ -1089,7 +2129,7 @@
   if(classsub)
   {
     int   classsub_len;
-    if(classsub->match(source, &classsub_len))
+    if(classsub->match_internal(source, NULL, &classsub_len))
       return false;
   }
 
@@ -1097,14 +2137,14 @@
   return true;
 }
 
-bool CRegexAscii_negchargroup::match(const char *source, int *matched_len)
+bool CRegexAscii_negchargroup::match_internal(const char *source, int *start_from_branch, int *matched_len)
 {
   if(!source[0])
   {
     regex_intern->reachedEnd = true;
     return false;
   }
-  if(!CRegexAscii_chargroup::match(source, matched_len))
+  if(!CRegexAscii_chargroup::match_internal(source, start_from_branch, matched_len))
   {
     *matched_len = 1;
     return true;
@@ -1112,7 +2152,7 @@
   return false;
 }
 
-bool CRegexAscii_wildchar::match(const char *source, int *matched_len)
+bool CRegexAscii_wildchar::match_internal(const char *source, int *start_from_branch, int *matched_len)
 {
   *matched_len = 0;
   if(source[0])
@@ -1135,7 +2175,7 @@
   }
 }
 
-bool CRegexAscii_backref::match(const char *source, int *matched_len)
+bool CRegexAscii_backref::match_internal(const char *source, int *start_from_branch, int *matched_len)
 {
   const char *submatch = regex_intern->subregex.at(backref-1)->matched_source;
   if(!submatch)
@@ -1144,11 +2184,33 @@
     return true;
   }
   *matched_len = regex_intern->subregex.at(backref-1)->matched_len;
-  if(!strncmp(source, submatch, *matched_len))
-  {
-    return true;
-  }
-  *matched_len = 0;
+  if(regex_intern->flags & REGEX_ASCII_CASE_INSENSITIVE)
+  {
+    if(compare_ni(source, submatch, *matched_len)) 
+    {
+      return true;
+    }
+  }
+  else
+  {
+    if(!strncmp(source, submatch, *matched_len))
+    {
+      return true;
+    }
+  }
+  *matched_len = 0;
+  return false;
+}
+
+bool CRegexAscii_pinstart::match_internal(const char *source, int *start_from_branch, int *matched_len)
+{
+  *matched_len = 0;
+  if(source == regex_intern->source_start)
+    return true;
+  if((regex_intern->flags & REGEX_ASCII_MULTILINE) &&
+    ((source[-1] == '\n') || (source[-1] == '\r')))
+    return true;
+  
   return false;
 }
 

=== modified file 'src/util/regex_ascii.h'
--- src/util/regex_ascii.h	2011-07-18 14:25:21 +0000
+++ src/util/regex_ascii.h	2011-10-28 02:34:26 +0000
@@ -21,35 +21,55 @@
 #include <vector>
 
 #include <zorba/config.h>
+#include "util/unicode_util.h"
 
 namespace zorba {
   namespace regex_ascii{
 
 //matching flags
-#define REGEX_ASCII_CASE_INSENSITIVE    1
-#define REGEX_ASCII_DOTALL              2
-#define REGEX_ASCII_MULTILINE           4
-#define REGEX_ASCII_COMMENTS            8
-#define REGEX_ASCII_LITERAL             16
+#define REGEX_ASCII_CASE_INSENSITIVE    1   //i
+#define REGEX_ASCII_DOTALL              2   //s
+#define REGEX_ASCII_MULTILINE           4   //m
+#define REGEX_ASCII_NO_WHITESPACE       8   //x
+#define REGEX_ASCII_LITERAL             16  //q
+
+#define REGEX_ASCII_GET_LONGEST_BRANCH  32   //try all branches and get the longest match (or shortest for reluctant pieces)
+#define REGEX_ASCII_MINIMAL_MATCH       64   //consider all pieces as reluctant
+#define REGEX_ASCII_WHOLE_MATCH         128  //match only the whole string, as if the pattern were written "^regex$"
+#define REGEX_ASCII_GROUPING_LEN_WHOLE_PIECE    256  //compute the length of a grouping over the whole piece (for example, when (a)+ matches "aa", $1 gets string length 2 instead of the length 1 of the last iteration)
 
 class CRegexAscii_regex;
-
-class IRegexMatcher
-{
-public:
+class CRegexAscii_piece;
+
+struct RegexAscii_pieceinfo
+{
+  union
+  {
+    CRegexAscii_piece*  piece;
+    CRegexAscii_regex*  group_regex;
+  };
+  int nr_matches;
+
+  RegexAscii_pieceinfo(CRegexAscii_piece* piece) {nr_matches=0;this->piece=piece;}
+  RegexAscii_pieceinfo(CRegexAscii_regex* group_regex) {nr_matches=-1;this->group_regex=group_regex;}
+};
+
+
+class IRegexAtom
+{
+protected:
+  friend class CRegexAscii_piece;
   CRegexAscii_regex *regex_intern;
 public:
-  IRegexMatcher(CRegexAscii_regex* regex) : regex_intern(regex) {}
-  virtual ~IRegexMatcher() {}
-
-  virtual bool match(const char *source, int *matched_len) = 0;
-};
-
-class IRegexAtom : public IRegexMatcher
-{
-public:
-  IRegexAtom(CRegexAscii_regex* regex) : IRegexMatcher(regex) {}
+  IRegexAtom(CRegexAscii_regex* regex)  : regex_intern(regex) {}
   virtual ~IRegexAtom() {}
+
+  virtual bool match(const char *source, int *start_from_branch, int *matched_len,
+                    std::list<RegexAscii_pieceinfo>::iterator next_piece,
+                    std::list<RegexAscii_pieceinfo>::iterator end_piece);
+  virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len) = 0;
+  virtual void reset_match() {}
+//  virtual void restore_match() {}
 };
 
 class CRegexAscii_branch;
@@ -66,58 +86,74 @@
   friend class CRegexAscii_negchargroup;
   friend class CRegexAscii_wildchar;
   friend class CRegexAscii_backref;
+  friend class CRegexAscii_pinstart;
 public:
   CRegexAscii_regex(CRegexAscii_regex *);
   virtual ~CRegexAscii_regex();
 
   bool match_anywhere(const char *source, unsigned int flags, int *match_pos, int *matched_len);
   bool match_from(const char *source, unsigned int flags, int *match_pos, int *matched_len);
-  virtual bool match(const char *source, int *matched_len);
 
   //for replace $1, $2 ...
   bool  get_indexed_match(int index, const char **matched_source, int *matched_len);
   unsigned int get_indexed_regex_count();
 
   bool get_reachedEnd() {return reachedEnd;}
-  bool set_align_begin(bool align_begin);
+public:
+  virtual bool match(const char *source, int *start_from_branch, int *matched_len,
+                    std::list<RegexAscii_pieceinfo>::iterator next_piece,
+                    std::list<RegexAscii_pieceinfo>::iterator end_piece);
+  virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len) {return false;}//not impl
+  virtual void reset_match();
+//  virtual void restore_match();
 private:
   void add_branch(CRegexAscii_branch *branch);
+
+  void save_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex);
+  void load_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex);
 private:
   unsigned int flags;
   std::list<CRegexAscii_branch*>   branch_list;
-  bool  align_begin;
+
+  const char  *source_start;
 
   const char  *matched_source;
   int         matched_len;
+//  const char  *backup_matched_source;
+//  int         backup_matched_len;
   std::vector<CRegexAscii_regex*>    subregex;//for grouping
 
   bool        reachedEnd;
 };
 
-class CRegexAscii_branch : public IRegexMatcher
+class CRegexAscii_branch
 {
   friend class CRegexAscii_parser;
 public:
   CRegexAscii_branch(CRegexAscii_regex* regex);
   ~CRegexAscii_branch();
 
-  virtual bool match(const char *source, int *matched_len);
+  bool match(const char *source, int *matched_len,
+              CRegexAscii_regex* group_regex,
+              std::list<RegexAscii_pieceinfo>::iterator next_piece,
+              std::list<RegexAscii_pieceinfo>::iterator end_piece);
+  void reset();
+//  void restore();
 private:
-  std::list<CRegexAscii_piece*>   piece_list;
+  std::list<RegexAscii_pieceinfo>   piece_list;
 private:
   void add_piece(CRegexAscii_piece *piece);
  
-  bool match_piece_iter_reluctant(std::list<CRegexAscii_piece*>::iterator piece_it,
-                        const char *source, int *matched_len);
-  bool match_piece_iter_normal(std::list<CRegexAscii_piece*>::iterator piece_it,
-                        const char *source, int *matched_len);
 };
 
 class CRegexAscii_piece //: public IRegexMatcher
 {
   friend class CRegexAscii_parser;
-public:
+  friend class CRegexAscii_branch;
+
   IRegexAtom *atom;
+  CRegexAscii_regex *regex_atom;
+
   //quantifier
   bool  strict_max;
   int   min;
@@ -134,14 +170,35 @@
   void get_quantifier(int *min, int *max, bool *strict_max);
   bool get_is_reluctant();
 //  bool match(const char *source, int *matched_len);
+  bool match_piece(std::list<RegexAscii_pieceinfo>::iterator next_piece,
+                   std::list<RegexAscii_pieceinfo>::iterator end_piece,
+                   const char *source, int *matched_len);
+protected:
   bool match_piece_times(const char *source, 
                          int *piecelen, 
                          int times,
-                         std::vector<int>    *match_lens);
-};
-
-#define   CHARGROUP_FLAGS_MULTICHAR   1
-#define   CHARGROUP_FLAGS_ENDLINE     2
+                         std::vector<std::pair<int,int> >    *match_lens);
+  int  choose_another_branch(std::vector<std::pair<int,int> > &match_lens);
+  bool match_piece_iter_reluctant(std::list<RegexAscii_pieceinfo>::iterator next_piece,
+                        std::list<RegexAscii_pieceinfo>::iterator end_piece,
+                        const char *source, int *matched_len);
+  bool match_piece_iter_normal(std::list<RegexAscii_pieceinfo>::iterator next_piece,
+                        std::list<RegexAscii_pieceinfo>::iterator end_piece,
+                        const char *source, int *matched_len);
+  bool is_regex_atom();
+};
+
+
+enum CHARGROUP_t
+{
+CHARGROUP_NO_MULTICHAR = 0,
+CHARGROUP_FLAGS_CHAR_RANGE,
+CHARGROUP_FLAGS_MULTICHAR_p,
+CHARGROUP_FLAGS_MULTICHAR_Is,
+CHARGROUP_FLAGS_MULTICHAR_OTHER,
+CHARGROUP_FLAGS_ONECHAR,
+CHARGROUP_FLAGS_ENDLINE
+};
 
 class CRegexAscii_chargroup : public IRegexAtom
 {
@@ -152,19 +209,20 @@
 private:
   typedef struct
   {
-    unsigned char  flags;
+    CHARGROUP_t  flags;
     char  c1;
     char  c2;
   }chargroup_t;
   std::list<chargroup_t>    chargroup_list;
   CRegexAscii_chargroup *classsub;
 public:
-  void addMultiChar(char c);
+  void addMultiChar(char c, CHARGROUP_t multichar_type);
   void addEndLine();
   void addCharRange(char c1, char c2);
+  void addOneChar(char c);
   void addClassSub(CRegexAscii_chargroup* classsub);
 
-  virtual bool match(const char *source, int *matched_len);
+  virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len);
 };
 
 class CRegexAscii_negchargroup : public CRegexAscii_chargroup
@@ -173,7 +231,7 @@
   CRegexAscii_negchargroup(CRegexAscii_regex* regex);
   virtual ~CRegexAscii_negchargroup();
 
-  virtual bool match(const char *source, int *matched_len);
+  virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len);
 };
 
 class CRegexAscii_wildchar : public IRegexAtom
@@ -182,7 +240,7 @@
   CRegexAscii_wildchar(CRegexAscii_regex* regex);
   virtual ~CRegexAscii_wildchar();
 
-  virtual bool match(const char *source, int *matched_len);
+  virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len);
 };
 
 class CRegexAscii_backref : public IRegexAtom
@@ -191,14 +249,29 @@
   CRegexAscii_backref(CRegexAscii_regex* regex, unsigned int backref);
   virtual ~CRegexAscii_backref();
 
-  virtual bool match(const char *source, int *matched_len);
+  virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len);
 private:
   unsigned int backref;
 };
 
+class CRegexAscii_pinstart : public IRegexAtom
+{
+public:
+  CRegexAscii_pinstart(CRegexAscii_regex* regex);
+
+  virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len);
+};
+
 class CRegexAscii_parser
 {
 public:
+  typedef struct
+  {
+    const unicode::code_point    cp[2];//in pairs start, end
+    const unicode::code_point    *ext_cp;
+    const char    *group_name;
+  }block_escape_t;
+
   CRegexAscii_parser();
   ~CRegexAscii_parser();
 
@@ -211,7 +284,8 @@
   CRegexAscii_piece* parse_piece(const char *pattern, int *piece_len);
   char myishex(char c);
   bool myisdigit(char c);
-  char readChar(const char *pattern, int *char_len, bool *is_multichar);
+  bool myisletterAZ(char c);
+  char readChar(const char *pattern, bool for_atom, int *char_len, CHARGROUP_t *multichar_type);
   IRegexAtom* read_atom(const char *pattern, int *atom_len);
   CRegexAscii_chargroup* readchargroup(const char *pattern, int *chargroup_len);
   void read_quantifier(CRegexAscii_piece *piece, const char *pattern, int *quantif_len);
@@ -222,7 +296,8 @@
   unsigned int flags;
 };
 
-}}//end namespace zorba::regex_ascii
+}
+}//end namespace zorba::regex_ascii
 
 #endif
 /* vim:set et sw=2 ts=2: */

=== modified file 'src/util/string_util.cpp'
--- src/util/string_util.cpp	2011-07-07 18:48:27 +0000
+++ src/util/string_util.cpp	2011-10-28 02:34:26 +0000
@@ -121,6 +121,13 @@
   return result;
 }
 
+uint32_t hash( char const *data, size_t len ) {
+  uint32_t result = 5381;
+  for ( size_t i = 0; i < len; ++i )
+    result = (result << 5) + result + *data++;
+  return result;
+}
+
 char* itoa( long long n, char *buf ) {
   //
   // This implementation is much faster than using sprintf(3).

=== modified file 'src/util/string_util.h'
--- src/util/string_util.h	2011-10-11 17:59:20 +0000
+++ src/util/string_util.h	2011-10-28 02:34:26 +0000
@@ -138,6 +138,17 @@
  */
 #define BUILD_STRING(...) (::zorba::ztd::string_builder() << __VA_ARGS__)
 
+////////// String hash /////////////////////////////////////////////////////////
+
+/**
+ * Performs a hash of the given data.
+ *
+ * @param data A pointer to the start of the data.
+ * @param len The length of the data in bytes.
+ * @return Returns said hash.
+ */
+uint32_t hash( char const *data, size_t len );
+
 ////////// String equality /////////////////////////////////////////////////////
 
 /**

=== modified file 'src/util/unicode_categories.cpp'
--- src/util/unicode_categories.cpp	2011-06-14 17:26:33 +0000
+++ src/util/unicode_categories.cpp	2011-10-28 02:34:26 +0000
@@ -65812,7 +65812,7 @@
   { 0x100000, 0x100000, UNICODE_Co},
 };
 
-bool check_codepoint_category(code_point cp, UnicodeCategoriesEnum categ)
+bool check_codepoint_category(code_point cp, category categ)
 {
   if(cp < 0x10000)
     return codepoints_categories[cp] == categ;
@@ -65824,10 +65824,10 @@
       if(cp >= codepoints_categories2[i].cp1)
         return codepoints_categories2[i].category == categ;
       else
-        return false;
+        return categ ? false : true;
     }
   }
-  return false;
+  return categ ? false : true;
 }
 
 /*

=== modified file 'src/util/unicode_categories.h'
--- src/util/unicode_categories.h	2011-06-14 17:26:33 +0000
+++ src/util/unicode_categories.h	2011-10-28 02:34:26 +0000
@@ -22,46 +22,53 @@
 namespace zorba {
 namespace unicode {
 
-//Unicode codepoint categories, as from http://www.fileformat.info/info/unicode/category/index.htm
+///////////////////////////////////////////////////////////////////////////////
 
-enum UnicodeCategoriesEnum {
-UNICODE_Cc, //Other, Control
-UNICODE_Cf, //Other, Format
-UNICODE_Co, //Other, Private Use
-UNICODE_Cs, //Other, Surrogate
-UNICODE_Ll, //Letter, Lowercase
-UNICODE_Lm, //Letter, Modifier
-UNICODE_Lo, //Letter, Other
-UNICODE_Lt, //Letter, Titlecase
-UNICODE_Lu, //Letter, Uppercase
-UNICODE_Mc, //Mark, Spacing Combining
-UNICODE_Me, //Mark, Enclosing
-UNICODE_Mn, //Mark, Nonspacing
-UNICODE_Nd, //Number, Decimal Digit
-UNICODE_Nl, //Number, Letter
-UNICODE_No, //Number, Other
-UNICODE_Pc, //Punctuation, Connector
-UNICODE_Pd, //Punctuation, Dash
-UNICODE_Pe, //Punctuation, Close
-UNICODE_Pf, //Punctuation, Final quote (may behave like Ps or Pe depending on usage)
-UNICODE_Pi, //Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
-UNICODE_Po, //Punctuation, Other
-UNICODE_Ps, //Punctuation, Open
-UNICODE_Sc, //Symbol, Currency
-UNICODE_Sk, //Symbol, Modifier
-UNICODE_Sm, //Symbol, Math
-UNICODE_So, //Symbol, Other
-UNICODE_Zl, //Separator, Line
-UNICODE_Zp, //Separator, Paragraph
-UNICODE_Zs  //Separator, Space
+/**
+ * Unicode codepoint categories.
+ * See: http://www.fileformat.info/info/unicode/category/
+ */
+enum category {
+  UNICODE_Cn, // Not Assigned
+  UNICODE_Cc, // Other, Control
+  UNICODE_Cf, // Other, Format
+  UNICODE_Co, // Other, Private Use
+  UNICODE_Cs, // Other, Surrogate
+  UNICODE_Ll, // Letter, Lowercase
+  UNICODE_Lm, // Letter, Modifier
+  UNICODE_Lo, // Letter, Other
+  UNICODE_Lt, // Letter, Titlecase
+  UNICODE_Lu, // Letter, Uppercase
+  UNICODE_Mc, // Mark, Spacing Combining
+  UNICODE_Me, // Mark, Enclosing
+  UNICODE_Mn, // Mark, Nonspacing
+  UNICODE_Nd, // Number, Decimal Digit
+  UNICODE_Nl, // Number, Letter
+  UNICODE_No, // Number, Other
+  UNICODE_Pc, // Punctuation, Connector
+  UNICODE_Pd, // Punctuation, Dash
+  UNICODE_Pe, // Punctuation, Close
+  UNICODE_Pf, // Punctuation, Final quote (like Ps or Pe depending on usage)
+  UNICODE_Pi, // Punctuation, Initial quote (like Ps or Pe depending on usage)
+  UNICODE_Po, // Punctuation, Other
+  UNICODE_Ps, // Punctuation, Open
+  UNICODE_Sc, // Symbol, Currency
+  UNICODE_Sk, // Symbol, Modifier
+  UNICODE_Sm, // Symbol, Math
+  UNICODE_So, // Symbol, Other
+  UNICODE_Zl, // Separator, Line
+  UNICODE_Zp, // Separator, Paragraph
+  UNICODE_Zs  // Separator, Space
 };
 
 bool is_UnicodeNd(code_point cp, code_point *ret_zero);
 
-bool check_codepoint_category(code_point cp, UnicodeCategoriesEnum categ);
-
-}
-}
-
-#endif
+bool check_codepoint_category(code_point cp, category categ);
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace unicode
+} // namespace zorba
+
+#endif /* ZORBA_UNICODE_CATEGORIES */
 /* vim:set et sw=2 ts=2: */

=== modified file 'src/util/unicode_util.cpp'
--- src/util/unicode_util.cpp	2011-07-17 00:10:56 +0000
+++ src/util/unicode_util.cpp	2011-10-28 02:34:26 +0000
@@ -22,15 +22,19 @@
 #include <functional>                   /* for binary_function */
 #include <utility>                      /* for pair */
 
-#include <unicode/normlzr.h>
-#include <unicode/ustring.h>
+#ifndef ZORBA_NO_ICU
+# include <unicode/normlzr.h>
+# include <unicode/ustring.h>
+#endif /* ZORBA_NO_ICU */
 
 #include "cxx_util.h"
 #include "unicode_util.h"
 #include "utf8_util.h"
 
 using namespace std;
+#ifndef ZORBA_NO_ICU
 U_NAMESPACE_USE
+#endif /* ZORBA_NO_ICU */
 
 namespace zorba {
 namespace unicode {
@@ -2208,6 +2212,8 @@
   return to_case<upper>( c );
 }
 
+#ifndef ZORBA_NO_ICU
+
 bool normalize( string const &in, normalization::type n, string *out ) {
   UErrorCode status = U_ZERO_ERROR;
   UNormalizationMode icu_mode;
@@ -2230,8 +2236,11 @@
   return U_SUCCESS( status ) == TRUE;
 }
 
+#endif /* ZORBA_NO_ICU */
+
 bool to_string( char const *in, size_type in_len, char_type **out,
                 size_type *out_len ) {
+#ifndef ZORBA_NO_ICU
   size_type utf16_len;
   UErrorCode status = U_ZERO_ERROR;
   u_strFromUTF8WithSub(                 // pre-flight to get utf16_len
@@ -2250,9 +2259,16 @@
   }
   *out = utf16_buf;
   *out_len = utf16_len;
+#else
+  *out = new char_type[ in_len + 1 ];
+  *out_len = in_len;
+  ::strncpy( *out, in, *out_len );
+#endif /* ZORBA_NO_ICU */
   return true;
 }
 
+#ifndef ZORBA_NO_ICU
+
 bool to_string( char const *in, size_type in_len, string *out ) {
   char_type *const buf = out->getBuffer( in_len + 1 );
   size_type buf_len;
@@ -2271,6 +2287,8 @@
   return U_SUCCESS( status ) == TRUE;
 }
 
+#endif /* ZORBA_NO_ICU */
+
 ///////////////////////////////////////////////////////////////////////////////
 
 } // namespace unicode

=== modified file 'src/util/unicode_util.h'
--- src/util/unicode_util.h	2011-07-18 14:25:21 +0000
+++ src/util/unicode_util.h	2011-10-28 02:34:26 +0000
@@ -19,12 +19,18 @@
 
 #include <zorba/config.h>
 
-#ifndef ZORBA_NO_UNICODE
-
 #include <cctype>
 #include <cstring>
 #include <cwchar>
-#include <unicode/unistr.h>
+
+#include <zorba/internal/ztd.h>
+
+#ifdef ZORBA_NO_ICU
+# include "zorbamisc/config/stdint.h"
+# include "zorbatypes/zstring.h"
+#else
+# include <unicode/unistr.h>
+#endif /* ZORBA_NO_ICU */
 
 #include "stl_util.h"
 
@@ -37,13 +43,21 @@
  * The character type that can hold a Unicode character encoded in UTF-16.  Do
  * not assume that this is an unsigned type.
  */
-typedef UChar char_type;
+#ifdef ZORBA_NO_ICU
+  typedef char char_type;
+#else
+  typedef /* ICU's */ UChar char_type;
+#endif /* ZORBA_NO_ICU */
 
 /**
  * The type type that can hold a Unicode code-point.  Do not assume that this
  * is an unsigned type.
  */
-typedef UChar32 code_point;
+#ifdef ZORBA_NO_ICU
+typedef uint32_t code_point;
+#else
+typedef /* ICU's */ UChar32 code_point;
+#endif /* ZORBA_NO_ICU */
 
 typedef int32_t size_type;
 
@@ -60,10 +74,17 @@
   };
 }
 
+#ifndef ZORBA_NO_ICU
 /**
  * A Unicode string.
  */
 typedef U_NAMESPACE_QUALIFIER UnicodeString string;
+#else
+/**
+ * Since there is no ICU, just use a zstring as a "Unicode" string.
+ */
+typedef zstring string;
+#endif /* ZORBA_NO_ICU */
 
 ////////// code-point checking ////////////////////////////////////////////////
 
@@ -100,7 +121,7 @@
   return ascii_c == c && isspace( ascii_c );
 #else
   return isspace( c );
-#endif
+#endif /* WIN32 */
 }
 
 /**
@@ -119,8 +140,10 @@
  * @param c The code-point to check.
  * @return Returns \c true only if the code-point is valid.
  */
-template<class CodePointType>
-inline bool is_valid( CodePointType c ) {
+template<typename CodePointType> inline
+typename std::enable_if<ZORBA_TR1_NS::is_integral<CodePointType>::value,
+                        bool>::type
+is_valid( CodePointType c ) {
   return  (ztd::ge0( c ) && c <= 0x00D7FF)
       ||  (c >= 0x00E000 && c <= 0x00FFFD)
       ||  (c >= 0x010000 && c <= 0x10FFFF);
@@ -168,6 +191,7 @@
 
 ////////// normalization //////////////////////////////////////////////////////
 
+#ifndef ZORBA_NO_ICU
 /**
  * Normalizes the given string.
  *
@@ -177,9 +201,11 @@
  */
 ZORBA_DLL_PUBLIC
 bool normalize( string const &in, normalization::type n, string *out );
+#endif /* ZORBA_NO_ICU */
 
 ////////// string conversion //////////////////////////////////////////////////
 
+#ifndef ZORBA_NO_ICU
 /**
  * Converts a single UTF-8 encoded character into a single Unicode character.
  *
@@ -188,6 +214,7 @@
  * @return Returns \c true only if the conversion succeeded.
  */
 bool to_char( char const *in, char_type *out );
+#endif /* ZORBA_NO_ICU */
 
 /**
  * Converts a UTF-8 encoded string into a sequence of Unicode characters.
@@ -210,8 +237,15 @@
  * @param out The Unicode string result.
  * @return Returns \c true only if the conversion succeeded.
  */
+#ifndef ZORBA_NO_ICU
 ZORBA_DLL_PUBLIC
 bool to_string( char const *in, size_type in_len, string *out );
+#else
+inline bool to_string( char const *in, size_type in_len, string *out ) {
+  out->assign( in, in_len );
+  return true;
+}
+#endif /* ZORBA_NO_ICU */
 
 /**
  * Converts a C string to a Unicode string.
@@ -224,6 +258,8 @@
   return to_string( in, (size_type)std::strlen( in ), out );
 }
 
+#ifndef ZORBA_NO_ICU
+
 /**
  * Converts a wide-character string to a Unicode string.
  *
@@ -245,6 +281,8 @@
   return to_string( in, static_cast<size_type>( std::wcslen( in ) ), out );
 }
 
+#endif /* ZORBA_NO_ICU */
+
 /**
  * Converts a string to a Unicode string.
  *
@@ -263,13 +301,6 @@
 } // namespace unicode
 } // namespace zorba
 
-#else
-#endif /* ZORBA_NO_UNICODE */
-namespace zorba{
-namespace unicode{
-typedef int32_t size_type;
-} // namespace unicode
-} // namespace zorba
 #endif /* ZORBA_UNICODE_UTIL_H */
 /*
  * Local variables:

=== modified file 'src/util/utf8_util.cpp'
--- src/util/utf8_util.cpp	2011-07-17 00:10:56 +0000
+++ src/util/utf8_util.cpp	2011-10-28 02:34:26 +0000
@@ -15,16 +15,16 @@
  */
 #include "stdafx.h"
 
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
 #include <unicode/ustring.h>
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
 
 #include "cxx_util.h"
 #include "utf8_util.h"
 
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
 U_NAMESPACE_USE
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
 
 unsigned const Mask1Byte   = 0x80;
 unsigned const Mask2Bytes  = 0xC0;
@@ -152,7 +152,7 @@
   return len;
 }
 
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
 
 bool to_string( unicode::char_type const *in, unicode::size_type in_len,
                 storage_type **out, size_type *out_len ) {
@@ -216,7 +216,7 @@
   return true;
 }
 
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
 
 storage_type const* validate( storage_type const *s ) {
   while ( *s ) {

=== modified file 'src/util/utf8_util.h'
--- src/util/utf8_util.h	2011-07-17 20:05:49 +0000
+++ src/util/utf8_util.h	2011-10-28 02:34:26 +0000
@@ -25,16 +25,20 @@
 
 #include "ascii_util.h"
 #include "cxx_util.h"
+#include "string_util.h"
 #include "unicode_util.h"
 #include "utf8_string.h"
 #include "utf8_util_base.h"
 
+#include "zorbatypes/collation_manager.h"
 #include "zorbautils/hashfun.h"
 
-#ifndef ZORBA_NO_UNICODE
-#include "zorbatypes/collation_manager.h"
-#include "zorbatypes/libicu.h"
-#endif
+#ifdef ZORBA_NO_ICU
+# include "diagnostics/assert.h"
+#else
+# include <unicode/coll.h>
+# include <unicode/sortkey.h>
+#endif /* ZORBA_NO_ICU */
 
 namespace zorba {
 namespace utf8 {
@@ -306,7 +310,7 @@
 
 ////////// Encoding conversion ////////////////////////////////////////////////
 
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
 
 /**
  * Converts a unicode::char_type array into a UTF-8 encoded string.
@@ -377,6 +381,8 @@
   return to_string( in, u_strlen( in ), out );
 }
 
+#endif /* ZORBA_NO_ICU */
+
 /**
  * Converts a unicode::string into a UTF-8 encoded string.
  *
@@ -386,9 +392,16 @@
  */
 template<class StringType> inline
 bool to_string( unicode::string const &in, StringType *out ) {
+#ifndef ZORBA_NO_ICU
   return to_string( in.getBuffer(), in.length(), out );
+#else
+  *out = in.c_str();
+  return true;
+#endif /* ZORBA_NO_ICU */
 }
 
+#ifndef ZORBA_NO_ICU
+
 //
 // On Windows, UChar == wchar_t, so these functions would multiply define those
 // previously.
@@ -512,7 +525,7 @@
   return to_wchar_t( in.data(), in.size(), out, out_len );
 }
 
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
 
 ////////// HTML URI ///////////////////////////////////////////////////////////
 
@@ -670,7 +683,7 @@
 
 ////////// Unicode normalization //////////////////////////////////////////////
 
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
 /**
  * Normalizes the Unicode characters in the string.
  *
@@ -682,7 +695,7 @@
 template<class InputStringType,class OutputStringType>
 bool normalize( InputStringType const &in, unicode::normalization::type n,
                 OutputStringType *out );
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
 
 ////////// Whitespace /////////////////////////////////////////////////////////
 
@@ -743,7 +756,6 @@
   std::reverse_copy( u_in.begin(), u_in.end(), std::back_inserter( u_out ) );
 }
 
-#ifndef ZORBA_NO_UNICODE
 /**
  * Strips all diacritical marks from all characters converting them to their
  * closest ASCII equivalents.
@@ -756,8 +768,6 @@
 template<class InputStringType,class OutputStringType>
 void strip_diacritics( InputStringType const &in, OutputStringType *out );
 
-#endif /* ZORBA_NO_UNICODE */
-
 /**
  *
  */
@@ -765,6 +775,7 @@
 int compare(const StringType1 &s1, const StringType2 &s2,
             const XQPCollator* collation)
 {
+#ifndef ZORBA_NO_ICU
   if (collation == NULL || collation->doMemCmp())
     return s1.compare(s2);
 
@@ -775,6 +786,9 @@
   unicode::to_string(s2, &us2);
 
   return static_cast<Collator*>( collation->getCollator() )->compare(us1, us2);
+#else
+  return s1.compare(s2);
+#endif /* ZORBA_NO_ICU */
 }
 
 
@@ -782,25 +796,13 @@
  *
  */
 template<class StringType> inline
-uint32_t hash(const StringType& s, const XQPCollator* collation = NULL)
-{
+uint32_t hash(const StringType& s, const XQPCollator* collation = NULL) {
+#ifndef ZORBA_NO_ICU
   if (!collation || collation->doMemCmp())
-  {
-    const char* str = s.data();
-    ulong len = (ulong)s.size();
-    uint32_t hash = 5381;
-    ulong i = 0;
-    int c;
-    while (i < len && (c = *str++))
-    {
-      hash = ((hash << 5) + hash) + c;
-      ++i;
-    }
-    return hash;
-    //return hashfun::h32((void*)(s.data()), s.size());
-  }
+#endif /* ZORBA_NO_ICU */
+    return ztd::hash( s.data(), s.size() );
 
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
   CollationKey collKey;
   UErrorCode status = U_ZERO_ERROR;
 
@@ -818,7 +820,7 @@
   return collKey.hashCode();
 #else
   ZORBA_ASSERT(false);
-#endif
+#endif /* ZORBA_NO_ICU */
 }
 
 ///////////////////////////////////////////////////////////////////////////////

=== modified file 'src/util/utf8_util.tcc'
--- src/util/utf8_util.tcc	2011-07-15 13:33:24 +0000
+++ src/util/utf8_util.tcc	2011-10-28 02:34:26 +0000
@@ -99,7 +99,7 @@
   return next_char( temp );
 }
 
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
 
 template<class InputStringType,class OutputStringType>
 bool normalize( InputStringType const &in, unicode::normalization::type n,
@@ -123,7 +123,11 @@
 template<class InputStringType,class OutputStringType>
 void strip_diacritics( InputStringType const &in, OutputStringType *out ) {
   InputStringType in_normalized;
+#ifndef ZORBA_NO_ICU
   normalize( in, unicode::normalization::NFKD, &in_normalized );
+#else
+  in_normalized = in.c_str();
+#endif /* ZORBA_NO_ICU */
   out->clear();
   out->reserve( in_normalized.size() );
   std::copy(
@@ -161,7 +165,7 @@
 }
 #endif /* WIN32 */
 
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
 
 template<class InputStringType,class OutputStringType>
 void to_lower( InputStringType const &in, OutputStringType *out ) {

=== modified file 'src/zorbatypes/URI.cpp'
--- src/zorbatypes/URI.cpp	2011-06-24 23:00:33 +0000
+++ src/zorbatypes/URI.cpp	2011-10-28 02:34:26 +0000
@@ -1191,8 +1191,6 @@
   return is_set(Scheme) && !theScheme.empty();
 }
 
-
-
 /*******************************************************************************
 
 ********************************************************************************/
@@ -1347,7 +1345,6 @@
       path = base_path.substr(0, last_slash+1);
 //  else
 //    path = "/";
-
   }
 
   // 6b - append the relative URI path

=== modified file 'src/zorbatypes/collation_manager.cpp'
--- src/zorbatypes/collation_manager.cpp	2011-06-14 17:26:33 +0000
+++ src/zorbatypes/collation_manager.cpp	2011-10-28 02:34:26 +0000
@@ -17,9 +17,9 @@
 
 #include "common/common.h"
 
-#ifndef ZORBA_NO_UNICODE
-#include "zorbatypes/libicu.h"
-#endif
+#ifndef ZORBA_NO_ICU
+# include <unicode/coll.h>
+#endif /* ZORBA_NO_ICU */
 
 #include <vector>
 #include <iostream>
@@ -116,7 +116,7 @@
   
   Collator* lCollator;
 
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
   UErrorCode lError = U_ZERO_ERROR;
   if (lTokens.size() == 2) 
   {
@@ -136,37 +136,37 @@
 
 #else
   lCollator = new Collator;
-#endif
+#endif /* ZORBA_NO_ICU */
 
   if (lTokens[0].compare("PRIMARY") == 0) 
   {
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
     lCollator->setStrength(Collator::PRIMARY);
-#endif
+#endif /* ZORBA_NO_ICU */
   }
   else if (lTokens[0].compare("SECONDARY") == 0) 
   {
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
     lCollator->setStrength(Collator::SECONDARY);
-#endif
+#endif /* ZORBA_NO_ICU */
   }
   else if (lTokens[0].compare("TERTIARY") == 0) 
   {
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
     lCollator->setStrength(Collator::TERTIARY);
-#endif
+#endif /* ZORBA_NO_ICU */
   }
   else if (lTokens[0].compare("QUATERNARY") == 0) 
   {
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
     lCollator->setStrength(Collator::QUATERNARY);
-#endif
+#endif /* ZORBA_NO_ICU */
   }
   else if (lTokens[0].compare("IDENTICAL") == 0) 
   {
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
     lCollator->setStrength(Collator::IDENTICAL);
-#endif
+#endif /* ZORBA_NO_ICU */
   }
   else
   {
@@ -181,7 +181,7 @@
 CollationFactory::createCollator()
 {
   Collator* lCollator;
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
   UErrorCode lError = U_ZERO_ERROR;
   lCollator = Collator::createInstance(Locale("en", "US"), lError); 
   if( U_FAILURE(lError) ) {
@@ -190,7 +190,7 @@
   lCollator->setStrength(Collator::IDENTICAL);
 #else
   lCollator = new Collator;
-#endif
+#endif /* ZORBA_NO_ICU */
   return new XQPCollator(lCollator, (std::string)"");
 }
 

=== modified file 'src/zorbatypes/collation_manager.h'
--- src/zorbatypes/collation_manager.h	2011-06-14 17:26:33 +0000
+++ src/zorbatypes/collation_manager.h	2011-10-28 02:34:26 +0000
@@ -25,13 +25,13 @@
 namespace zorba
 {
 
-#ifdef ZORBA_NO_UNICODE
+#ifdef ZORBA_NO_ICU
 
-class   Collator
+class Collator
 {
 };
 
-#endif
+#endif /* ZORBA_NO_ICU */
 
 class XQPCollator
 {

=== removed file 'src/zorbatypes/libicu.h'
--- src/zorbatypes/libicu.h	2011-06-14 17:26:33 +0000
+++ src/zorbatypes/libicu.h	1970-01-01 00:00:00 +0000
@@ -1,32 +0,0 @@
-/*
- * Copyright 2006-2008 The FLWOR Foundation.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-#ifndef ZORBA_LIBICU_H
-#if defined CYGWIN
-#  define U_HAVE_INTTYPES 0
-#  define U_HAVE_INT8_T 1
-#  define U_HAVE_INT32_T 1
-#  define U_HAVE_UINT32_T 1
-#endif
-
-#include <unicode/utypes.h>
-#include <unicode/coll.h>
-#include <unicode/ustring.h>
-#include <unicode/stsearch.h>
-#include <unicode/ucnv.h>
-#include <unicode/normlzr.h>
-#endif
-/* vim:set et sw=2 ts=2: */

=== modified file 'src/zorbatypes/transcoder.cpp'
--- src/zorbatypes/transcoder.cpp	2011-06-14 17:26:33 +0000
+++ src/zorbatypes/transcoder.cpp	2011-10-28 02:34:26 +0000
@@ -25,17 +25,19 @@
 
 namespace zorba {
 
+///////////////////////////////////////////////////////////////////////////////
+
 transcoder::transcoder( std::ostream& output_stream, bool in_utf16 ) :
   os( output_stream ),
   utf16( in_utf16 )
 {
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
   utf8_buf_len_ = 0;
   utf8_char_len_ = 1;
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
 }
 
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
 
 void transcoder::write_utf16( char const *s, std::streamsize len ) {
   unicode::char_type *u_s;
@@ -76,7 +78,9 @@
   }
 }
 
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
+
+///////////////////////////////////////////////////////////////////////////////
 
 } // namespace zorba
 /* vim:set et sw=2 ts=2: */

=== modified file 'src/zorbatypes/transcoder.h'
--- src/zorbatypes/transcoder.h	2011-06-14 17:26:33 +0000
+++ src/zorbatypes/transcoder.h	2011-10-28 02:34:26 +0000
@@ -40,21 +40,21 @@
   std::ostream &os;
   bool const utf16;
 
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
   utf8::encoded_char_type utf8_buf_;
   int utf8_buf_len_;
   int utf8_char_len_;
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
 
 public:
   transcoder(std::ostream& output_stream, bool in_utf16);
 
   transcoder& write( char const *s, std::streamsize n ) {
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
     if ( utf16 )
       write_utf16( s, n );
     else
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
       os.write( s, n );
     return *this;
   }
@@ -68,11 +68,11 @@
   }
 
   transcoder& operator<<( char ch ) {
-#ifndef ZORBA_NO_UNICODE
-    if (utf16)
+#ifndef ZORBA_NO_ICU
+    if ( utf16 )
       write_utf16_char(ch);
     else
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
       os << ch;
     return *this;
   }
@@ -97,10 +97,10 @@
   }
 
 private:
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
   void write_utf16(const char* str, std::streamsize n);
   void write_utf16_char(char ch);
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
 };
 
 } // namespace zorba

=== modified file 'src/zorbautils/string_util.cpp'
--- src/zorbautils/string_util.cpp	2011-06-14 17:26:33 +0000
+++ src/zorbautils/string_util.cpp	2011-10-28 02:34:26 +0000
@@ -24,16 +24,23 @@
 #include "diagnostics/xquery_diagnostics.h"
 
 using namespace std;
+#ifndef ZORBA_NO_ICU
 U_NAMESPACE_USE
+#endif /* ZORBA_NO_ICU */
 
 namespace zorba {
 namespace utf8 {
 
+///////////////////////////////////////////////////////////////////////////////
+
 size_t find( char const *s, size_t s_len, char const *ss, size_t ss_len,
-            XQPCollator const *collator ) {
+             XQPCollator const *collator ) {
+#ifndef ZORBA_NO_ICU
   if ( !collator || collator->doMemCmp()) {
+#endif /* ZORBA_NO_ICU */
     char const *const result = ::strstr( s, ss );
     return result ? result - s : zstring::npos;
+#ifndef ZORBA_NO_ICU
   }
 
   unicode::string u_s, u_ss;
@@ -54,28 +61,19 @@
     }
   }
   return zstring::npos;
+#endif /* ZORBA_NO_ICU */
 }
 
 
-size_t rfind(
-    char const *s,
-    size_t s_len,
-    char const *ss,
-    size_t ss_len,
-    XQPCollator const *collator ) 
-{
-  if ( ! collator || collator->doMemCmp())
-  {
+size_t rfind( char const *s, size_t s_len, char const *ss, size_t ss_len,
+              XQPCollator const *collator ) {
+#ifndef ZORBA_NO_ICU
+  if ( ! collator || collator->doMemCmp()) {
+#endif /* ZORBA_NO_ICU */
     zstring_b tmp;
     tmp.wrap_memory(const_cast<char*>(s), s_len);
-
-    size_t pos = tmp.rfind(ss, ss_len);
-
-    //if (pos == zstring::npos)
-    //  return -1;
-    //else
-    //  return pos;
-    return pos;
+    return tmp.rfind(ss, ss_len);
+#ifndef ZORBA_NO_ICU
   }
 
   unicode::string u_s, u_ss;
@@ -102,6 +100,7 @@
   }
 
   return zstring::npos;
+#endif /* ZORBA_NO_ICU */
 }
 
 bool match_part( char const *in, char const *pattern, char const *flags ) {
@@ -116,6 +115,8 @@
   return re.match_whole( in );
 }
 
+///////////////////////////////////////////////////////////////////////////////
+
 } // namespace utf8
 } // namespace zorba
 /* vim:set et sw=2 ts=2: */

=== modified file 'src/zorbautils/string_util.h'
--- src/zorbautils/string_util.h	2011-06-14 17:26:33 +0000
+++ src/zorbautils/string_util.h	2011-10-28 02:34:26 +0000
@@ -13,12 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #pragma once
 #ifndef ZORBA_UTILS_STRING_UTIL_H
 #define ZORBA_UTILS_STRING_UTIL_H
 
 #include <cstring>
 
+#include <zorba/config.h>
+
 #include "diagnostics/xquery_diagnostics.h"
 #include "zorbatypes/collation_manager.h"
 
@@ -145,9 +148,13 @@
                   char const *replacement, OutputStringType *out ) {
   unicode::regex re;
   re.compile( pattern, flags );
+#ifndef ZORBA_NO_ICU
   unicode::string u_out;
   return  re.replace_all( in, replacement, &u_out ) &&
           utf8::to_string( u_out.getBuffer(), u_out.length(), out );
+#else
+  return re.replace_all( in, replacement, out );
+#endif /* ZORBA_NO_ICU */
 }
 
 /**
@@ -175,9 +182,13 @@
                   OutputStringType *out ) {
   unicode::regex re;
   re.compile( pattern, flags );
+#ifndef ZORBA_NO_ICU
   unicode::string u_out;
   return  re.replace_all( in, replacement, &u_out ) &&
           utf8::to_string( u_out.getBuffer(), u_out.length(), out );
+#else
+  return re.replace_all( in, replacement, out );
+#endif /* ZORBA_NO_ICU */
 }
 
 /**
@@ -207,9 +218,13 @@
                   OutputStringType *out ) {
   unicode::regex re;
   re.compile( pattern, flags );
+#ifndef ZORBA_NO_ICU
   unicode::string u_out;
   return  re.replace_all( in, replacement, &u_out ) &&
           utf8::to_string( u_out.getBuffer(), u_out.length(), out );
+#else
+  return re.replace_all( in, replacement, out );
+#endif /* ZORBA_NO_ICU */
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -217,7 +232,6 @@
 } // namespace utf8
 } // namespace zorba
 #endif  /* ZORBA_UTILS_STRING_UTIL_H */
-
 /*
  * Local variables:
  * mode: c++

=== modified file 'test/commons/testdriver_comparator.cpp'
--- test/commons/testdriver_comparator.cpp	2011-09-12 23:22:24 +0000
+++ test/commons/testdriver_comparator.cpp	2011-10-28 02:34:26 +0000
@@ -280,6 +280,7 @@
   {
     // Wasn't a BOM; 'unread' it
     refStream.clear();
+    refStream.clear();
     refStream.seekg(0, std::ios_base::beg);
   }
 

=== added directory 'test/rbkt/ExpQueryResults/zorba/string/Regex'
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a1.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a1.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>aa<fn:group nr="1">a</fn:group></fn:match></fn:analyze-string-result>
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a2.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a2.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a2.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>aa<fn:group nr="1"><fn:group nr="2">a</fn:group></fn:group></fn:match></fn:analyze-string-result>
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a3.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a3.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a3.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>aa<fn:group nr="1"><fn:group nr="2">a</fn:group></fn:group>c</fn:match></fn:analyze-string-result>
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a4.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a4.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a4.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>a<fn:group nr="1"><fn:group nr="2"/><fn:group nr="3">c</fn:group></fn:group></fn:match></fn:analyze-string-result>
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a5.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a5.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a5.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>aa<fn:group nr="1"><fn:group nr="2">a</fn:group><fn:group nr="3"/></fn:group>c</fn:match></fn:analyze-string-result>
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a6.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a6.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a6.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>aa<fn:group nr="1"><fn:group nr="2">a</fn:group><fn:group nr="3"/></fn:group>c</fn:match></fn:analyze-string-result>
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a7.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a7.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a7.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>aa<fn:group nr="1"><fn:group nr="2">a</fn:group><fn:group nr="3">a</fn:group></fn:group>c</fn:match></fn:analyze-string-result>
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a8.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a8.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a8.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:non-match>aaaa</fn:non-match><fn:match><fn:group nr="1"></fn:group>c</fn:match></fn:analyze-string-result>
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a9.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a9.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a9.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:non-match>aaaa</fn:non-match><fn:match><fn:group nr="1"></fn:group>c<fn:group nr="2"></fn:group></fn:match></fn:analyze-string-result>
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m1.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m1.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m10.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m10.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m10.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m11.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m11.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m11.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m12.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m12.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m12.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m13.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m13.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m13.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m14.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m14.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m14.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m15.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m15.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m15.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m16.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m16.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m16.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m17.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m17.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m17.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m18.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m18.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m18.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m19.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m19.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m19.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m2.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m2.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m2.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m20.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m20.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m20.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m21.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m21.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m21.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m22.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m22.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m22.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m23.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m23.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m23.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m24.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m24.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m24.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m25.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m25.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m25.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m26.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m26.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m26.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m27.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m27.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m27.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m28.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m28.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m28.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m29.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m29.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m29.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m3.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m3.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m3.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m30.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m30.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m30.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m31.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m31.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m31.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m32.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m32.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m32.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m33.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m33.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m33.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m34.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m34.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m34.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m35.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m35.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m35.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m36.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m36.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m36.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m37.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m37.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m37.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m38.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m38.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m38.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m39.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m39.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m39.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m4.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m4.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m4.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m40.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m40.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m40.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m41.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m41.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m41.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m42.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m42.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m42.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m43.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m43.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m43.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m44.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m44.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m44.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m45.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m45.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m45.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m46.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m46.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m46.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m47.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m47.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m47.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m48.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m48.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m48.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m49.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m49.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m49.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m5.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m5.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m5.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m50.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m50.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m50.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m51.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m51.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m51.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m52.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m52.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m52.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m53.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m53.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m53.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m6.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m6.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m6.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m7.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m7.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m7.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m8.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m8.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m8.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m9.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m9.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m9.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_prime1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_prime1.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_prime1.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true false
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r1.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r1.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+ac1ac1
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r10.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r10.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r10.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+b
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r11.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r11.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r11.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
++-+-+-0-1
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r2.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r2.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r2.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+1
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r3.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r3.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r3.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+11
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r4.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r4.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r4.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+a-aba-ab
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r5.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r5.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r5.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+acbaacba
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r6.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r6.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r6.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+acaabcab
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r9.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r9.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r9.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+11
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t1.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t1.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+ r c d r 
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t2.xml.res'
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t4.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t4.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t4.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+ 0 1
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t5.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t5.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t5.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+The cat sat on the mat
\ No newline at end of file

=== added file 'test/rbkt/ExpQueryResults/zorba/testdriver/bom_bug.xml.res'
--- test/rbkt/ExpQueryResults/zorba/testdriver/bom_bug.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/testdriver/bom_bug.xml.res	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+11
\ No newline at end of file

=== modified file 'test/rbkt/Queries/CMakeLists.txt'
--- test/rbkt/Queries/CMakeLists.txt	2011-10-13 12:46:50 +0000
+++ test/rbkt/Queries/CMakeLists.txt	2011-10-28 02:34:26 +0000
@@ -238,6 +238,8 @@
 #  EXPECTED_FAILURE (test/rbkt/zorba/file/dirname_basename ????need bugnum???)
 #ENDIF ()
 
+# This test must fail in order to pass: it checks for the testdriver BOM bug that gives false positives.
+EXPECTED_FAILURE (test/rbkt/zorba/testdriver/bom_bug 3381121)
 
 # --------------------------------------------------------------------------
 # the list of tests that are failing but can be accepted by the commit queue
@@ -287,6 +289,32 @@
 
 EXPECTED_FAILURE(test/rbkt/zorba/http-client/put/put3_binary_element 3391756)
 EXPECTED_FAILURE(test/rbkt/zorba/http-client/post/post3_binary_element 3391756)
+IF(NOT ZORBA_NO_ICU)
+  EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err10 3405597)
+  EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err12 3405597)
+  EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err15 3405597)
+  EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err16 3405597)
+  EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err25 3405597)
+  EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err7 3405597)
+  EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m11 3405597)
+ENDIF(NOT ZORBA_NO_ICU)
+
+IF(ZORBA_NO_ICU)
+  SET_TESTS_PROPERTIES(test/rbkt/zorba/string/CodepointToStringFunc/CodepointToStringFunc1
+                      test/rbkt/zorba/string/CodepointToStringFunc/UnicodeNormalization1
+                      test/rbkt/zorba/utf-8/utf8inDataDe
+                      test/rbkt/zorba/utf-8/utf8inDataFr
+                      test/rbkt/zorba/utf-8/utf8inDataNo
+                      test/rbkt/zorba/utf-8/utf8inDataRo
+                      test/rbkt/zorba/http-client/send-request/http2-read-svg
+                      test/rbkt/zorba/http-client/post/post2_element
+                      test/rbkt/zorba/http-client/post/post3_xml
+                      test/rbkt/zorba/http-client/put/put2_element
+                      test/rbkt/zorba/http-client/put/put3_xml
+                      test/rbkt/zorba/parsing_and_serializing/fn_serialize_04_xml_decl
+                      PROPERTIES WILL_FAIL TRUE)
+ENDIF(ZORBA_NO_ICU)
+
 
 EXPECTED_FAILURE(test/rbkt/zorba/reference/reference_5 868640)
 

=== added directory 'test/rbkt/Queries/zorba/string/Regex'
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a1.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a1.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a1.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaa", "(a)+")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a2.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a2.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a2.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaa", "((a))+")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a3.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a3.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a3.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaac", "((a))+?c")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a4.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a4.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a4.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("ac", "((a)|(c))+")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a5.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a5.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a5.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaac", "((a)|(c))+c")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a6.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a6.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a6.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaac", "((a)|(c))+c")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a7.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a7.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a7.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaaac", "((a)(a))+c")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a8.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a8.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a8.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaaac", "()c")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a9.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a9.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a9.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaaac", "()c($)")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err1.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err1.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err1.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err1.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err1.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err1.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "+")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err10.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err10.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err10.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err10.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err10.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err10.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "\p{IsBasic-Latin}")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err11.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err11.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err11.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err11.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err11.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err11.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "\p{IsBasicLatin2}")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err12.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err12.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err12.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err12.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err12.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err12.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "\y")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err13.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err13.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err13.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err13.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err13.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err13.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "\0")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err14.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err14.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err14.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err14.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err14.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err14.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "(1)\2")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err15.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err15.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err15.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err15.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err15.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err15.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "[a-[b] ]")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err16.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err16.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err16.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err16.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err16.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err16.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "[\s-e]")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err17.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err17.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err17.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err17.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err17.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err17.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "[e-\s]")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err18.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err18.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err18.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err18.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err18.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err18.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "[eb")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err19.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err19.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err19.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err19.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err19.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err19.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+(:backref to unended group:)
+
+fn:matches("a", "(a(b(c)\2))")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err2.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err2.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err2.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err2.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err2.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err2.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "}")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err20.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err20.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err20.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0001

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err20.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err20.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err20.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+(:unknown flag:)
+
+fn:matches("a", "a", "a")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err21.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err21.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err21.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0004

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err21.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err21.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err21.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+(:$ not followed by 0-9:)
+
+fn:replace("a", "a", "$a")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err22.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err22.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err22.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0004

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err22.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err22.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err22.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+(:\ outside constructs \\ or \$:)
+
+fn:replace("a", "a", "\a")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err23.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err23.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err23.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err23.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err23.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err23.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+(:group with ?: is not used in backreferencing:)
+
+fn:matches("a", "(a(?:b)\2)")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err24.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err24.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err24.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err24.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err24.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err24.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+(:{min,max} min is bigger:)
+
+fn:matches("a", "a{3,2}")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err25.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err25.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err25.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err25.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err25.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err25.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "a^")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err3.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err3.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err3.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err3.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err3.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err3.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "{")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err4.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err4.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err4.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err4.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err4.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err4.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "?")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err5.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err5.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err5.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err5.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err5.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err5.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "*")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err7.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err7.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err7.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err7.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err7.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err7.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "^^")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err8.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err8.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err8.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err8.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err8.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err8.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "\p ")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err9.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err9.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err9.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err9.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err9.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err9.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "\P{L ")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m1.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m1.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m1.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("abracadabra", "bra")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m10.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m10.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m10.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ba", "a?b?")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m11.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m11.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m11.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ba", "[a-z-[ab]]")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m12.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m12.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m12.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaaaab", "a*ab")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m13.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m13.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m13.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaaaab", "a*?ab")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m14.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m14.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m14.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("abc", "(a|ab)c")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m15.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m15.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m15.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("bbba", "((a)|(b))*\3")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m16.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m16.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m16.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaaa", "^a*?$")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m17.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m17.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m17.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaabb", "a{1,3}ab")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m18.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m18.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m18.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+
+
+fn:matches("aaaa", "a{1,3}")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m19.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m19.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m19.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("baac", "(?:b)(a)\1c")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m2.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m2.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m2.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("abracadabra", "^a.*a$")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m20.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m20.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m20.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaaa", "(aaa|a){2,3}")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m21.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m21.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m21.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaaa", "(aaa|a){2,3}?")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m22.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m22.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m22.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaac", "(aaa|a){2,3}?c")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m23.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m23.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m23.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaac", "(aaa|a){2,3}c")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m24.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m24.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m24.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaaaab", "(a|b)*ab")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m25.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m25.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m25.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("t1t22t33", "(t.*){3}")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m26.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m26.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m26.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ac", "ab")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m27.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m27.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m27.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cat", "cat(aract|erpillar|) ")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m28.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m28.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m28.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cat", "c()a\1t")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m29.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m29.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m29.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cat", "cat(aract|erpillar|)")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m3.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m3.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m3.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("abracadabra", "^bra")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m30.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m30.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m30.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cat", "c()a\1t ")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m31.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m31.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m31.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cat", "cat(aract||erpillar)")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m32.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m32.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m32.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "|")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m33.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m33.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m33.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "^a")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m34.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m34.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m34.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "^a$")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m35.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m35.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m35.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,4 @@
+fn:matches(
+"a
+b
+c", "^b", "m")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m36.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m36.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m36.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "b$|^a")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m37.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m37.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m37.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,4 @@
+fn:matches(
+"a
+b
+c", "e$|^c$", "m")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m38.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m38.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m38.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,4 @@
+fn:matches(
+"a
+b
+c", "e$|(^c$)+", "m")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m39.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m39.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m39.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "(^)a")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m4.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m4.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m4.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,6 @@
+let $poem := 
+<poem author="Wilhelm Busch"> Kaum hat dies
+            der Hahn gesehen, Fangt er auch schon an zu krahen: Kikeriki! Kikikerikih!! Tak, tak,
+            tak! - da kommen sie. </poem>
+return
+fn:matches($poem, "Kaum.*krahen")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m40.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m40.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m40.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "^+a")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m41.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m41.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m41.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "^?b")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m42.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m42.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m42.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "(c*)*")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m43.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m43.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m43.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "(c*)*?e")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m44.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m44.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m44.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "((c)*?)*?e")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m45.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m45.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m45.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "(c*){3,}e")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m46.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m46.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m46.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cabana", "(cab|caba)na")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m47.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m47.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m47.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cabana", "((a|c)(a|a)(a|b)|(a|c)(a|a)(a|b)(a|a))na")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m48.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m48.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m48.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("abc", "^b")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m49.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m49.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m49.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("abc", "b$")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m5.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m5.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m5.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,6 @@
+let $poem := 
+<poem author="Wilhelm Busch"> Kaum hat dies
+            der Hahn gesehen, Fangt er auch schon an zu krahen: Kikeriki! Kikikerikih!! Tak, tak,
+            tak! - da kommen sie. </poem>
+return
+fn:matches($poem, "Kaum.*krahen", "s")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m50.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m50.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m50.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,2 @@
+fn:matches("abc
+def", "b.*f", "s")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m51.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m51.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m51.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,2 @@
+fn:matches("abc
+def", "b.*f", "m")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m52.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m52.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m52.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("b", "[^B]", "i")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m53.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m53.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m53.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("bc d", "b c[ ]d", "x")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m6.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m6.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m6.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,6 @@
+let $poem := 
+<poem author="Wilhelm Busch"> Kaum hat dies der Hahn gesehen,
+        Fangt er auch schon an zu krahen: Kikeriki! Kikikerikih!! Tak, tak, tak! - da kommen sie. 
+</poem>
+return
+fn:matches($poem, "^ Kaum.*gesehen,$", "m")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m7.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m7.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m7.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,6 @@
+let $poem := 
+<poem author="Wilhelm Busch"> Kaum hat dies der Hahn gesehen,
+        Fangt er auch schon an zu krahen: Kikeriki! Kikikerikih!! Tak, tak, tak! - da kommen sie. 
+</poem>
+return
+fn:matches($poem, "^Kaum.*gesehen,$")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m8.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m8.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m8.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,7 @@
+let $poem := 
+<poem author="Wilhelm Busch"> Kaum hat dies der Hahn gesehen,
+        Fangt er auch schon an zu krahen: Kikeriki! Kikikerikih!! 
+        Tak, tak, tak! - da kommen sie. 
+</poem>
+return
+fn:matches($poem, "kiki", "i")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m9.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m9.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m9.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,7 @@
+let $poem := 
+<poem author="Wilhelm Busch"> Kaum hat dies der Hahn gesehen,
+        Fangt er auch schon an zu krahen: Kikeriki! Kikikerikih!! 
+        Tak, tak, tak! - da kommen sie. 
+</poem>
+return
+fn:matches($poem, "(tak.*){3}", "i")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_prime1.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_prime1.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_prime1.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,17 @@
+
+declare function local:string-1-n($nr as xs:integer) as xs:string
+{
+  if($nr eq 0) then
+    ""
+  else 
+    concat("1", local:string-1-n($nr - 1))
+};
+
+declare function local:is-prime($nr as xs:integer) as xs:boolean
+{
+  let $str1 := local:string-1-n($nr)
+  return
+    fn:not(fn:matches($str1, "^(11+)\1+$"))
+};
+
+(local:is-prime(13), local:is-prime(24))

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r1.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r1.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r1.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("acabacab", "ab", "1")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r10.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r10.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r10.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("aba", "a", "")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r11.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r11.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r11.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("a-b-c-0-1", "\p{Ll}", "+")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r2.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r2.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r2.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("aaa", "a+", "1")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r3.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r3.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r3.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("aa", "a|aa", "1")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r4.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r4.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r4.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("acabacab", "c(ab)", "-$1")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r5.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r5.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r5.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("acabacab", "(a)(b)", "$2$1")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r6.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r6.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r6.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("acabacab", "(a)(b)(a)(c)", "$3$1$2$5$4")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r7_err.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_r7_err.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r7_err.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0003

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r7_err.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r7_err.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r7_err.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("acabacab", "(a)*(b)*(a)*(c)*", "$3$1$2$5$4")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r8_err.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_r8_err.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r8_err.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0004

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r8_err.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r8_err.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r8_err.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("acabacab", "(a)(b)(a)(c)", "$$3$1$2$5$4")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r9.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r9.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r9.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("aaaa", "(a|aa){1,2}", "1")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_t1.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_t1.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_t1.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:tokenize("abracadabra", "(ab)|(a)")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_t2.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_t2.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_t2.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:tokenize("", "(ab)|(a)")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_t3_err.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_t3_err.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_t3_err.spec	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0003

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_t3_err.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_t3_err.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_t3_err.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:tokenize("", "a*")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_t4.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_t4.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_t4.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,2 @@
+(:extract numbers:)
+fn:tokenize("x=0,y=1", "\P{Nd}+")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_t5.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_t5.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_t5.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:tokenize("The cat sat on the mat", "\s+")
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/testdriver/bom_bug.xq'
--- test/rbkt/Queries/zorba/testdriver/bom_bug.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/testdriver/bom_bug.xq	2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+1
\ No newline at end of file

=== modified file 'test/unit/CMakeLists.txt'
--- test/unit/CMakeLists.txt	2011-10-10 09:04:35 +0000
+++ test/unit/CMakeLists.txt	2011-10-28 02:34:26 +0000
@@ -64,7 +64,6 @@
   test_audit.cpp
   string_instantiate.cpp
   streamable_string.cpp
-  string_test.cpp
   unique_ptr.cpp
   main_sequential.cpp
   datetime.cpp
@@ -74,6 +73,10 @@
   staticcollectionmanager.cpp
 )
 
+IF(NOT ZORBA_NO_ICU)
+  LIST(APPEND UNIT_TESTS_SRCS string_test.cpp)
+ENDIF(NOT ZORBA_NO_ICU)
+
 IF (NOT ZORBA_NO_FULL_TEXT)
   LIST(APPEND UNIT_TESTS_SRCS
     stemmer.cpp
@@ -88,11 +91,6 @@
   LIST(APPEND SPEC_FILES "debug_iter_serialization.cpp")
 ENDIF(ZORBA_WITH_DEBUGGER)
 
-IF(WIN32)
-  # SF#3191791
-  LIST(REMOVE_ITEM UNIT_TESTS_SRCS "string_test.cpp")
-ENDIF(WIN32)
-
 CREATE_TEST_SOURCELIST(UnitTests
   UnitTests.cpp
   ${UNIT_TESTS_SRCS}

=== modified file 'test/unit/string_test.cpp'
--- test/unit/string_test.cpp	2011-08-24 12:48:09 +0000
+++ test/unit/string_test.cpp	2011-10-28 02:34:26 +0000
@@ -568,6 +568,7 @@
   ASSERT_TRUE( t == s );
 }
 
+#ifndef ZORBA_NO_ICU
 template<class StringType>
 static void test_to_string_from_wchar_t() {
   wchar_t const w[] = L"hello";
@@ -577,6 +578,7 @@
   for ( string::size_type i = 0; i < s.length(); ++i )
     ASSERT_TRUE( s[i] == w[i] );
 }
+#endif /* ZORBA_NO_ICU */
 
 template<class StringType>
 static void test_to_upper() {
@@ -604,6 +606,7 @@
   }
 }
 
+#ifndef ZORBA_NO_ICU
 static void test_to_wchar_t() {
   string const s = "hello";
   wchar_t *w;
@@ -615,6 +618,7 @@
     ASSERT_TRUE( w[i] == s[i] );
   delete[] w;
 }
+#endif /* ZORBA_NO_ICU */
 
 static void test_trim_start() {
   char const *s;
@@ -867,16 +871,20 @@
   test_to_string_from_utf8<zstring>();
   test_to_string_from_utf8<zstring_p>();
 
+#ifndef ZORBA_NO_ICU
   test_to_string_from_wchar_t<string>();
   test_to_string_from_wchar_t<zstring>();
   test_to_string_from_wchar_t<zstring_p>();
+#endif /* ZORBA_NO_ICU */
 
   test_to_upper<string>();
   test_to_upper<zstring>();
   test_to_upper<zstring_p>();
   test_to_upper<String>();
 
+#ifndef ZORBA_NO_ICU
   test_to_wchar_t();
+#endif /* ZORBA_NO_ICU */
 
   test_trim_start();
   test_trim_end();

=== modified file 'test/update/CMakeLists.txt'
--- test/update/CMakeLists.txt	2011-09-27 15:01:33 +0000
+++ test/update/CMakeLists.txt	2011-10-28 02:34:26 +0000
@@ -67,6 +67,15 @@
                 
 ENDFOREACH(TESTFILE)
 
+IF(ZORBA_NO_FULL_TEXT)
+  SET_TESTS_PROPERTIES(
+                      test/update/zorba/store/sc1
+                      test/update/zorba/store/sc2_ex
+                      PROPERTIES WILL_FAIL TRUE)
+ENDIF(ZORBA_NO_FULL_TEXT)
+
+
+
 IF (FOUND_XQUTS AND NOT ZORBA_TEST_W3C_TO_SUBMIT_RESULTS)
   # We "don't care" that these fail
   EXPECTED_FAILURE(test/update/w3c_update_testsuite/XQuery/Put/fn-put-005 3354993)