zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #01479
[Merge] lp:~zorba-coders/zorba/no_unicode into lp:zorba
Paul J. Lucas has proposed merging lp:~zorba-coders/zorba/no_unicode into lp:zorba.
Requested reviews:
Markos Zaharioudakis (markos-za)
Matthias Brantner (matthias-brantner)
For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/no_unicode/+merge/80644
"No Unicode" is now "No ICU": the ZORBA_NO_UNICODE CMake option and preprocessor macro are renamed to ZORBA_NO_ICU.
--
https://code.launchpad.net/~zorba-coders/zorba/no_unicode/+merge/80644
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'CMakeConfiguration.txt'
--- CMakeConfiguration.txt 2011-09-16 19:55:59 +0000
+++ CMakeConfiguration.txt 2011-10-28 02:34:26 +0000
@@ -139,14 +139,14 @@
SET (ZORBA_DEBUG_STRING ${ZORBA_DEBUG_STRING} CACHE BOOL "debug strings")
MESSAGE (STATUS "ZORBA_DEBUG_STRING: " ${ZORBA_DEBUG_STRING})
-SET(ZORBA_NO_UNICODE OFF CACHE BOOL "disable ICU")
-MESSAGE(STATUS "ZORBA_NO_UNICODE: " ${ZORBA_NO_UNICODE})
+SET(ZORBA_NO_ICU OFF CACHE BOOL "disable ICU")
+MESSAGE(STATUS "ZORBA_NO_ICU: " ${ZORBA_NO_ICU})
-IF (ZORBA_NO_UNICODE)
+IF (ZORBA_NO_ICU)
SET (no_full_text ON)
-ELSE (ZORBA_NO_UNICODE)
+ELSE (ZORBA_NO_ICU)
SET (no_full_text OFF)
-ENDIF (ZORBA_NO_UNICODE)
+ENDIF (ZORBA_NO_ICU)
SET (ZORBA_NO_FULL_TEXT ${no_full_text} CACHE BOOL "disable XQuery Full-Text support")
MESSAGE(STATUS "ZORBA_NO_FULL_TEXT: " ${ZORBA_NO_FULL_TEXT})
=== modified file 'CMakeLists.txt'
--- CMakeLists.txt 2011-10-09 13:56:39 +0000
+++ CMakeLists.txt 2011-10-28 02:34:26 +0000
@@ -123,10 +123,14 @@
CHECK_TYPE_SIZE("int64_t" ZORBA_HAVE_INT64_T)
CHECK_CXX_SOURCE_COMPILES ("#include <type_traits>\nint main() { std::enable_if<true,int> x; }" ZORBA_CXX_ENABLE_IF)
-CHECK_CXX_SOURCE_COMPILES ("int main() { int *p = nullptr; }" ZORBA_CXX_NULLPTR)
-CHECK_CXX_SOURCE_COMPILES ("int main() { static_assert(1,\"\"); }" ZORBA_CXX_STATIC_ASSERT)
+SET(CMAKE_EXTRA_INCLUDE_FILES wchar.h)
+CHECK_TYPE_SIZE("wchar_t" ZORBA_SIZEOF_WCHAR_T)
+SET(CMAKE_EXTRA_INCLUDE_FILES)
CHECK_CXX_SOURCE_COMPILES ("#include <memory>\nint main() { std::unique_ptr<int> p; }" ZORBA_CXX_UNIQUE_PTR)
+CHECK_CXX_SOURCE_COMPILES("int main() { int *p = nullptr; }" ZORBA_CXX_NULLPTR)
+CHECK_CXX_SOURCE_COMPILES("int main() { static_assert(1,\"\"); }" ZORBA_CXX_STATIC_ASSERT)
+
################################################################################
# Various cmake macros
=== modified file 'ChangeLog'
--- ChangeLog 2011-10-20 23:05:55 +0000
+++ ChangeLog 2011-10-28 02:34:26 +0000
@@ -54,6 +54,7 @@
* Fixed bug #872796 (validate-in-place can interfere with other update primitives)
* Fixed bug #872799 (validate-in-place can set incorrect types)
* Fixed bug #855715 (Invalid escaped characters in regex not caught)
+ * Fixed bug #868325 (fn:analyze-string fails with some recursive subgroups)
version 2.0.1
=== modified file 'KNOWN_ISSUES.txt'
--- KNOWN_ISSUES.txt 2011-10-07 08:28:43 +0000
+++ KNOWN_ISSUES.txt 2011-10-28 02:34:26 +0000
@@ -37,7 +37,7 @@
* The serializer currently doesn't implement character maps as specified
(http://www.w3.org/TR/xslt-xquery-serialization/#character-maps)
-* In the 2.0 release, setting the CMake variables ZORBA_NO_UNICODE to
+* In the 2.0 release, setting the CMake variable ZORBA_NO_ICU to
ON is not supported.
* The PHP language binding is not supported on Mac OS X. For details,
=== modified file 'doc/cxx/examples/context.cpp'
--- doc/cxx/examples/context.cpp 2011-07-22 08:12:31 +0000
+++ doc/cxx/examples/context.cpp 2011-10-28 02:34:26 +0000
@@ -149,7 +149,11 @@
outStream2 << lQuery << std::endl;
std::cout << outStream2.str() << std::endl;
+#ifndef ZORBA_NO_ICU
if (outStream2.str() != "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\nBook 1.1\n")
+#else
+ if (outStream2.str() != "<?xml version=\"1.0\"?>\nBook 1.1\n")
+#endif /* ZORBA_NO_ICU */
{
std::cerr << "Test 4 failed with a wrong result : " << std::endl
<< outStream2.str() << std::endl;
=== modified file 'include/zorba/config.h.cmake'
--- include/zorba/config.h.cmake 2011-08-22 15:14:14 +0000
+++ include/zorba/config.h.cmake 2011-10-28 02:34:26 +0000
@@ -93,6 +93,8 @@
typedef __int64 int64_t;
#endif /* ZORBA_HAVE_INT64_T */
+#cmakedefine ZORBA_SIZEOF_WCHAR_T @ZORBA_SIZEOF_WCHAR_T@
+
// Compiler
#cmakedefine CLANG
#cmakedefine MSVC
@@ -142,7 +144,7 @@
// Zorba features
#cmakedefine ZORBA_NO_FULL_TEXT
-#cmakedefine ZORBA_NO_UNICODE
+#cmakedefine ZORBA_NO_ICU
#cmakedefine ZORBA_NO_XMLSCHEMA
#cmakedefine ZORBA_NUMERIC_OPTIMIZATION
#cmakedefine ZORBA_VERIFY_PEER_SSL_CERTIFICATE
=== modified file 'src/api/collectionimpl.cpp'
--- src/api/collectionimpl.cpp 2011-09-15 13:11:51 +0000
+++ src/api/collectionimpl.cpp 2011-10-28 02:34:26 +0000
@@ -45,6 +45,7 @@
#include "context/static_context.h"
+#include "types/typeimpl.h"
#include "types/typeops.h"
#include "compiler/xqddf/collection_decl.h"
=== modified file 'src/api/serialization/serializer.cpp'
--- src/api/serialization/serializer.cpp 2011-06-20 15:38:42 +0000
+++ src/api/serialization/serializer.cpp 2011-10-28 02:34:26 +0000
@@ -180,7 +180,6 @@
for (; chars < chars_end; chars++ )
{
-#ifndef ZORBA_NO_UNICODE
// the input string is UTF-8
int char_length = utf8::char_length(*chars);
if (char_length == 0)
@@ -217,7 +216,6 @@
continue;
}
-#endif//ZORBA_NO_UNICODE
// raise an error iff (1) the serialization format is XML 1.0 and (2) the given character is an invalid XML 1.0 character
if (ser && ser->method == PARAMETER_VALUE_XML &&
@@ -332,14 +330,12 @@
{
tr << (char)0xEF << (char)0xBB << (char)0xBF;
}
-#ifndef ZORBA_NO_UNICODE
else if (ser->encoding == PARAMETER_VALUE_UTF_16)
{
// Little-endian
tr.verbatim((char)0xFF);
tr.verbatim((char)0xFE);
}
-#endif
}
}
@@ -792,13 +788,17 @@
emitter::emit_declaration();
if (ser->omit_xml_declaration == PARAMETER_VALUE_NO) {
- tr << "<?xml version=\"" << ser->version << "\" encoding=\"";
- if (ser->encoding == PARAMETER_VALUE_UTF_8) {
- tr << "UTF-8";
-#ifndef ZORBA_NO_UNICODE
- } else if (ser->encoding == PARAMETER_VALUE_UTF_16) {
- tr << "UTF-16";
-#endif
+ tr << "<?xml version=\"" << ser->version;
+ switch (ser->encoding) {
+ case PARAMETER_VALUE_UTF_8:
+ case PARAMETER_VALUE_UTF_16:
+ tr << "\" encoding=\"";
+ switch (ser->encoding) {
+ case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break;
+ case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break;
+ default : ZORBA_ASSERT(false);
+ }
+ break;
}
tr << "\"";
@@ -1104,14 +1104,18 @@
}
tr << "<meta http-equiv=\"content-type\" content=\""
- << ser->media_type << "; charset=";
-
- if (ser->encoding == PARAMETER_VALUE_UTF_8)
- tr << "UTF-8";
-#ifndef ZORBA_NO_UNICODE
- else if (ser->encoding == PARAMETER_VALUE_UTF_16)
- tr << "UTF-16";
-#endif
+ << ser->media_type;
+ switch (ser->encoding) {
+ case PARAMETER_VALUE_UTF_8:
+ case PARAMETER_VALUE_UTF_16:
+ tr << "\" charset=\"";
+ switch (ser->encoding) {
+ case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break;
+ case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break;
+ default : ZORBA_ASSERT(false);
+ }
+ break;
+ }
tr << "\"";
// closed_parent_tag = 1;
}
@@ -1301,14 +1305,18 @@
}
tr << "<meta http-equiv=\"content-type\" content=\""
- << ser->media_type << "; charset=";
-
- if (ser->encoding == PARAMETER_VALUE_UTF_8)
- tr << "UTF-8";
-#ifndef ZORBA_NO_UNICODE
- else if (ser->encoding == PARAMETER_VALUE_UTF_16)
- tr << "UTF-16";
-#endif
+ << ser->media_type;
+ switch (ser->encoding) {
+ case PARAMETER_VALUE_UTF_8:
+ case PARAMETER_VALUE_UTF_16:
+ tr << "\" charset=\"";
+ switch (ser->encoding) {
+ case PARAMETER_VALUE_UTF_8 : tr << "UTF-8" ; break;
+ case PARAMETER_VALUE_UTF_16: tr << "UTF-16"; break;
+ default : ZORBA_ASSERT(false);
+ }
+ break;
+ }
tr << "\"/";
//closed_parent_tag = 1;
}
@@ -1999,10 +2007,8 @@
{
if (!strcmp(aValue, "UTF-8"))
encoding = PARAMETER_VALUE_UTF_8;
-#ifndef ZORBA_NO_UNICODE
else if (!strcmp(aValue, "UTF-16"))
encoding = PARAMETER_VALUE_UTF_16;
-#endif
else
throw XQUERY_EXCEPTION(
err::SEPM0016, ERROR_PARAMS( aValue, aName, ZED( GoodValuesAreUTF8 ) )
@@ -2103,16 +2109,13 @@
{
tr = new transcoder(os, false);
}
-#ifndef ZORBA_NO_UNICODE
else if (encoding == PARAMETER_VALUE_UTF_16)
{
tr = new transcoder(os, true);
}
-#endif
else
{
- ZORBA_ASSERT(0);
- return false;
+ ZORBA_ASSERT(false);
}
if (method == PARAMETER_VALUE_XML)
=== modified file 'src/api/serialization/serializer.h'
--- src/api/serialization/serializer.h 2011-06-14 17:26:33 +0000
+++ src/api/serialization/serializer.h 2011-10-28 02:34:26 +0000
@@ -70,10 +70,8 @@
PARAMETER_VALUE_TEXT,
PARAMETER_VALUE_BINARY,
- PARAMETER_VALUE_UTF_8
-#ifndef ZORBA_NO_UNICODE
- ,PARAMETER_VALUE_UTF_16
-#endif
+ PARAMETER_VALUE_UTF_8,
+ PARAMETER_VALUE_UTF_16
} PARAMETER_VALUE_TYPE;
protected:
=== modified file 'src/diagnostics/diagnostic_en.xml'
--- src/diagnostics/diagnostic_en.xml 2011-10-26 21:32:57 +0000
+++ src/diagnostics/diagnostic_en.xml 2011-10-28 02:34:26 +0000
@@ -3025,85 +3025,167 @@
<value>item type is not a subtype of "$3"</value>
</entry>
- <entry key="U_REGEX_BAD_ESCAPE_SEQUENCE" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_BAD_ESCAPE_SEQUENCE" if="!defined(ZORBA_NO_ICU)">
<value>unrecognized backslash escape sequence</value>
</entry>
- <entry key="U_REGEX_BAD_INTERVAL" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_BAD_INTERVAL" if="!defined(ZORBA_NO_ICU)">
<value>error in {min,max} interval</value>
</entry>
- <entry key="U_REGEX_INTERNAL_ERROR" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_INTERNAL_ERROR" if="!defined(ZORBA_NO_ICU)">
<value>an internal ICU error (bug) was detected</value>
</entry>
- <entry key="U_REGEX_INVALID_BACK_REF" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_INVALID_BACK_REF" if="!defined(ZORBA_NO_ICU)">
<value>backreference to a non-existent capture group</value>
</entry>
- <entry key="U_REGEX_INVALID_FLAG" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_INVALID_FLAG" if="!defined(ZORBA_NO_ICU)">
<value>invalid value for match mode flags</value>
</entry>
- <entry key="U_REGEX_INVALID_RANGE" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_INVALID_RANGE" if="!defined(ZORBA_NO_ICU)">
<value>in character range [x-y], x is greater than y</value>
</entry>
- <entry key="U_REGEX_INVALID_STATE" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_INVALID_STATE" if="!defined(ZORBA_NO_ICU)">
<value>RegexMatcher in invalid state for requested operation</value>
</entry>
- <entry key="U_REGEX_LOOK_BEHIND_LIMIT" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_LOOK_BEHIND_LIMIT" if="!defined(ZORBA_NO_ICU)">
<value>look-behind pattern matches must have a bounded maximum length</value>
</entry>
- <entry key="U_REGEX_MAX_LT_MIN" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_MAX_LT_MIN" if="!defined(ZORBA_NO_ICU)">
<value>in {min,max}, max is less than min</value>
</entry>
- <entry key="U_REGEX_MISMATCHED_PAREN" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_MISMATCHED_PAREN" if="!defined(ZORBA_NO_ICU)">
<value>incorrectly nested parentheses</value>
</entry>
- <entry key="U_REGEX_MISSING_CLOSE_BRACKET" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_MISSING_CLOSE_BRACKET" if="!defined(ZORBA_NO_ICU)">
<value>missing ']'</value>
</entry>
- <entry key="U_REGEX_NUMBER_TOO_BIG" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_NUMBER_TOO_BIG" if="!defined(ZORBA_NO_ICU)">
<value>decimal number is too large</value>
</entry>
- <entry key="U_REGEX_OCTAL_TOO_BIG" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_OCTAL_TOO_BIG" if="!defined(ZORBA_NO_ICU)">
<value>octal character constants must be <= 0377</value>
</entry>
- <entry key="U_REGEX_PROPERTY_SYNTAX" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_PROPERTY_SYNTAX" if="!defined(ZORBA_NO_ICU)">
<value>incorrect Unicode property</value>
</entry>
- <entry key="U_REGEX_RULE_SYNTAX" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_RULE_SYNTAX" if="!defined(ZORBA_NO_ICU)">
<value>syntax error</value>
</entry>
- <entry key="U_REGEX_SET_CONTAINS_STRING" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_SET_CONTAINS_STRING" if="!defined(ZORBA_NO_ICU)">
<value>can not have UnicodeSets containing strings</value>
</entry>
- <entry key="U_REGEX_STACK_OVERFLOW" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_STACK_OVERFLOW" if="!defined(ZORBA_NO_ICU)">
<value>backtrack stack overflow</value>
</entry>
- <entry key="U_REGEX_STOPPED_BY_CALLER" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_STOPPED_BY_CALLER" if="!defined(ZORBA_NO_ICU)">
<value>matching operation aborted by user callback fn</value>
</entry>
- <entry key="U_REGEX_TIME_OUT" if="!defined(ZORBA_NO_UNICODE)">
+ <entry key="U_REGEX_TIME_OUT" if="!defined(ZORBA_NO_ICU)">
<value>maximum allowed match time exceeded</value>
</entry>
- <entry key="U_REGEX_UNIMPLEMENTED" if="!defined(ZORBA_NO_UNICODE)">
- <value>use of regular expression feature that is not yet implemented</value>
- </entry>
+ <entry key="U_REGEX_UNIMPLEMENTED" if="!defined(ZORBA_NO_ICU)">
+ <value>use of regular expression feature that is not yet implemented</value>
+ </entry>
+
+ <!-- Regex Ascii error messages-->
+ <entry key="REGEX_UNIMPLEMENTED" if="defined(ZORBA_NO_ICU)">
+ <value>use of regular expression feature that is not yet implemented</value>
+ </entry>
+
+ <entry key="REGEX_MISMATCHED_PAREN" if="defined(ZORBA_NO_ICU)">
+ <value>incorrectly nested parentheses</value>
+ </entry>
+
+ <entry key="REGEX_BROKEN_P_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+ <value>broken \\p construct</value>
+ </entry>
+
+ <entry key="REGEX_UNKNOWN_PL_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+ <value>unknown \\p{L?} category; supported categories: L, Lu, Ll, Lt, Lm, Lo</value>
+ </entry>
+
+ <entry key="REGEX_UNKNOWN_PM_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+ <value>unknown \\p{M?} category; supported categories: M, Mn, Mc, Me</value>
+ </entry>
+
+ <entry key="REGEX_UNKNOWN_PN_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+ <value>unknown \\p{N?} category; supported categories: N, Nd, Nl, No</value>
+ </entry>
+
+ <entry key="REGEX_UNKNOWN_PP_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+ <value>unknown \\p{P?} category; supported categories: P, Pc, Pd, Ps, Pe, Pi, Pf, Po</value>
+ </entry>
+
+ <entry key="REGEX_UNKNOWN_PZ_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+ <value>unknown \\p{Z?} category; supported categories: Z, Zs, Zl, Zp</value>
+ </entry>
+
+ <entry key="REGEX_UNKNOWN_PS_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+ <value>unknown \\p{S?} category; supported categories: S, Sm, Sc, Sk, So</value>
+ </entry>
+
+ <entry key="REGEX_UNKNOWN_PC_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+ <value>unknown \\p{C?} category; supported categories: C, Cc, Cf, Co, Cn(for not assigned)</value>
+ </entry>
+
+ <entry key="REGEX_BROKEN_PIs_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+ <value>broken \\p{Is} construct; valid characters are [a-zA-Z0-9-]</value>
+ </entry>
+
+ <entry key="REGEX_UNKNOWN_PIs_CONSTRUCT" if="defined(ZORBA_NO_ICU)">
+ <value>unknown \\p{Is} category block; see supported block escapes here: http://www.w3.org/TR/xmlschema-2/#charcter-classes</value>
+ </entry>
+
+ <entry key="REGEX_UNKNOWN_ESC_CHAR" if="defined(ZORBA_NO_ICU)">
+ <value>unknown \\? escape char; supported escapes are: \\[nrt\\|.?*+(){}[]-^$] for char escapes, \\[pP] for categories and \\[sSiIcCdDwW] for multichar groups</value>
+ </entry>
+
+ <entry key="REGEX_INVALID_BACK_REF" if="defined(ZORBA_NO_ICU)">
+ <value>\\$3 backreference to a non-existent capture group ($4 groups so far)</value>
+ </entry>
+
+ <entry key="REGEX_INVALID_ATOM_CHAR" if="defined(ZORBA_NO_ICU)">
+ <value>$3 - invalid character for at atom; forbidden characters are: [{}?*+|^]</value>
+ </entry>
+
+ <entry key="REGEX_INVALID_SUBCLASS" if="defined(ZORBA_NO_ICU)">
+ <value>malformed class subtraction</value>
+ </entry>
+
+ <entry key="REGEX_INVALID_USE_OF_SUBCLASS" if="defined(ZORBA_NO_ICU)">
+ <value>improper use of class subtraction: it must be the last construct in a class group [xxx-[yyy]]</value>
+ </entry>
+
+ <entry key="REGEX_MULTICHAR_IN_CHAR_RANGE" if="defined(ZORBA_NO_ICU)">
+ <value>multichars or char categories cannot be part of a char range</value>
+ </entry>
+
+ <entry key="REGEX_MISSING_CLOSE_BRACKET" if="defined(ZORBA_NO_ICU)">
+ <value>missing close bracket in char group</value>
+ </entry>
+
+ <entry key="REGEX_MAX_LT_MIN" if="defined(ZORBA_NO_ICU)">
+ <value>in {min,max}, max is less than min</value>
+ </entry>
+
<entry key="UnaryArithOp">
<value>unary arithmetic operator</value>
=== modified file 'src/diagnostics/dict_XX_cpp.xq'
--- src/diagnostics/dict_XX_cpp.xq 2011-08-05 02:21:55 +0000
+++ src/diagnostics/dict_XX_cpp.xq 2011-10-28 02:34:26 +0000
@@ -64,6 +64,7 @@
return string-join(
( util:copyright(),
'#include "stdafx.h"',
+ '#include "zorba/config.h"',
'#include "diagnostics/dict_impl.h"',
'',
'namespace zorba {',
=== modified file 'src/diagnostics/pregenerated/dict_en.cpp'
--- src/diagnostics/pregenerated/dict_en.cpp 2011-10-26 21:32:57 +0000
+++ src/diagnostics/pregenerated/dict_en.cpp 2011-10-28 02:34:26 +0000
@@ -20,6 +20,7 @@
*/
#include "stdafx.h"
+#include "zorba/config.h"
#include "diagnostics/dict_impl.h"
namespace zorba {
@@ -560,6 +561,66 @@
{ "~ParserNoCreateTree", "XML tree creation failed" },
{ "~PromotionImpossible", "promotion not possible" },
{ "~QuotedColon_23", "\"$2\": $3" },
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_BROKEN_PIs_CONSTRUCT", "broken \\p{Is} construct; valid characters are [a-zA-Z0-9-]" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_BROKEN_P_CONSTRUCT", "broken \\p construct" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_INVALID_ATOM_CHAR", "$3 - invalid character for at atom; forbidden characters are: [{}?*+|^]" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_INVALID_BACK_REF", "\\$3 backreference to a non-existent capture group ($4 groups so far)" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_INVALID_SUBCLASS", "malformed class subtraction" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_INVALID_USE_OF_SUBCLASS", "improper use of class subtraction: it must be the last construct in a class group [xxx-[yyy]]" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_MAX_LT_MIN", "in {min,max}, max is less than min" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_MISMATCHED_PAREN", "incorrectly nested parentheses" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_MISSING_CLOSE_BRACKET", "missing close bracket in char group" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_MULTICHAR_IN_CHAR_RANGE", "multichars or char categories cannot be part of a char range" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_UNIMPLEMENTED", "use of regular expression feature that is not yet implemented" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_UNKNOWN_ESC_CHAR", "unknown \\? escape char; supported escapes are: \\[nrt\\|.?*+(){}[]-^$] for char escapes, \\[pP] for categories and \\[sSiIcCdDwW] for multichar groups" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_UNKNOWN_PC_CONSTRUCT", "unknown \\p{C?} category; supported categories: C, Cc, Cf, Co, Cn(for not assigned)" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_UNKNOWN_PIs_CONSTRUCT", "unknown \\p{Is} category block; see supported block escapes here: http://www.w3.org/TR/xmlschema-2/#charcter-classes" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_UNKNOWN_PL_CONSTRUCT", "unknown \\p{L?} category; supported categories: L, Lu, Ll, Lt, Lm, Lo" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_UNKNOWN_PM_CONSTRUCT", "unknown \\p{M?} category; supported categories: M, Mn, Mc, Me" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_UNKNOWN_PN_CONSTRUCT", "unknown \\p{N?} category; supported categories: N, Nd, Nl, No" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_UNKNOWN_PP_CONSTRUCT", "unknown \\p{P?} category; supported categories: P, Pc, Pd, Ps, Pe, Pi, Pf, Po" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_UNKNOWN_PS_CONSTRUCT", "unknown \\p{S?} category; supported categories: S, Sm, Sc, Sk, So" },
+#endif
+#if defined(ZORBA_NO_ICU)
+ { "~REGEX_UNKNOWN_PZ_CONSTRUCT", "unknown \\p{Z?} category; supported categories: Z, Zs, Zl, Zp" },
+#endif
{ "~SEPM0009_Not10", "the version parameter has a value other than \"1.0\" and the doctype-system parameter is specified" },
{ "~SEPM0009_NotOmit", "the standalone attribute has a value other than \"omit\"" },
{ "~SchemaAttributeName", "schema-attribute name" },
@@ -583,64 +644,64 @@
{ "~TwoDecimalFormatsSameName_2", "\"$2\": two decimal formats with this name" },
{ "~TwoDefaultDecimalFormats", "two default decimal formats" },
{ "~TypeIsNotSubtype", "item type is not a subtype of \"$3\"" },
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_BAD_ESCAPE_SEQUENCE", "unrecognized backslash escape sequence" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_BAD_INTERVAL", "error in {min,max} interval" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_INTERNAL_ERROR", "an internal ICU error (bug) was detected" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_INVALID_BACK_REF", "backreference to a non-existent capture group" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_INVALID_FLAG", "invalid value for match mode flags" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_INVALID_RANGE", "in character range [x-y], x is greater than y" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_INVALID_STATE", "RegexMatcher in invalid state for requested operation" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_LOOK_BEHIND_LIMIT", "look-behind pattern matches must have a bounded maximum length" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_MAX_LT_MIN", "in {min,max}, max is less than min" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_MISMATCHED_PAREN", "incorrectly nested parentheses" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_MISSING_CLOSE_BRACKET", "missing ']'" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_NUMBER_TOO_BIG", "decimal number is too large" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_OCTAL_TOO_BIG", "octal character constants must be <= 0377" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_PROPERTY_SYNTAX", "incorrect Unicode property" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_RULE_SYNTAX", "syntax error" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_SET_CONTAINS_STRING", "can not have UnicodeSets containing strings" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_STACK_OVERFLOW", "backtrack stack overflow" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_STOPPED_BY_CALLER", "matching operation aborted by user callback fn" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_TIME_OUT", "maximum allowed match time exceeded" },
#endif
-#if !defined(ZORBA_NO_UNICODE)
+#if !defined(ZORBA_NO_ICU)
{ "~U_REGEX_UNIMPLEMENTED", "use of regular expression feature that is not yet implemented" },
#endif
{ "~UnaryArithOp", "unary arithmetic operator" },
=== modified file 'src/precompiled/stdafx.h'
--- src/precompiled/stdafx.h 2011-09-02 19:58:59 +0000
+++ src/precompiled/stdafx.h 2011-10-28 02:34:26 +0000
@@ -333,7 +333,6 @@
//#include "zorbatypes/floatimpl.h"
#include "zorbatypes/ft_token.h"
//#include "zorbatypes/integer.h"
- #include "zorbatypes/libicu.h"
#include "zorbatypes/m_apm.h"
//#include "zorbatypes/rchandle.h"
#include "zorbatypes/rclock.h"
=== modified file 'src/runtime/full_text/CMakeLists.txt'
--- src/runtime/full_text/CMakeLists.txt 2011-08-31 13:17:59 +0000
+++ src/runtime/full_text/CMakeLists.txt 2011-10-28 02:34:26 +0000
@@ -42,11 +42,11 @@
default_tokenizer.cpp
)
-IF (ZORBA_NO_UNICODE)
+IF (ZORBA_NO_ICU)
LIST(APPEND FULLTEXT_SRCS latin_tokenizer.cpp)
-ELSE (ZORBA_NO_UNICODE)
+ELSE (ZORBA_NO_ICU)
LIST(APPEND FULLTEXT_SRCS icu_tokenizer.cpp)
-ENDIF (ZORBA_NO_UNICODE)
+ENDIF (ZORBA_NO_ICU)
ADD_SRC_SUBFOLDER(FULLTEXT_SRCS stemmer LIBSTEMMER_SRCS)
=== modified file 'src/runtime/full_text/default_tokenizer.cpp'
--- src/runtime/full_text/default_tokenizer.cpp 2011-08-31 02:53:07 +0000
+++ src/runtime/full_text/default_tokenizer.cpp 2011-10-28 02:34:26 +0000
@@ -19,22 +19,22 @@
#include <zorba/config.h>
#include "default_tokenizer.h"
-#ifdef ZORBA_NO_UNICODE
+#ifdef ZORBA_NO_ICU
# include "latin_tokenizer.h"
#else
# include "icu_tokenizer.h"
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
namespace zorba {
///////////////////////////////////////////////////////////////////////////////
TokenizerProvider const& default_tokenizer_provider() {
-#ifdef ZORBA_NO_UNICODE
+#ifdef ZORBA_NO_ICU
static LatinTokenizerProvider const instance;
#else
static ICU_TokenizerProvider const instance;
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
return instance;
};
=== modified file 'src/runtime/numerics/format_integer_impl.cpp'
--- src/runtime/numerics/format_integer_impl.cpp 2011-07-07 12:47:14 +0000
+++ src/runtime/numerics/format_integer_impl.cpp 2011-10-28 02:34:26 +0000
@@ -881,7 +881,7 @@
utf8_result += (*valueit);
}
else
- utf8_result += (0x2080 + *valueit - '0');
+ utf8_result += (unicode::code_point)(0x2080 + *valueit - '0');
}
}
else if((c0 == 0x2460) || //CIRCLED DIGIT ONE (1-20)
=== modified file 'src/runtime/numerics/numerics_impl.cpp'
--- src/runtime/numerics/numerics_impl.cpp 2011-07-10 14:55:46 +0000
+++ src/runtime/numerics/numerics_impl.cpp 2011-10-28 02:34:26 +0000
@@ -490,7 +490,7 @@
minus( "-" )
{
utf8_string<zstring> u_per_mille( per_mille );
- u_per_mille = 0x2030;
+ u_per_mille = (unicode::code_point)0x2030;
}
void readFormat(const DecimalFormat_t& df_t)
=== modified file 'src/runtime/strings/strings_impl.cpp'
--- src/runtime/strings/strings_impl.cpp 2011-08-10 18:58:11 +0000
+++ src/runtime/strings/strings_impl.cpp 2011-10-28 02:34:26 +0000
@@ -607,7 +607,6 @@
zstring normForm;
zstring resStr;
unicode::normalization::type normType;
- bool success;
PlanIteratorState* state;
DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
@@ -657,10 +656,9 @@
}
item0->getStringValue2(resStr);
-#ifndef ZORBA_NO_UNICODE
- success = utf8::normalize(resStr, normType, &resStr);
- ZORBA_ASSERT(success);
-#endif//#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
+ ZORBA_ASSERT( utf8::normalize( resStr, normType, &resStr ) );
+#endif /* ZORBA_NO_ICU */
STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state );
}
else
@@ -1688,7 +1686,7 @@
GENV_ITEMFACTORY->createString(strid_item, zstrid);
store::Item_t id_attrib_item;
GENV_ITEMFACTORY->createAttributeNode(id_attrib_item, group_elem.getp(), nr_attrib_name, untyped_type_name, strid_item);
- if(match_startg < 0)
+ if((match_startg < 0) || (match_startg < match_endgood))
continue;
match_endgood = match_endg;
if((i+1)<nr_pattern_groups)
=== modified file 'src/system/globalenv.cpp'
--- src/system/globalenv.cpp 2011-06-26 09:43:12 +0000
+++ src/system/globalenv.cpp 2011-10-28 02:34:26 +0000
@@ -17,11 +17,11 @@
#include "common/common.h"
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
# include <unicode/uclean.h>
# include <unicode/utypes.h>
# include <unicode/udata.h>
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
#ifdef ZORBA_WITH_BIG_INTEGER
# include "zorbatypes/m_apm.h"
@@ -171,7 +171,7 @@
// from one thread only
// see http://www.icu-project.org/userguide/design.html#Init_and_Termination
// and http://www.icu-project.org/apiref/icu4c/uclean_8h.html
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
# if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE)
{
TCHAR self_path[1024];
@@ -201,13 +201,13 @@
udata_setCommonData(icu_appdata, &data_err);
ZORBA_ASSERT(data_err == U_ZERO_ERROR);
- // u_setDataDirectory(self_path);
+ // u_setDataDirectory(self_path);
}
# endif
UErrorCode lICUInitStatus = U_ZERO_ERROR;
u_init(&lICUInitStatus);
ZORBA_ASSERT(lICUInitStatus == U_ZERO_ERROR);
-#endif//ifndef ZORBA_NO_UNICODE
+#endif /* ZORBA_NO_ICU */
}
@@ -219,12 +219,12 @@
// releases statically initialized memory and prevents
// valgrind from reporting those problems at the end
// see http://www.icu-project.org/apiref/icu4c/uclean_8h.html#93f27d0ddc7c196a1da864763f2d8920
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
u_cleanup();
# if defined U_STATIC_IMPLEMENTATION && (defined WIN32 || defined WINCE)
delete[] icu_appdata;
# endif
-#endif//ifndef ZORBA_NO_UNICODE
+#endif /* ZORBA_NO_ICU */
}
=== modified file 'src/util/CMakeLists.txt'
--- src/util/CMakeLists.txt 2011-07-18 14:25:21 +0000
+++ src/util/CMakeLists.txt 2011-10-28 02:34:26 +0000
@@ -38,9 +38,9 @@
LIST(APPEND UTIL_SRCS mmap_file.cpp)
ENDIF(ZORBA_WITH_FILE_ACCESS)
-IF(ZORBA_NO_UNICODE)
+IF(ZORBA_NO_ICU)
LIST(APPEND UTIL_SRCS regex_ascii.cpp)
-ENDIF(ZORBA_NO_UNICODE)
+ENDIF(ZORBA_NO_ICU)
HEADER_GROUP_SUBFOLDER(UTIL_SRCS fx)
HEADER_GROUP_SUBFOLDER(UTIL_SRCS win32)
=== modified file 'src/util/regex.cpp'
--- src/util/regex.cpp 2011-09-24 00:16:36 +0000
+++ src/util/regex.cpp 2011-10-28 02:34:26 +0000
@@ -21,10 +21,10 @@
#include <vector>
#include <zorba/diagnostic_list.h>
-#include "diagnostics/xquery_exception.h"
#include "diagnostics/assert.h"
#include "diagnostics/dict.h"
+#include "diagnostics/xquery_exception.h"
#include "ascii_util.h"
#include "cxx_util.h"
@@ -33,8 +33,7 @@
#define INVALID_RE_EXCEPTION(...) \
XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS( __VA_ARGS__ ) )
-
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
# include <unicode/uversion.h>
U_NAMESPACE_USE
@@ -442,11 +441,11 @@
}
} // namespace unicode
-
-}//namespace zorba
-
-
-#else /* ZORBA_NO_UNICODE */
+} // namespace zorba
+
+///////////////////////////////////////////////////////////////////////////////
+
+#else /* ZORBA_NO_ICU */
#include "zorbatypes/zstring.h"
@@ -470,7 +469,7 @@
case 'i': flags |= REGEX_ASCII_CASE_INSENSITIVE; break;
case 's': flags |= REGEX_ASCII_DOTALL; break;
case 'm': flags |= REGEX_ASCII_MULTILINE; break;
- case 'x': flags |= REGEX_ASCII_COMMENTS; break;
+ case 'x': flags |= REGEX_ASCII_NO_WHITESPACE; break;
case 'q': flags |= REGEX_ASCII_LITERAL; break;
default:
throw XQUERY_EXCEPTION( err::FORX0001, ERROR_PARAMS( *p ) );
@@ -483,6 +482,7 @@
void regex::compile( char const *pattern, char const *flags)
{
parsed_flags = parse_regex_flags(flags);
+ regex_ascii::CRegexAscii_parser regex_parser;
regex_matcher = regex_parser.parse(pattern, parsed_flags);
if(!regex_matcher)
throw INVALID_RE_EXCEPTION(pattern);
@@ -517,6 +517,8 @@
bool regex::next_token( char const *s, size_type *pos, zstring *token,
bool *matched)
{
+ if(!s[*pos])
+ return false;
bool retval;
int match_pos;
int matched_len;
@@ -528,14 +530,8 @@
token->assign(s+*pos, match_pos);
*pos += match_pos + matched_len;
if(matched)
- if(match_pos)
- *matched = true;
- else
- *matched = false;
- if(match_pos)
- return true;
- else
- return false;
+ *matched = true;
+ return true;
}
else
{
@@ -544,7 +540,7 @@
*pos += strlen(s+*pos);
if(matched)
*matched = false;
- return s[*pos] != 0;
+ return true;
}
}
@@ -554,13 +550,9 @@
int matched_pos;
int matched_len;
- bool prev_align = regex_matcher->set_align_begin(true);
- retval = regex_matcher->match_from(s, parsed_flags, &matched_pos, &matched_len);
- regex_matcher->set_align_begin(prev_align);
+ retval = regex_matcher->match_anywhere(s, parsed_flags|REGEX_ASCII_WHOLE_MATCH, &matched_pos, &matched_len);
if(!retval)
return false;
- if(matched_len != strlen(s))
- return false;
return true;
}
@@ -587,14 +579,19 @@
//look for dollars
if(*temprepl == '\\')
{
- temprepl++;
- if(!*temprepl || (*temprepl != '\\') || (*temprepl != '$'))//Invalid replacement string.
- throw XQUERY_EXCEPTION( err::FORX0004, ERROR_PARAMS( replacement ) );
+ if(!(parsed_flags & REGEX_ASCII_LITERAL))
+ {
+ temprepl++;
+ if(!*temprepl)
+ temprepl--;
+ else if((*temprepl != '\\') && (*temprepl != '$'))//Invalid replacement string.
+ throw XQUERY_EXCEPTION( err::FORX0004, ERROR_PARAMS( replacement ) );
+ }
result->append(1, *temprepl);
temprepl++;
continue;
}
- if(*temprepl == '$')
+ if((*temprepl == '$') && !(parsed_flags & REGEX_ASCII_LITERAL))
{
temprepl++;
index = 0;
@@ -648,7 +645,7 @@
if(retval)
{
m_match_pos += m_pos;
- m_pos = m_match_pos = m_matched_len;
+ m_pos = m_match_pos + m_matched_len;
}
else
{
@@ -694,7 +691,7 @@
} // namespace unicode
} // namespace zorba
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
///////////////////////////////////////////////////////////////////////////////
=== modified file 'src/util/regex.h'
--- src/util/regex.h 2011-07-18 14:25:21 +0000
+++ src/util/regex.h 2011-10-28 02:34:26 +0000
@@ -17,15 +17,13 @@
#ifndef ZORBA_REGEX_H
#define ZORBA_REGEX_H
-#ifndef ZORBA_NO_UNICODE
-#include <unicode/regex.h>
-#endif
-
#include "cxx_util.h"
#include "unicode_util.h"
#include "zorbatypes/zstring.h"
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
+
+#include <unicode/regex.h>
namespace zorba {
@@ -44,7 +42,6 @@
namespace unicode {
-
/**
* The %regex class wraps the underlying Unicode regular expression library.
*/
@@ -423,14 +420,13 @@
return replace_all( in.c_str(), replacement.c_str(), out );
}
-
/**
* Set the string to work on, without doing matching yet.
*
* @param in The UTF-8 input string.
* @param len the size in bytes.
*/
- void set_string( const char* in, size_type len );
+ void set_string( char const *in, size_type len );
/**
* Find the next match in string set by set_string().
@@ -496,12 +492,15 @@
} // namespace unicode
} // namespace zorba
-#else ///ZORBA_NO_UNICODE (ascii part:)
+///////////////////////////////////////////////////////////////////////////////
+
+#else /* ZORBA_NO_ICU (ascii part:) */
#include "util/regex_ascii.h"
#include <string>
-namespace zorba{
+namespace zorba {
+
/**
* Converts an XQuery regular expression to the form used by the regular
* expression library Zorba is using (here regex_ascii).
@@ -513,10 +512,10 @@
void convert_xquery_re( zstring const &xq_re, zstring *lib_re,
char const *flags = "" );
-namespace unicode{
+namespace unicode {
+
////////// classes ////////////////////////////////////////////////////////////
-
/**
* The %regex class wraps the underlying Unicode regular expression library.
*/
@@ -525,7 +524,7 @@
/**
* Constructs a %regex.
*/
- regex() : regex_matcher( NULL ) { }
+ regex() : regex_matcher( nullptr ) { }
/**
* Destroys a %regex.
@@ -858,7 +857,6 @@
int get_match_end( int groupId = 0 );
private:
- regex_ascii::CRegexAscii_parser regex_parser;
regex_ascii::CRegexAscii_regex *regex_matcher;
uint32_t parsed_flags;
@@ -873,15 +871,13 @@
regex( regex const& );
regex& operator=( regex const& );
};
+
+///////////////////////////////////////////////////////////////////////////////
+
} // namespace unicode
} // namespace zorba
-#endif /* ZORBA_NO_UNICODE */
-
-
-///////////////////////////////////////////////////////////////////////////////
-
-
+#endif /* ZORBA_NO_ICU */
#endif /* ZORBA_REGEX_H */
/*
* Local variables:
=== modified file 'src/util/regex_ascii.cpp'
--- src/util/regex_ascii.cpp 2011-08-05 02:21:55 +0000
+++ src/util/regex_ascii.cpp 2011-10-28 02:34:26 +0000
@@ -1,4 +1,4 @@
-a/*
+/*
* Copyright 2006-2008 The FLWOR Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -21,6 +21,8 @@
#include "regex_ascii.h"
#include <string.h>
#include "zorbatypes/chartype.h"
+#include "util/unicode_categories.h"
+#include "util/ascii_util.h"
namespace zorba {
namespace regex_ascii{
@@ -62,6 +64,34 @@
+ http://www.w3.org/TR/xquery-operators/#regex-syntax (not implemented)
*/
+
+static bool compare_i(const char *str1, const char *str2)
+{
+ while(*str1 && *str2)
+ {
+ if(ascii::to_lower(*str1) != ascii::to_lower(*str2))
+ return false;
+ str1++;
+ str2++;
+ }
+ if(*str1 || *str2)
+ return false;
+ return true;
+}
+static bool compare_ni(const char *str1, const char *str2, unsigned int maxlen)
+{
+ while(*str1 && *str2 && maxlen)
+ {
+ if(ascii::to_lower(*str1) != ascii::to_lower(*str2))
+ return false;
+ str1++;
+ str2++;
+ maxlen--;
+ }
+ if(maxlen && (*str1 || *str2))
+ return false;
+ return true;
+}
////////////////////////////////////
////Regular expression parsing and building of the tree
////////////////////////////////////
@@ -69,17 +99,10 @@
CRegexAscii_regex* CRegexAscii_parser::parse(const char *pattern, unsigned int flags)
{
this->flags = flags;
- bool align_begin = false;
- if(!(flags & REGEX_ASCII_LITERAL) && (pattern[0] == '^'))
- align_begin = true;
-
int regex_len;
- CRegexAscii_regex* regex = parse_regexp(pattern + (align_begin?1:0), &regex_len);
+ CRegexAscii_regex* regex = parse_regexp(pattern, &regex_len);
- if(regex)
- regex->set_align_begin(align_begin);
-
return regex;
}
@@ -90,46 +113,52 @@
*regex_len = 0;
int branch_len;
regex_depth++;
- CRegexAscii_regex *regex = new CRegexAscii_regex(current_regex);
+ std::auto_ptr<CRegexAscii_regex> regex(new CRegexAscii_regex(current_regex));
if(!current_regex)
- current_regex = regex;
+ current_regex = regex.get();
if(regex_depth >= 2)
{
//mark this as group if it does not start with ?:
if(pattern[0] != '?' || pattern[1] != ':')
- current_regex->subregex.push_back(regex);
+ current_regex->subregex.push_back(regex.get());
else
*regex_len = 2;
}
CRegexAscii_branch *branch;
+ bool must_read_another_branch = true;
while(pattern[*regex_len] && (pattern[*regex_len] != ')'))
{
branch = parse_branch(pattern+*regex_len, &branch_len);
if(!branch)
{
regex_depth--;
- delete regex;
return NULL;
}
regex->add_branch(branch);
*regex_len += branch_len;
+ if(pattern[*regex_len] == '|')
+ (*regex_len)++;
+ else
+ must_read_another_branch = false;
}
- if((current_regex == regex) && (pattern[*regex_len] == ')'))
+ if((current_regex == regex.get()) && (pattern[*regex_len] == ')'))
{
- throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_MISMATCHED_PAREN)) );
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MISMATCHED_PAREN)) );
}
if(pattern[*regex_len])
(*regex_len)++;
+ if(must_read_another_branch)
+ regex->add_branch(new CRegexAscii_branch(current_regex));//add empty branch
regex->flags = 0;//finished initialization
regex_depth--;
- return regex;
+ return regex.release();
}
CRegexAscii_branch* CRegexAscii_parser::parse_branch(const char *pattern, int *branch_len)
{
int piece_len;
- CRegexAscii_branch *branch = new CRegexAscii_branch(current_regex);
+ std::auto_ptr<CRegexAscii_branch> branch(new CRegexAscii_branch(current_regex));
CRegexAscii_piece *piece;
*branch_len = 0;
while(pattern[*branch_len] && (pattern[*branch_len] != '|') && (pattern[*branch_len] != ')'))
@@ -137,21 +166,25 @@
piece = parse_piece(pattern+*branch_len, &piece_len);
if(!piece)
{
- delete branch;
return NULL;
}
+ if(branch->piece_list.size() && dynamic_cast<CRegexAscii_pinstart*>(piece->atom))
+ {
+ //found ^ that is not at the beginning of branch
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_ATOM_CHAR), '^') );
+ }
branch->add_piece(piece);
*branch_len += piece_len;
}
- if(pattern[*branch_len] == '|')
- (*branch_len)++;
- return branch;
+ //if(pattern[*branch_len] == '|')
+ // (*branch_len)++;
+ return branch.release();
}
//piece = atom + quantifier
CRegexAscii_piece* CRegexAscii_parser::parse_piece(const char *pattern, int *piece_len)
{
- CRegexAscii_piece *piece = new CRegexAscii_piece;
+ std::auto_ptr<CRegexAscii_piece> piece(new CRegexAscii_piece);
IRegexAtom *atom;
*piece_len = 0;
@@ -160,16 +193,15 @@
atom = read_atom(pattern, &atom_len);
if(!atom)
{
- delete piece;
return NULL;
}
piece->set_atom(atom);
if(!(flags & REGEX_ASCII_LITERAL))
- read_quantifier(piece, pattern+atom_len, &quantif_len);
+ read_quantifier(piece.get(), pattern+atom_len, &quantif_len);
*piece_len += atom_len + quantif_len;
- return piece;
+ return piece.release();
}
char CRegexAscii_parser::myishex(char c)
@@ -185,24 +217,123 @@
bool CRegexAscii_parser::myisdigit(char c)
{
- return (c >= '0') || (c <= '9');
-}
-
-char CRegexAscii_parser::readChar(const char *pattern, int *char_len, bool *is_multichar)
+ return (c >= '0') && (c <= '9');
+}
+
+bool CRegexAscii_parser::myisletterAZ(char c)
+{
+ return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
+}
+
+static const unicode::code_point specials_extcp[] = {0xFFF0, 0xFFFD, 0};
+
+static CRegexAscii_parser::block_escape_t block_escape[] =
+{
+{{0x0000, 0x007F}, NULL, "BasicLatin"},
+{{0x0080, 0x00FF}, NULL, "Latin-1Supplement"},
+{{0x0100, 0x017F}, NULL, "LatinExtended-A"},
+{{0x0180, 0x024F}, NULL, "LatinExtended-B"},
+{{0x0250, 0x02AF}, NULL, "IPAExtensions"},
+{{0x02B0, 0x02FF}, NULL, "SpacingModifierLetters"},
+{{0x0300, 0x036F}, NULL, "CombiningDiacriticalMarks"},
+{{0x0370, 0x03FF}, NULL, "Greek"},
+{{0x0400, 0x04FF}, NULL, "Cyrillic"},
+{{0x0530, 0x058F}, NULL, "Armenian"},
+{{0x0590, 0x05FF}, NULL, "Hebrew"},
+{{0x0600, 0x06FF}, NULL, "Arabic"},
+{{0x0700, 0x074F}, NULL, "Syriac"},
+{{0x0780, 0x07BF}, NULL, "Thaana"},
+{{0x0900, 0x097F}, NULL, "Devanagari"},
+{{0x0980, 0x09FF}, NULL, "Bengali"},
+{{0x0A00, 0x0A7F}, NULL, "Gurmukhi"},
+{{0x0A80, 0x0AFF}, NULL, "Gujarati"},
+{{0x0B00, 0x0B7F}, NULL, "Oriya"},
+{{0x0B80, 0x0BFF}, NULL, "Tamil"},
+{{0x0C00, 0x0C7F}, NULL, "Telugu"},
+{{0x0C80, 0x0CFF}, NULL, "Kannada"},
+{{0x0D00, 0x0D7F}, NULL, "Malayalam"},
+{{0x0D80, 0x0DFF}, NULL, "Sinhala"},
+{{0x0E00, 0x0E7F}, NULL, "Thai"},
+{{0x0E80, 0x0EFF}, NULL, "Lao"},
+{{0x0F00, 0x0FFF}, NULL, "Tibetan"},
+{{0x1000, 0x109F}, NULL, "Myanmar"},
+{{0x10A0, 0x10FF}, NULL, "Georgian"},
+{{0x1100, 0x11FF}, NULL, "HangulJamo"},
+{{0x1200, 0x137F}, NULL, "Ethiopic"},
+{{0x13A0, 0x13FF}, NULL, "Cherokee"},
+{{0x1400, 0x167F}, NULL, "UnifiedCanadianAboriginalSyllabics"},
+{{0x1680, 0x169F}, NULL, "Ogham"},
+{{0x16A0, 0x16FF}, NULL, "Runic"},
+{{0x1780, 0x17FF}, NULL, "Khmer"},
+{{0x1800, 0x18AF}, NULL, "Mongolian"},
+{{0x1E00, 0x1EFF}, NULL, "LatinExtendedAdditional"},
+{{0x1F00, 0x1FFF}, NULL, "GreekExtended"},
+{{0x2000, 0x206F}, NULL, "GeneralPunctuation"},
+{{0x2070, 0x209F}, NULL, "SuperscriptsandSubscripts"},
+{{0x20A0, 0x20CF}, NULL, "CurrencySymbols"},
+{{0x20D0, 0x20FF}, NULL, "CombiningMarksforSymbols"},
+{{0x2100, 0x214F}, NULL, "LetterlikeSymbols"},
+{{0x2150, 0x218F}, NULL, "NumberForms"},
+{{0x2190, 0x21FF}, NULL, "Arrows"},
+{{0x2200, 0x22FF}, NULL, "MathematicalOperators"},
+{{0x2300, 0x23FF}, NULL, "MiscellaneousTechnical"},
+{{0x2400, 0x243F}, NULL, "ControlPictures"},
+{{0x2440, 0x245F}, NULL, "OpticalCharacterRecognition"},
+{{0x2460, 0x24FF}, NULL, "EnclosedAlphanumerics"},
+{{0x2500, 0x257F}, NULL, "BoxDrawing"},
+{{0x2580, 0x259F}, NULL, "BlockElements"},
+{{0x25A0, 0x25FF}, NULL, "GeometricShapes"},
+{{0x2600, 0x26FF}, NULL, "MiscellaneousSymbols"},
+{{0x2700, 0x27BF}, NULL, "Dingbats"},
+{{0x2800, 0x28FF}, NULL, "BraillePatterns"},
+{{0x2E80, 0x2EFF}, NULL, "CJKRadicalsSupplement"},
+{{0x2F00, 0x2FDF}, NULL, "KangxiRadicals"},
+{{0x2FF0, 0x2FFF}, NULL, "IdeographicDescriptionCharacters"},
+{{0x3000, 0x303F}, NULL, "CJKSymbolsandPunctuation"},
+{{0x3040, 0x309F}, NULL, "Hiragana"},
+{{0x30A0, 0x30FF}, NULL, "Katakana"},
+{{0x3100, 0x312F}, NULL, "Bopomofo"},
+{{0x3130, 0x318F}, NULL, "HangulCompatibilityJamo"},
+{{0x3190, 0x319F}, NULL, "Kanbun"},
+{{0x31A0, 0x31BF}, NULL, "BopomofoExtended"},
+{{0x3200, 0x32FF}, NULL, "EnclosedCJKLettersandMonths"},
+{{0x3300, 0x33FF}, NULL, "CJKCompatibility"},
+{{0x3400, 0x4DB5}, NULL, "CJKUnifiedIdeographsExtensionA"},
+{{0x4E00, 0x9FFF}, NULL, "CJKUnifiedIdeographs"},
+{{0xA000, 0xA48F}, NULL, "YiSyllables"},
+{{0xA490, 0xA4CF}, NULL, "YiRadicals"},
+{{0xAC00, 0xD7A3}, NULL, "HangulSyllables"},
+{{0xE000, 0xF8FF}, NULL, "PrivateUse"},
+{{0xF900, 0xFAFF}, NULL, "CJKCompatibilityIdeographs"},
+{{0xFB00, 0xFB4F}, NULL, "AlphabeticPresentationForms"},
+{{0xFB50, 0xFDFF}, NULL, "ArabicPresentationForms-A"},
+{{0xFE20, 0xFE2F}, NULL, "CombiningHalfMarks"},
+{{0xFE30, 0xFE4F}, NULL, "CJKCompatibilityForms"},
+{{0xFE50, 0xFE6F}, NULL, "SmallFormVariants"},
+{{0xFE70, 0xFEFE}, NULL, "ArabicPresentationForms-B"},
+{{0xFEFF, 0xFEFF}, specials_extcp, "Specials"},
+{{0xFF00, 0xFFEF}, NULL, "HalfwidthandFullwidthForms"}
+};
+
+char CRegexAscii_parser::readChar(const char *pattern,
+ bool for_atom,
+ int *char_len, CHARGROUP_t *multichar_type)
{
char c = 0;
*char_len = 0;
- *is_multichar = false;
+ *multichar_type = CHARGROUP_NO_MULTICHAR;
switch(pattern[*char_len])
{
case '\\':
- { (*char_len)++;
+ {
+ (*char_len)++;
switch(pattern[*char_len])
{
case 'n': c = '\n';break;
case 'r': c = '\r';break;
case 't': c = '\t';break;
case '\\':
+ case '/'://+
case '|':
case '.':
case '?':
@@ -216,18 +347,213 @@
case '['://#x5B
case ']'://#x5D
case '^'://#x5E
+ case '$'://+
c = pattern[*char_len];
break;
case 'p'://catEsc
case 'P'://complEsc
//ignore the prop for now
- c = pattern[*char_len];
- *is_multichar = true;
- if(pattern[*char_len+1] == '{')
- {
- while(pattern[*char_len] != '}')
+ *multichar_type = (CHARGROUP_t)((pattern[*char_len] == 'P') ? 128 : 0);
+ if(pattern[(*char_len)+1] != '{')
+ {
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) );
+ }
+ (*char_len) += 2;
+ switch(pattern[*char_len])
+ {//IsCategory
+ case 'L':
+ {
+ unsigned int temp_int = *multichar_type;
+ temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+ *multichar_type = (CHARGROUP_t)temp_int;
+ switch(pattern[(*char_len)+1])
+ {
+ case '}':
+ c = unicode::UNICODE_Ll + 50;break;
+ case 'u':
+ c = unicode::UNICODE_Lu; (*char_len)++;break;
+ case 'l':
+ c = unicode::UNICODE_Ll; (*char_len)++;break;
+ case 't':
+ c = unicode::UNICODE_Lt; (*char_len)++;break;
+ case 'm':
+ c = unicode::UNICODE_Lm; (*char_len)++;break;
+ case 'o':
+ c = unicode::UNICODE_Lo; (*char_len)++;break;
+ default:
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PL_CONSTRUCT)) );
+ }
+ }break;
+ case 'M':
+ {
+ unsigned int temp_int = *multichar_type;
+ temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+ *multichar_type = (CHARGROUP_t)temp_int;
+ switch(pattern[(*char_len)+1])
+ {
+ case '}':
+ c = unicode::UNICODE_Mc + 50;break;
+ case 'n':
+ c = unicode::UNICODE_Mn; (*char_len)++;break;
+ case 'c':
+ c = unicode::UNICODE_Mc; (*char_len)++;break;
+ case 'e':
+ c = unicode::UNICODE_Me; (*char_len)++;break;
+ default:
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PM_CONSTRUCT)) );
+ }
+ }break;
+ case 'N':
+ {
+ unsigned int temp_int = *multichar_type;
+ temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+ *multichar_type = (CHARGROUP_t)temp_int;
+ switch(pattern[(*char_len)+1])
+ {
+ case '}':
+ c = unicode::UNICODE_Nd + 50;break;
+ case 'd':
+ c = unicode::UNICODE_Nd; (*char_len)++;break;
+ case 'l':
+ c = unicode::UNICODE_Nl; (*char_len)++;break;
+ case 'o':
+ c = unicode::UNICODE_No; (*char_len)++;break;
+ default:
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PN_CONSTRUCT)) );
+ }
+ }break;
+ case 'P':
+ {
+ unsigned int temp_int = *multichar_type;
+ temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+ *multichar_type = (CHARGROUP_t)temp_int;
+ switch(pattern[(*char_len)+1])
+ {
+ case '}':
+ c = unicode::UNICODE_Pc + 50;break;
+ case 'c':
+ c = unicode::UNICODE_Pc; (*char_len)++;break;
+ case 'd':
+ c = unicode::UNICODE_Pd; (*char_len)++;break;
+ case 's':
+ c = unicode::UNICODE_Ps; (*char_len)++;break;
+ case 'e':
+ c = unicode::UNICODE_Pe; (*char_len)++;break;
+ case 'i':
+ c = unicode::UNICODE_Pi; (*char_len)++;break;
+ case 'f':
+ c = unicode::UNICODE_Pf; (*char_len)++;break;
+ case 'o':
+ c = unicode::UNICODE_Po; (*char_len)++;break;
+ default:
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PP_CONSTRUCT)) );
+ }
+ }break;
+ case 'Z':
+ {
+ unsigned int temp_int = *multichar_type;
+ temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+ *multichar_type = (CHARGROUP_t)temp_int;
+ switch(pattern[(*char_len)+1])
+ {
+ case '}':
+ c = unicode::UNICODE_Zl + 50;break;
+ case 's':
+ c = unicode::UNICODE_Zs; (*char_len)++;break;
+ case 'l':
+ c = unicode::UNICODE_Zl; (*char_len)++;break;
+ case 'p':
+ c = unicode::UNICODE_Zp; (*char_len)++;break;
+ default:
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PZ_CONSTRUCT)) );
+ }
+ }break;
+ case 'S':
+ {
+ unsigned int temp_int = *multichar_type;
+ temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+ *multichar_type = (CHARGROUP_t)temp_int;
+ switch(pattern[(*char_len)+1])
+ {
+ case '}':
+ c = unicode::UNICODE_Sc + 50;break;
+ case 'm':
+ c = unicode::UNICODE_Sm; (*char_len)++;break;
+ case 'c':
+ c = unicode::UNICODE_Sc; (*char_len)++;break;
+ case 'k':
+ c = unicode::UNICODE_Sk; (*char_len)++;break;
+ case 'o':
+ c = unicode::UNICODE_So; (*char_len)++;break;
+ default:
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PS_CONSTRUCT)) );
+ }
+ }break;
+ case 'C':
+ {
+ unsigned int temp_int = *multichar_type;
+ temp_int |= CHARGROUP_FLAGS_MULTICHAR_p;
+ *multichar_type = (CHARGROUP_t)temp_int;
+ switch(pattern[(*char_len)+1])
+ {
+ case '}':
+ c = unicode::UNICODE_Cc + 50;break;
+ case 'c':
+ c = unicode::UNICODE_Cc; (*char_len)++;break;
+ case 'f':
+ c = unicode::UNICODE_Cf; (*char_len)++;break;
+ case 'o':
+ c = unicode::UNICODE_Co; (*char_len)++;break;
+ case 'n':
+ c = unicode::UNICODE_Cn; (*char_len)++;break;
+ default:
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PC_CONSTRUCT)) );
+ }
+ }break;
+ case 'I':
+ switch(pattern[(*char_len)+1])
+ {
+ case 's'://IsBlock
+ {
+ unsigned int temp_int = *multichar_type;
+ temp_int |= CHARGROUP_FLAGS_MULTICHAR_Is;
+ *multichar_type = (CHARGROUP_t)temp_int;
(*char_len)++;
+ zstring block_name;
+ char tempc = pattern[(*char_len)+1];
+ while(tempc && (tempc != '}'))
+ {
+ if(!myisletterAZ(tempc) && !myisdigit(tempc) && (tempc != '-'))
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) );
+ block_name.append(1, tempc);
+ (*char_len)++;
+ tempc = pattern[(*char_len)+1];
+ }
+ if(!pattern[(*char_len)+1])
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) );
+ //search for the block name
+ int i;
+ int nr_blocks = sizeof(block_escape)/sizeof(CRegexAscii_parser::block_escape_t);
+ for(i=0;i<nr_blocks;i++)
+ {
+ if(compare_i(block_name.c_str(), block_escape[i].group_name))
+ {
+ c = i;
+ break;
+ }
+ }
+ if(i==nr_blocks)
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_PIs_CONSTRUCT)) );
+ }break;
+ default:
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_PIs_CONSTRUCT)) );
+ }break;
+ default:
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) );
}
+ if(pattern[(*char_len) + 1] != '}')
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_BROKEN_P_CONSTRUCT)) );
+ (*char_len)++;
break;
//multiCharEsc
case 's':
@@ -240,9 +566,11 @@
case 'D':
case 'w':
case 'W':
- *is_multichar = true;
+ *multichar_type = CHARGROUP_FLAGS_MULTICHAR_OTHER;
c = pattern[*char_len];
break;
+ default:
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_UNKNOWN_ESC_CHAR)) );
}
break;
}
@@ -262,6 +590,15 @@
}
(*char_len)++;
+ if((flags & REGEX_ASCII_NO_WHITESPACE) && for_atom &&
+ ((c == ' ') || (c == '\t') || (c == '\r') || (c == '\n')))
+ {
+ //ignore this whitespace
+ int char_len2;
+ char c2 = readChar(pattern + *char_len, for_atom, &char_len2, multichar_type);
+ *char_len += char_len2;
+ return c2;
+ }
return c;
}
@@ -281,13 +618,13 @@
(*atom_len)++;
if(pattern[*atom_len] == '0')
{
- throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_INVALID_BACK_REF)) );
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_BACK_REF), 0, current_regex->subregex.size()) );
}
unsigned int backref = pattern[*atom_len] - '0';
if((backref > current_regex->subregex.size()) ||
(current_regex->subregex.at(backref-1)->flags != 0))
{
- throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(U_REGEX_INVALID_BACK_REF)) );
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_BACK_REF), backref, current_regex->subregex.size()) );
}
while(current_regex->subregex.size() >= backref*10)
{
@@ -303,9 +640,19 @@
break;
}
}
+ (*atom_len)++;
return new CRegexAscii_backref(current_regex, backref);
}
}
+ if((!(flags & REGEX_ASCII_LITERAL)) && (c == '^'))
+ {
+ (*atom_len)++;
+ return new CRegexAscii_pinstart(current_regex);
+ }
+ if((c == '}') || (c == '{') || (c == '?') || (c == '*') || (c == '+') || (c == '|'))
+ {
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_ATOM_CHAR), c) );
+ }
switch(c)
{
case '[':
@@ -350,23 +697,23 @@
{
char c;
int c_len;
- bool is_multichar = false;
+ CHARGROUP_t multichar_type = CHARGROUP_NO_MULTICHAR;
if(!(flags & REGEX_ASCII_LITERAL))
- c = readChar(pattern+*atom_len, &c_len, &is_multichar);
+ c = readChar(pattern+*atom_len, true, &c_len, &multichar_type);
else
{
c = pattern[*atom_len];
c_len = 1;
}
- CRegexAscii_chargroup *chargroup = new CRegexAscii_chargroup(current_regex);
- if(is_multichar)
- chargroup->addMultiChar(c);
+ std::auto_ptr<CRegexAscii_chargroup> chargroup(new CRegexAscii_chargroup(current_regex));
+ if(multichar_type)
+ chargroup->addMultiChar(c, multichar_type);
else if(is_end_line)
chargroup->addEndLine();
else
- chargroup->addCharRange(c, c);
+ chargroup->addOneChar(c);
*atom_len += c_len;
- return chargroup;
+ return chargroup.release();
}
}
}
@@ -376,19 +723,19 @@
//charRange ::= seRange | XmlCharIncDash
CRegexAscii_chargroup* CRegexAscii_parser::readchargroup(const char *pattern, int *chargroup_len)
{
- CRegexAscii_chargroup *chargroup = NULL;
+ std::auto_ptr<CRegexAscii_chargroup> chargroup;
*chargroup_len = 0;
if(pattern[*chargroup_len] == '^')//negative group
{
(*chargroup_len)++;
- chargroup = new CRegexAscii_negchargroup(current_regex);
+ chargroup.reset(new CRegexAscii_negchargroup(current_regex));
}
else
- chargroup = new CRegexAscii_chargroup(current_regex);
+ chargroup.reset(new CRegexAscii_chargroup(current_regex));
while(pattern[*chargroup_len] && (pattern[*chargroup_len]!=']'))
{
char c1, c2;
- bool is_multichar;
+ CHARGROUP_t multichar_type = CHARGROUP_NO_MULTICHAR;
int c1_len;
c1 = pattern[*chargroup_len];
c2 = pattern[*chargroup_len+1];
@@ -398,23 +745,26 @@
CRegexAscii_chargroup *classsub = readchargroup(pattern + *chargroup_len+1 + 1, &classsub_len);
if(!classsub)
{
- delete chargroup;
- return NULL;
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_SUBCLASS)) );
}
chargroup->addClassSub(classsub);
*chargroup_len += 2 + classsub_len + 1;
if(pattern[*chargroup_len-1] != ']')
{
- delete chargroup;
- return NULL;
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_INVALID_USE_OF_SUBCLASS)) );
}
- return chargroup;
+ return chargroup.release();
}
- c1 = readChar(pattern+*chargroup_len, &c1_len, &is_multichar);
- if(is_multichar)//first char is multichar
+ c1 = readChar(pattern+*chargroup_len, false, &c1_len, &multichar_type);
+ if(multichar_type)//first char is multichar
{
- chargroup->addMultiChar(c1);
+ if((pattern[*chargroup_len+c1_len] == '-') &&///might be a range
+ (pattern[*chargroup_len+c1_len+1] != ']'))
+ {
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MULTICHAR_IN_CHAR_RANGE)) );
+ }
+ chargroup->addMultiChar(c1, multichar_type);
*chargroup_len += c1_len;
continue;
}
@@ -422,30 +772,36 @@
{
if(pattern[*chargroup_len+c1_len+1] == ']')//no range, just the last char is '-'
{
- chargroup->addCharRange(c1, c1);
- chargroup->addCharRange('-', '-');
+ chargroup->addOneChar(c1);
+ chargroup->addOneChar('-');
*chargroup_len += c1_len + 1;
continue;
}
- else
+ else if(pattern[*chargroup_len+c1_len+1] != '[')
{
//it is a range
char c3;
int c3_len;
- c3 = readChar(pattern+*chargroup_len+c1_len+1, &c3_len, &is_multichar);
- if(is_multichar)
- return NULL;//error
+ c3 = readChar(pattern+*chargroup_len+c1_len+1, false, &c3_len, &multichar_type);
+ if(multichar_type)
+ {
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MULTICHAR_IN_CHAR_RANGE)) );
+ }
chargroup->addCharRange(c1, c3);
*chargroup_len += c1_len + 1 + c3_len;
continue;
}
}
- chargroup->addCharRange(c1, c1);
+ chargroup->addOneChar(c1);
*chargroup_len += c1_len;
}
if(pattern[*chargroup_len])
(*chargroup_len)++;
- return chargroup;
+ else
+ {
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MISSING_CLOSE_BRACKET)) );
+ }
+ return chargroup.release();
}
void CRegexAscii_parser::read_quantifier(CRegexAscii_piece *piece,
@@ -496,6 +852,10 @@
max = max*10 + pattern[*quantif_len] - '0';
(*quantif_len)++;
}
+ if(max < min)
+ {
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(pattern, ZED(REGEX_MAX_LT_MIN)) );
+ }
piece->set_quantifier_min_max(min, max, true);
}
while(pattern[*quantif_len] && (pattern[*quantif_len] != '}'))
@@ -528,6 +888,8 @@
{
matched_source = NULL;
matched_len = 0;
+// backup_matched_source = NULL;
+// backup_matched_len = 0;
flags = 128;//set to 0 after initialization
}
@@ -548,13 +910,6 @@
*/
}
-bool CRegexAscii_regex::set_align_begin(bool align_begin)
-{
- bool prev_align = this->align_begin;
- this->align_begin = align_begin;
- return prev_align;
-}
-
void CRegexAscii_regex::add_branch(CRegexAscii_branch *branch)
{
branch_list.push_back(branch);
@@ -579,18 +934,19 @@
return subregex.size();
}
-CRegexAscii_branch::CRegexAscii_branch(CRegexAscii_regex* regex) :
- IRegexMatcher(regex)
+CRegexAscii_branch::CRegexAscii_branch(CRegexAscii_regex* regex)
+ //:
+ //IRegexMatcher(regex)
{
}
CRegexAscii_branch::~CRegexAscii_branch()
{
- std::list<CRegexAscii_piece*>::iterator piece_it;
+ std::list<RegexAscii_pieceinfo>::iterator piece_it;
for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++)
{
- delete (*piece_it);
+ delete (*piece_it).piece;
}
}
@@ -601,6 +957,8 @@
CRegexAscii_piece::CRegexAscii_piece()
{
+ atom = NULL;
+ regex_atom = NULL;
}
CRegexAscii_piece::~CRegexAscii_piece()
@@ -611,6 +969,7 @@
void CRegexAscii_piece::set_atom(IRegexAtom *atom)
{
this->atom = atom;
+ this->regex_atom = dynamic_cast<CRegexAscii_regex*>(atom);
}
void CRegexAscii_piece::set_quantifier_min_max(int min, int max, bool strict_max)
@@ -631,6 +990,8 @@
}
bool CRegexAscii_piece::get_is_reluctant()
{
+ if(atom->regex_intern->flags & REGEX_ASCII_MINIMAL_MATCH)
+ return true;
return is_reluctant;
}
@@ -646,10 +1007,10 @@
delete classsub;
}
-void CRegexAscii_chargroup::addMultiChar(char c)
+void CRegexAscii_chargroup::addMultiChar(char c, CHARGROUP_t multichar_type)
{
chargroup_t cgt;
- cgt.flags = CHARGROUP_FLAGS_MULTICHAR;
+ cgt.flags = multichar_type;
cgt.c1 = c;
cgt.c2 = 0;
chargroup_list.push_back(cgt);
@@ -667,12 +1028,20 @@
void CRegexAscii_chargroup::addCharRange(char c1, char c2)
{
chargroup_t cgt;
- cgt.flags = 0;
+ cgt.flags = CHARGROUP_FLAGS_CHAR_RANGE;
cgt.c1 = c1;
cgt.c2 = c2;
chargroup_list.push_back(cgt);
}
+void CRegexAscii_chargroup::addOneChar(char c)
+{
+ chargroup_t cgt;
+ cgt.flags = CHARGROUP_FLAGS_ONECHAR;
+ cgt.c1 = c;
+ chargroup_list.push_back(cgt);
+}
+
void CRegexAscii_chargroup::addClassSub(CRegexAscii_chargroup* classsub)
{
this->classsub = classsub;
@@ -706,6 +1075,11 @@
{
}
+CRegexAscii_pinstart::CRegexAscii_pinstart(CRegexAscii_regex* regex):
+ IRegexAtom(regex)
+{
+}
+
CRegexAscii_parser::CRegexAscii_parser()
{
current_regex = NULL;
@@ -720,6 +1094,65 @@
//////////////////////////////////////////
////Matching the pattern on a string
/////////////////////////////////////////
+static std::list<RegexAscii_pieceinfo> empty_pieces;//empty list of pieces
+/*
+std::list<RegexAscii_pieceinfo>::iterator
+IRegexAtom::choose_next_piece(const char *source, int *matched_len,
+ std::list<RegexAscii_pieceinfo>::iterator this_piece,
+ std::list<RegexAscii_pieceinfo>::iterator end_piece)
+{
+ //if this_piece is repetition, repeat until max, then go to next piece
+ int min, max;
+ bool strict_max;
+ while(this_piece != end_piece)
+ {
+ (*this_piece).piece->get_quantifier(&min, &max, &strict_max);
+ if(max <= ((*this_piece).nr_matches))//finished this piece
+ {
+ this_piece++;
+ }
+ else
+ break;
+ }
+ return this_piece;
+}
+*/
+
+bool IRegexAtom::match(const char *source, int *start_from_branch, int *matched_len,
+ std::list<RegexAscii_pieceinfo>::iterator this_piece,
+ std::list<RegexAscii_pieceinfo>::iterator end_piece)
+{
+ *start_from_branch = 0;
+ bool retmatch;
+ retmatch = match_internal(source, start_from_branch, matched_len);
+ if(!retmatch)
+ return false;
+
+ if(this_piece == end_piece)
+ return true;
+
+ (*this_piece).nr_matches++;
+ int min,max;
+ bool strict_max;
+ (*this_piece).piece->get_quantifier(&min, &max, &strict_max);
+ std::list<RegexAscii_pieceinfo>::iterator init_piece = this_piece;
+ if(((min == 1) && (max == 1)) || //the simple common case
+ ((*matched_len == 0) && ((*this_piece).nr_matches>=min)))//to avoid infinite loop
+ {
+ this_piece++;
+ if(this_piece == end_piece)
+ return true;
+ }
+ int matched_len2;
+ retmatch = (*this_piece).piece->match_piece(this_piece, end_piece, source + *matched_len, &matched_len2);
+ if(!retmatch)
+ {
+ (*init_piece).nr_matches--;
+ return false;
+ }
+ *matched_len += matched_len2;
+ return true;
+}
//try every position in source to match the pattern
bool CRegexAscii_regex::match_anywhere(const char *source, unsigned int flags,
@@ -734,6 +1167,7 @@
int *match_pos, int *matched_len)
{
this->flags = flags;
+ this->source_start = source;
reachedEnd = false;
std::vector<CRegexAscii_regex*>::iterator regex_it;
@@ -741,30 +1175,52 @@
{
(*regex_it)->matched_source = NULL;
}
-// if(!source[0])
-// {
-// if(branch_list.empty())
-// return true;
-// else
-// return false;
-// }
-
- bool skip_first_match = false;
- if(*match_pos && align_begin)
- skip_first_match = true;
+
+ std::vector<std::pair<const char*, int> > saved_subregex;
+
+ if(*match_pos && (flags & REGEX_ASCII_WHOLE_MATCH))
+ return false;
+
do
{
- if(!skip_first_match)
- {
- if(match(source + *match_pos, matched_len))
- return true;
- }
- skip_first_match = false;
- if(align_begin)
+ int start_from_branch = 0;
+ int longest_match = -1;
+ while(1)
+ {
+ if(!match(source + *match_pos, &start_from_branch, matched_len, empty_pieces.begin(), empty_pieces.end()))
+ break;
+ if(longest_match < *matched_len)
+ {
+ longest_match = *matched_len;
+ if(start_from_branch && (flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+ save_subregex_list(saved_subregex);
+ }
+ if(!start_from_branch || !(flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+ break;
+ //else try the other branches to see which is longer
+ }
+ if(longest_match != -1)
+ {
+ *matched_len = longest_match;
+ if(saved_subregex.size())
+ load_subregex_list(saved_subregex);
+ if(flags & REGEX_ASCII_WHOLE_MATCH)
+ {
+ if(!source[*match_pos+*matched_len])
+ return true;
+ if((flags & REGEX_ASCII_MULTILINE) &&
+ ((source[*match_pos+*matched_len] == '\n') || (source[*match_pos+*matched_len] == '\r')))
+ return true;
+ return false;
+ }
+ return true;
+ }
+
+ if(flags & REGEX_ASCII_WHOLE_MATCH)
{
if(flags & REGEX_ASCII_MULTILINE)
{
- //goto the next line
+ //go to next line
while(source[*match_pos] && (source[*match_pos] != '\n') && (source[*match_pos] != '\r'))
(*match_pos)++;
if(source[*match_pos] == '\n')
@@ -780,153 +1236,528 @@
(*match_pos)++;
}
if(!source[*match_pos])
- return false;
+ break;
continue;
}
- return false;
+ break;
}
if(!source[*match_pos])
break;
(*match_pos)++;
}
while(source[*match_pos]);
+ if(!source[*match_pos])
+ {
+ reachedEnd = true;
+ }
return false;
}
+void CRegexAscii_regex::reset_match()
+{
+// this->backup_matched_source = this->matched_source;
+// this->backup_matched_len = this->matched_len;
+ this->matched_source = NULL;
+ this->matched_len = 0;
+ std::list<CRegexAscii_branch*>::iterator branch_it;
+ for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
+ {
+ (*branch_it)->reset();
+ }
+}
+/*
+void CRegexAscii_regex::restore_match()
+{
+ this->matched_source = this->backup_matched_source;
+ this->matched_len = this->backup_matched_len;
+ std::list<CRegexAscii_branch*>::iterator branch_it;
+ for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
+ {
+ (*branch_it)->restore();
+ }
+}
+*/
//match any of the branches
-bool CRegexAscii_regex::match(const char *source, int *matched_len)
+bool CRegexAscii_regex::match(const char *source, int *start_from_branch, int *matched_len,
+ std::list<RegexAscii_pieceinfo>::iterator next_piece,
+ std::list<RegexAscii_pieceinfo>::iterator end_piece)
{
reachedEnd = false;
+ if(!(flags & REGEX_ASCII_GROUPING_LEN_WHOLE_PIECE) ||
+ (this->matched_source == NULL) || ((this->matched_source + this->matched_len) != source))
+ this->matched_source = source;
+ *matched_len = 0;
std::list<CRegexAscii_branch*>::iterator branch_it;
- for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
- {
- if((*branch_it)->match(source, matched_len))
- {
- matched_source = source;
- this->matched_len = *matched_len;
+ if(*start_from_branch == 0)
+ {
+ for(branch_it = branch_list.begin(); branch_it != branch_list.end(); branch_it++)
+ {
+ (*branch_it)->reset();
+ }
+ }
+
+ branch_it = branch_list.begin();
+ if(*start_from_branch)
+ {
+ for(int i=0;i<*start_from_branch;i++)
+ branch_it++;
+ }
+ (*start_from_branch)++;
+ for(; branch_it != branch_list.end(); branch_it++,(*start_from_branch)++)
+ {
+ if((*branch_it)->match(source, matched_len, this, next_piece, end_piece))
+ {
+ //matched_source = source;
+ //this->matched_len = *matched_len;
return true;
}
}
- matched_source = NULL;
- matched_len = 0;
+ *start_from_branch = 0;
+ if(this->matched_source == source)
+ this->matched_source = NULL;
+ *matched_len = 0;
return false;
}
+void CRegexAscii_regex::save_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex)
+{
+ saved_subregex.resize(0);
+ saved_subregex.reserve(subregex.size());
+ std::vector<CRegexAscii_regex*>::iterator it;
+ for(it=subregex.begin(); it != subregex.end(); it++)
+ {
+ saved_subregex.push_back(std::pair<const char*, int>((*it)->matched_source, (*it)->matched_len));
+ }
+}
+
+void CRegexAscii_regex::load_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex)
+{
+ std::vector<std::pair<const char*, int> >::iterator it;
+ std::vector<CRegexAscii_regex*>::iterator subit;
+ for(it=saved_subregex.begin(), subit = subregex.begin(); it != saved_subregex.end(); it++, subit++)
+ {
+ (*subit)->matched_source = (*it).first;
+ (*subit)->matched_len = (*it).second;
+ }
+}
+
+void CRegexAscii_branch::reset()
+{
+ std::list<RegexAscii_pieceinfo>::iterator piece_it;
+ for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++)
+ {
+ (*piece_it).piece->atom->reset_match();
+ }
+}
+/*
+void CRegexAscii_branch::restore()
+{
+ std::list<RegexAscii_pieceinfo>::iterator piece_it;
+ for(piece_it = piece_list.begin(); piece_it != piece_list.end(); piece_it++)
+ {
+ (*piece_it).piece->atom->restore_match();
+ }
+}
+*/
//match all the pieces
-bool CRegexAscii_branch::match(const char *source, int *matched_len)
+bool CRegexAscii_branch::match(const char *source, int *matched_len,
+ CRegexAscii_regex* group_regex,
+ std::list<RegexAscii_pieceinfo>::iterator next_piece,
+ std::list<RegexAscii_pieceinfo>::iterator end_piece)
{
- std::list<CRegexAscii_piece*>::iterator piece_it;
+ std::list<RegexAscii_pieceinfo>::iterator piece_it;
piece_it = piece_list.begin();
+ //if(piece_it == piece_list.end())
+ //if(!source[0])
+ // return true;
+ //else
+ // return false;
if(piece_it == piece_list.end())
- if(source[0])
- return false;
+ {
+ piece_it = next_piece;
+ if(next_piece == end_piece)
+ {
+ group_regex->matched_len = 0;
+ return true;
+ }
+ }
+
+ std::list<RegexAscii_pieceinfo> temp_pieces(piece_list);
+ temp_pieces.push_back(group_regex);//this will be used to store the group match
+ temp_pieces.insert(temp_pieces.end(), next_piece, end_piece);
+
+ return (*piece_it).piece->match_piece(temp_pieces.begin(), temp_pieces.end(), source, matched_len);
+}
+
+bool CRegexAscii_piece::match_piece(std::list<RegexAscii_pieceinfo>::iterator piece_it,
+ std::list<RegexAscii_pieceinfo>::iterator end_it,
+ const char *source, int *matched_len)
+{
+ if((*piece_it).nr_matches < 0)
+ {
+ //special case, store the group match
+ (*piece_it).group_regex->matched_len = source - (*piece_it).group_regex->matched_source;
+ piece_it++;
+ if(piece_it == end_it)
+ return true;
else
- return true;
- if(!(*piece_it)->get_is_reluctant())
- return match_piece_iter_normal(piece_it, source, matched_len);
+ return (*piece_it).piece->match_piece(piece_it, end_it, source, matched_len);
+ }
+
+ if(!get_is_reluctant())
+ return match_piece_iter_normal(piece_it, end_it, source, matched_len);
else
- return match_piece_iter_reluctant(piece_it, source, matched_len);
-}
-
-//match as less as possible
-bool CRegexAscii_branch::match_piece_iter_reluctant(
- std::list<CRegexAscii_piece*>::iterator piece_it,
+ return match_piece_iter_reluctant(piece_it, end_it, source, matched_len);
+}
+
+int CRegexAscii_piece::choose_another_branch(std::vector<std::pair<int,int> > &match_lens)
+{
+ int i = match_lens.size()-1;
+ i--;
+ while((i >= 0) && (match_lens.at(i).second == 0))
+ i--;
+ if(i < 0)
+ return -1;//no more branches
+ match_lens.resize(i+1);
+ i++;
+ return i;
+}
+
+bool CRegexAscii_piece::is_regex_atom()
+{
+ return regex_atom != NULL;
+}
+
+//match as less as possible (shortest string)
+bool CRegexAscii_piece::match_piece_iter_reluctant(
+ std::list<RegexAscii_pieceinfo>::iterator piece_it,
+ std::list<RegexAscii_pieceinfo>::iterator end_it,
const char *source, int *matched_len)
{
*matched_len = 0;
- if(piece_it == piece_list.end())
+ if(piece_it == end_it)
return true;
int min, max;
bool strict_max;
//std::vector<int> match_lens;
- (*piece_it)->get_quantifier(&min, &max, &strict_max);
- if(strict_max && (max >= 0))
+ (*piece_it).piece->get_quantifier(&min, &max, &strict_max);
+
+ std::vector<std::pair<const char*, int> > saved_subregex;
+
+ if(is_regex_atom())
{
- int timeslen;
- //check if the piece doesn't exceed the max match
- if((*piece_it)->match_piece_times(source, &timeslen, max+1, NULL))
- return false;///too many matches
+ //recursive
+ bool retmatch;
+ atom->regex_intern->save_subregex_list(saved_subregex);
+ if((*piece_it).nr_matches >= min)
+ {
+ //go to next piece
+ std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
+ next_it++;
+ if(next_it == end_it)
+ return true;
+ retmatch = (*next_it).piece->match_piece(next_it, end_it, source, matched_len);
+ if(retmatch)
+ return true;
+ }
+ if(((max == -1) || ((*piece_it).nr_matches < max)) &&//try further with this piece
+ (((*piece_it).nr_matches < min) || ((*piece_it).nr_matches == 0) || ((*piece_it).piece->regex_atom->matched_len)))//if matched_len is zero, avoid infinite loop
+ {
+ int start_from_branch = 0;
+ int shortest_len = -1;
+ bool branch_saved = false;
+ //try all branches to get the shortest len
+ (*piece_it).nr_matches++;
+ while(atom->match(source, &start_from_branch, matched_len, piece_it, end_it))
+ {
+ if((shortest_len == -1) || (shortest_len > *matched_len))
+ {
+ shortest_len = *matched_len;
+ if(start_from_branch && (atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+ {
+ atom->regex_intern->save_subregex_list(saved_subregex);
+ branch_saved = true;
+ }
+ }
+ if(!start_from_branch || !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+ break;
+ }
+ if(shortest_len != -1)
+ {
+ *matched_len = shortest_len;
+ if(branch_saved)
+ atom->regex_intern->load_subregex_list(saved_subregex);
+ return true;
+ }
+ else
+ {
+ (*piece_it).nr_matches--;
+ atom->regex_intern->load_subregex_list(saved_subregex);
+ return false;
+ }
+ }
+ else
+ {
+ atom->regex_intern->load_subregex_list(saved_subregex);
+ return false;
+ }
}
- int i=min;
- std::list<CRegexAscii_piece*>::iterator next_it = piece_it;
+ int i=0;
+ int shortest_len = -1;
+ int otherpieces_shortest = -1;
+ int i_shortest = -1;
+ std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
+ std::vector<std::pair<int,int> > match_lens;
next_it++;
int pieceslen = 0;
while(1)
{
- if((max > 0) && (i>max))
- break;
- int piecelen = 0;
- if((*piece_it)->match_piece_times(source+pieceslen, &piecelen, !pieceslen ? i : 1, NULL))
- {
- pieceslen += piecelen;
+ int piecelen = 0;
+ bool retmatch;
+ retmatch = match_piece_times(source, &piecelen, i < min ? min : i, &match_lens);
+ i = match_lens.size()-1;//number of matches
+ if(i<0)
+ i = 0;
+ if((i>=min))
+ {
+ pieceslen = piecelen;
+ if((shortest_len >= 0) && (shortest_len <= pieceslen))//this branch is longer
+ {//try another branch
+ i = choose_another_branch(match_lens);
+ if(i >= 0)
+ continue;//try another branch
+ else
+ break;
+ }
int otherpieces = 0;
- if((next_it == piece_list.end()) ||
- ((*next_it)->get_is_reluctant() && match_piece_iter_reluctant(next_it, source+pieceslen, &otherpieces)) ||
- (!(*next_it)->get_is_reluctant() && match_piece_iter_normal(next_it, source+pieceslen, &otherpieces)))
- {
- *matched_len = pieceslen + otherpieces;
- return true;
- }
+ if((next_it == end_it) ||
+ (*next_it).piece->match_piece(next_it, end_it, source+pieceslen, &otherpieces)
+ )
+ {
+ if((i == pieceslen) || (match_lens.at(0).second == 0) ||//minimum achieved already, cannot go lower than that
+ !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+ {
+ *matched_len = pieceslen + otherpieces;
+ return true;
+ }
+ if((shortest_len < 0) || (shortest_len > pieceslen))
+ {
+ shortest_len = pieceslen;
+ otherpieces_shortest = otherpieces;
+ i_shortest = i;
+ if(match_lens.at(0).second != 0)
+ atom->regex_intern->save_subregex_list(saved_subregex);
+ }
+ i = choose_another_branch(match_lens);
+ if(i >= 0)
+ continue;//try another branch
+ else
+ break;
+ }
+ else
+ {
+ //try further
+ if(retmatch)
+ {
+ i++;
+ if((max < 0) || (i<=max))
+ continue;
+ i--;
+ }
+ }
+ }
+
+ if(i==0)
+ {
+ break;
}
else
- break;
- i++;
+ {
+ i = choose_another_branch(match_lens);
+ if(i >= 0)
+ continue;//try another branch
+ else
+ break;
+ }
}
+ if(shortest_len >= 0)
+ {
+ if(strict_max && (max>=0) && (i_shortest > max))
+ return false;
+ *matched_len = shortest_len + otherpieces_shortest;
+ if(saved_subregex.size())
+ atom->regex_intern->load_subregex_list(saved_subregex);
+ return true;
+ }
return false;
}
//match as much as possible
-bool CRegexAscii_branch::match_piece_iter_normal(
- std::list<CRegexAscii_piece*>::iterator piece_it,
+bool CRegexAscii_piece::match_piece_iter_normal(
+ std::list<RegexAscii_pieceinfo>::iterator piece_it,
+ std::list<RegexAscii_pieceinfo>::iterator end_it,
const char *source, int *matched_len)
{
*matched_len = 0;
int min, max;
bool strict_max;
- std::vector<int> match_lens;
- (*piece_it)->get_quantifier(&min, &max, &strict_max);
- int timeslen;
- if(strict_max && (max >= 0))
+ std::vector<std::pair<int,int> > match_lens;
+ (*piece_it).piece->get_quantifier(&min, &max, &strict_max);
+ int timeslen = 0;
+ std::vector<std::pair<const char*, int> > saved_subregex;
+
+ if(is_regex_atom())
{
- //check if the piece doesn't exceed the max match
- //if((*piece_it)->match_piece_times(source, &timeslen, max+1, &match_lens))
- // return false;///too many matches
- (*piece_it)->match_piece_times(source, &timeslen, max, &match_lens);
+ //recursive
+ bool retmatch;
+ atom->regex_intern->save_subregex_list(saved_subregex);
+ if(((max == -1) || ((*piece_it).nr_matches < max)) && //try further with this piece
+ (((*piece_it).nr_matches < min) || ((*piece_it).nr_matches == 0) || ((*piece_it).piece->regex_atom->matched_len)))//if matched_len is zero, avoid infinite loop
+ {
+ int start_from_branch = 0;
+ int longest_len = -1;
+ bool branch_saved = false;
+ //try all branches to get the longest len
+ (*piece_it).nr_matches++;
+ while(atom->match(source, &start_from_branch, matched_len, piece_it, end_it))
+ {
+ if((longest_len < *matched_len))
+ {
+ longest_len = *matched_len;
+ if(start_from_branch && (atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+ {
+ atom->regex_intern->save_subregex_list(saved_subregex);
+ branch_saved = true;
+ }
+ }
+ if(!start_from_branch || !(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+ break;
+ }
+ if(longest_len != -1)
+ {
+ *matched_len = longest_len;
+ if(branch_saved)
+ atom->regex_intern->load_subregex_list(saved_subregex);
+ return true;
+ }
+ else
+ {
+ atom->regex_intern->load_subregex_list(saved_subregex);
+ (*piece_it).nr_matches--;
+ }
+ }
+ if((*piece_it).nr_matches >= min)
+ {
+ //go to next piece
+ std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
+ next_it++;
+ if(next_it == end_it)
+ return true;
+ retmatch = (*next_it).piece->match_piece(next_it, end_it, source, matched_len);
+ if(!retmatch)
+ atom->regex_intern->load_subregex_list(saved_subregex);
+ return retmatch;
+ }
+ else
+ {
+ // regex_atom->restore_match();
+ atom->regex_intern->load_subregex_list(saved_subregex);
+ return false;
+ }
}
- else if(!strict_max && (max >= 0))
- (*piece_it)->match_piece_times(source, &timeslen, max, &match_lens);
- else
- (*piece_it)->match_piece_times(source, &timeslen, -1, &match_lens);
- int i;
- std::list<CRegexAscii_piece*>::iterator next_it = piece_it;
+ int longest_len = -1;
+ int otherpieces_longest = -1;
+ int i_longest = -1;
+ int i = max;
+ std::list<RegexAscii_pieceinfo>::iterator next_it = piece_it;
next_it++;
- if(next_it == piece_list.end())
+
+ bool retmatch;
+ while(1)
{
- if((int)match_lens.size() > min)
- {
- *matched_len = timeslen;
- return true;
+ retmatch = match_piece_times(source, &timeslen, i, &match_lens);
+ i=match_lens.size()-1;//number of matches
+ if((i>=min))
+ {
+ if(timeslen < longest_len)
+ {//this branch is no use
+ i = choose_another_branch(match_lens);
+ if(i >= 0)
+ {
+ i = max;
+ continue;//try another branch
+ }
+ else
+ break;
+ }
+ //int piecelen = 0;
+ int otherpieces = 0;
+ if((next_it == end_it) ||
+ (*next_it).piece->match_piece(next_it, end_it, source+timeslen, &otherpieces)
+ )
+ {
+ if(timeslen > longest_len)
+ {
+ longest_len = timeslen;
+ otherpieces_longest = otherpieces;
+ i_longest = i;
+ if(!(atom->regex_intern->flags & REGEX_ASCII_GET_LONGEST_BRANCH))
+ {
+ *matched_len = longest_len + otherpieces_longest;
+ return true;
+ }
+ else
+ {
+ if(match_lens.at(0).second)
+ atom->regex_intern->save_subregex_list(saved_subregex);
+ }
+ }
+ }
+ else
+ {
+ if(!match_lens.at(0).second)
+ {
+ match_lens.resize(match_lens.size()-1);
+ i--;
+ if(i >= 0)
+ continue;//try smaller
+ else
+ break;
+ }
+ else
+ {
+ i = choose_another_branch(match_lens);
+ if(i >= 0)
+ continue;//try another branch
+ else
+ break;
+ }
+ }
+ }
+ //now try another branch
+ i = choose_another_branch(match_lens);
+ if(i >= 0)
+ {
+ i = max;
+ continue;//try another branch
}
else
- return false;
- }
- for(i=match_lens.size()-1; i>=min; i--)
+ break;
+ }//end while
+
+ if(longest_len >= 0)
{
- int piecelen = 0;
- int otherpieces = 0;
- if(((*next_it)->get_is_reluctant() && match_piece_iter_reluctant(next_it, source+match_lens[i]+piecelen, &otherpieces)) ||
- (!(*next_it)->get_is_reluctant() && match_piece_iter_normal(next_it, source+match_lens[i]+piecelen, &otherpieces)))
- {
- *matched_len = match_lens[i] + piecelen + otherpieces;
- return true;
- }
+ *matched_len = longest_len + otherpieces_longest;
+ if(saved_subregex.size())
+ atom->regex_intern->load_subregex_list(saved_subregex);
+ return true;
}
return false;
@@ -935,31 +1766,68 @@
bool CRegexAscii_piece::match_piece_times(const char *source,
int *piecelen,
int times,
- std::vector<int> *match_lens)
+ std::vector<std::pair<int,int> > *match_lens)
{
- *piecelen = 0;
- for(int i=0;(times < 0) || (i<times);i++)
- {
+ int i=0;
+ if(match_lens && match_lens->size())
+ {
+ i = match_lens->size()-1;
+ }
+ if(match_lens && match_lens->size())
+ *piecelen = match_lens->at(match_lens->size()-1).first;
+ else
+ *piecelen = 0;
+ if((times >= 0) && (i>=times))
+ return true;
+ for(;(times < 0) || (i<times);i++)
+ {
+ int atomlen;
+ int start_from_branch = 0;
+ if(match_lens && (i<(int)match_lens->size()))
+ start_from_branch = match_lens->at(i).second;
+ bool first_branch = (start_from_branch == 0);
+ if(!atom->match(source+*piecelen, &start_from_branch, &atomlen, empty_pieces.begin(), empty_pieces.end()))
+ {
+ if(match_lens)
+ {
+ if(i >= (int)match_lens->size())
+ match_lens->push_back(std::pair<int,int>(*piecelen, 0));
+ else
+ (*match_lens)[i] = std::pair<int,int>(*piecelen, 0);
+ }
+ return false;
+ }
if(match_lens)
- match_lens->push_back(*piecelen);
- int atomlen;
- if(!atom->match(source+*piecelen, &atomlen))
- return false;
+ {
+ if(i >= (int)match_lens->size())
+ match_lens->push_back(std::pair<int,int>(*piecelen, start_from_branch));
+ else
+ (*match_lens)[i] = std::pair<int,int>(*piecelen, start_from_branch);
+ }
*piecelen += atomlen;
if(!atomlen && !source[*piecelen])
{
atom->regex_intern->reachedEnd = true;
break;
}
+ if(first_branch && (atomlen == 0))//avoid infinite loop
+ {
+ break;
+ }
}
if(match_lens)
- match_lens->push_back(*piecelen);
+ {
+ // if(i >= match_lens->size())
+ match_lens->push_back(std::pair<int,int>(*piecelen, 0));
+ // else
+ // (*match_lens)[i] = std::pair<int,int>(*piecelen, 0);
+ }
return true;
}
//match any of chargroups
-bool CRegexAscii_chargroup::match(const char *source, int *matched_len)
+bool CRegexAscii_chargroup::match_internal(const char *source, int *start_from_branch, int *matched_len)
{
*matched_len = 0;
std::list<chargroup_t>::iterator cgt_it;
@@ -975,26 +1843,184 @@
return false;
}
- if(source[0] == 0x0A)
+ if((source[0] == 0x0A) || ((source[0] == 0x0D) && (source[1] == 0x0A)))
{
if((regex_intern->flags & REGEX_ASCII_MULTILINE) &&
(chargroup_list.size() == 1) && (chargroup_list.begin()->flags == CHARGROUP_FLAGS_ENDLINE))
{
- *matched_len = 1;
+ //*matched_len = 1;
return true;
}
}
+ bool found = false;
for(cgt_it = chargroup_list.begin(); cgt_it != chargroup_list.end(); cgt_it++)
{
- if(cgt_it->flags == CHARGROUP_FLAGS_MULTICHAR)
- {
- switch(cgt_it->c1)
- {
- case 'p'://catEsc
- case 'P'://complEsc
- //ignore the prop for now
- throw XQUERY_EXCEPTION( err::FORX0002 );
+ switch(cgt_it->flags&0x7F)
+ {
+ case CHARGROUP_FLAGS_MULTICHAR_p:
+ switch(cgt_it->c1)
+ {
+ case unicode::UNICODE_Ll + 50:
+ if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Ll) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Lm) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Lo) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Lt) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Lu))
+ {
+ if(!(cgt_it->flags & 0x80))
+ found = true;
+ }
+ else
+ {
+ if(cgt_it->flags & 0x80)
+ found = true;
+ }
+ break;
+ case unicode::UNICODE_Mc + 50:
+ if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Mn) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Mc) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Me))
+ {
+ if(!(cgt_it->flags & 0x80))
+ found = true;
+ }
+ else
+ {
+ if(cgt_it->flags & 0x80)
+ found = true;
+ }
+ break;
+ case unicode::UNICODE_Nd + 50:
+ if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Nd) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Nl) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_No))
+ {
+ if(!(cgt_it->flags & 0x80))
+ found = true;
+ }
+ else
+ {
+ if(cgt_it->flags & 0x80)
+ found = true;
+ }
+ break;
+ case unicode::UNICODE_Pc + 50:
+ if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Pc) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Pd) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Ps) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Pe) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Pi) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Pf) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Po))
+ {
+ if(!(cgt_it->flags & 0x80))
+ found = true;
+ }
+ else
+ {
+ if(cgt_it->flags & 0x80)
+ found = true;
+ }
+ break;
+ case unicode::UNICODE_Zl + 50:
+ if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Zs) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Zl) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Zp))
+ {
+ if(!(cgt_it->flags & 0x80))
+ found = true;
+ }
+ else
+ {
+ if(cgt_it->flags & 0x80)
+ found = true;
+ }
+ break;
+ case unicode::UNICODE_Sc + 50:
+ if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Sm) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Sc) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Sk) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_So))
+ {
+ if(!(cgt_it->flags & 0x80))
+ found = true;
+ }
+ else
+ {
+ if(cgt_it->flags & 0x80)
+ found = true;
+ }
+ break;
+ case unicode::UNICODE_Cc + 50:
+ if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Cc) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Cf) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Co))//ignore unicode::UNICODE_Cn
+ {
+ if(!(cgt_it->flags & 0x80))
+ found = true;
+ }
+ else
+ {
+ if(cgt_it->flags & 0x80)
+ found = true;
+ }
+ break;
+ default:
+ if(unicode::check_codepoint_category(source[0], (unicode::category)cgt_it->c1))
+ {
+ if(!(cgt_it->flags & 0x80))
+ found = true;
+ }
+ else
+ {
+ if(cgt_it->flags & 0x80)
+ found = true;
+ }
+ break;
+ }break;
+ case CHARGROUP_FLAGS_MULTICHAR_Is:
+ {
+ const unicode::code_point *cp = block_escape[cgt_it->c1].cp;
+ if(((unicode::code_point)source[0] >= cp[0]) &&
+ ((unicode::code_point)source[0] <= cp[1]))
+ {
+ if(!(cgt_it->flags & 0x80))
+ found = true;
+ }
+ else if(block_escape[cgt_it->c1].ext_cp)
+ {
+ cp = block_escape[cgt_it->c1].ext_cp;
+ while(*cp)
+ {
+ if(((unicode::code_point)source[0] >= cp[0]) &&
+ ((unicode::code_point)source[0] <= cp[1]))
+ break;
+ cp += 2;
+ }
+ if(*cp)
+ {
+ if(!(cgt_it->flags & 0x80))
+ found = true;
+ }
+ else
+ {
+ if(cgt_it->flags & 0x80)
+ found = true;
+ }
+ }
+ else
+ {
+ if(cgt_it->flags & 0x80)
+ found = true;
+ }
+ }break;
+ case CHARGROUP_FLAGS_MULTICHAR_OTHER:
+ {
+ bool value_true = true;
+ switch(cgt_it->c1)
+ {
+ case 'S':value_true = false;//[^\s]
case 's'://[#x20\t\n\r]
switch(source[0])
{
@@ -1002,86 +2028,100 @@
case '\r':
case '\n':
case ' ':
- *matched_len = 1;
- return true;
- default:
- return false;
- }
- case 'S'://[^\s]
- switch(source[0])
- {
- case 0:
- regex_intern->reachedEnd = true;
- case '\t':
- case '\r':
- case '\n':
- case ' ':
- return false;
- default:
- *matched_len = 1;
- return true;
- }
+ found = true;
+ default:
+ break;
+ }
+ break;
+ case 'I':value_true = false;//[^\i]
case 'i'://the set of initial name characters, those matched by Letter | '_' | ':'
if((source[0] == '_') ||
(source[0] == ':') ||
XQCharType::isLetter(source[0]))
{
- *matched_len = 1;
- return true;
- }
- return false;
- case 'I':
- if((source[0] == '_') ||
- (source[0] == ':') ||
- XQCharType::isLetter(source[0]))
- {
- return false;
- }
- *matched_len = 1;
- return true;
+ found = true;
+ }
+ break;
+ case 'C':value_true = false;//[^\c]
case 'c'://the set of name characters, those matched by NameChar
if(XQCharType::isNameChar(source[0]))
{
- *matched_len = 1;
- return true;
- }
- return false;
- case 'C':
- if(XQCharType::isNameChar(source[0]))
- {
- return false;
- }
- *matched_len = 1;
- return true;
+ found = true;
+ }
+ break;
+ case 'D':value_true = false;//[^\d]
case 'd':
- case 'D':
+ if(unicode::check_codepoint_category(source[0], unicode::UNICODE_Nd))
+ found = true;
+ break;
+ case 'W':value_true = false;//[^\w]
case 'w':
- case 'W':
+ found = !(unicode::check_codepoint_category(source[0], unicode::UNICODE_Pc) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Pd) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Ps) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Pe) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Pi) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Pf) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Po) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Zs) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Zl) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Zp) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Cc) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Cf) ||
+ unicode::check_codepoint_category(source[0], unicode::UNICODE_Co));//ignore unicode::UNICODE_Cn
+ break;
default:
- throw XQUERY_EXCEPTION( err::FORX0002 );
- }
- return false;
- }
- else if(cgt_it->flags == CHARGROUP_FLAGS_ENDLINE)
- {
- return false;
- }
- else
+ throw XQUERY_EXCEPTION( err::FORX0002, ERROR_PARAMS(source, ZED(REGEX_UNIMPLEMENTED)) );
+ }
+ if((found && value_true) || (!found && !value_true))
+ {
+ if(!source[0])
+ regex_intern->reachedEnd = true;
+ *matched_len = 1;
+ return true;
+ }
+ else
+ return false;
+ }
+ case CHARGROUP_FLAGS_ENDLINE:
+ {
+ return false;
+ }
+ case CHARGROUP_FLAGS_ONECHAR:
+ {
+ if(regex_intern->flags & REGEX_ASCII_CASE_INSENSITIVE)
+ {
+ char sup = toupper(source[0]);
+ if(sup == toupper(cgt_it->c1))
+ found = true;
+ }
+ else
+ {
+ if(source[0] == cgt_it->c1)
+ found = true;
+ }
+ break;
+ }
+ default:
{
if(regex_intern->flags & REGEX_ASCII_CASE_INSENSITIVE)
{
char sup = toupper(source[0]);
if((sup >= toupper(cgt_it->c1)) &&
(sup <= toupper(cgt_it->c2)))
- break;
+ found = true;
}
else
{
if((source[0] >= cgt_it->c1) &&
(source[0] <= cgt_it->c2))
- break;
+ found = true;
}
- }
+ break;
+ }
+ }
+ if(found)
+ break;
}
if(cgt_it == chargroup_list.end())
return false;
@@ -1089,7 +2129,7 @@
if(classsub)
{
int classsub_len;
- if(classsub->match(source, &classsub_len))
+ if(classsub->match_internal(source, NULL, &classsub_len))
return false;
}
@@ -1097,14 +2137,14 @@
return true;
}
-bool CRegexAscii_negchargroup::match(const char *source, int *matched_len)
+bool CRegexAscii_negchargroup::match_internal(const char *source, int *start_from_branch, int *matched_len)
{
if(!source[0])
{
regex_intern->reachedEnd = true;
return false;
}
- if(!CRegexAscii_chargroup::match(source, matched_len))
+ if(!CRegexAscii_chargroup::match_internal(source, start_from_branch, matched_len))
{
*matched_len = 1;
return true;
@@ -1112,7 +2152,7 @@
return false;
}
-bool CRegexAscii_wildchar::match(const char *source, int *matched_len)
+bool CRegexAscii_wildchar::match_internal(const char *source, int *start_from_branch, int *matched_len)
{
*matched_len = 0;
if(source[0])
@@ -1135,7 +2175,7 @@
}
}
-bool CRegexAscii_backref::match(const char *source, int *matched_len)
+bool CRegexAscii_backref::match_internal(const char *source, int *start_from_branch, int *matched_len)
{
const char *submatch = regex_intern->subregex.at(backref-1)->matched_source;
if(!submatch)
@@ -1144,11 +2184,33 @@
return true;
}
*matched_len = regex_intern->subregex.at(backref-1)->matched_len;
- if(!strncmp(source, submatch, *matched_len))
- {
- return true;
- }
- *matched_len = 0;
+ if(regex_intern->flags & REGEX_ASCII_CASE_INSENSITIVE)
+ {
+ if(compare_ni(source, submatch, *matched_len))
+ {
+ return true;
+ }
+ }
+ else
+ {
+ if(!strncmp(source, submatch, *matched_len))
+ {
+ return true;
+ }
+ }
+ *matched_len = 0;
+ return false;
+}
+
+bool CRegexAscii_pinstart::match_internal(const char *source, int *start_from_branch, int *matched_len)
+{
+ *matched_len = 0;
+ if(source == regex_intern->source_start)
+ return true;
+ if((regex_intern->flags & REGEX_ASCII_MULTILINE) &&
+ ((source[-1] == '\n') || (source[-1] == '\r')))
+ return true;
+
return false;
}
=== modified file 'src/util/regex_ascii.h'
--- src/util/regex_ascii.h 2011-07-18 14:25:21 +0000
+++ src/util/regex_ascii.h 2011-10-28 02:34:26 +0000
@@ -21,35 +21,55 @@
#include <vector>
#include <zorba/config.h>
+#include "util/unicode_util.h"
namespace zorba {
namespace regex_ascii{
//matching flags
-#define REGEX_ASCII_CASE_INSENSITIVE 1
-#define REGEX_ASCII_DOTALL 2
-#define REGEX_ASCII_MULTILINE 4
-#define REGEX_ASCII_COMMENTS 8
-#define REGEX_ASCII_LITERAL 16
+#define REGEX_ASCII_CASE_INSENSITIVE 1 //i
+#define REGEX_ASCII_DOTALL 2 //s
+#define REGEX_ASCII_MULTILINE 4 //m
+#define REGEX_ASCII_NO_WHITESPACE 8 //x
+#define REGEX_ASCII_LITERAL 16 //q
+
+#define REGEX_ASCII_GET_LONGEST_BRANCH 32 //try all branches and get the longest match (or shortest for reluctant pieces)
+#define REGEX_ASCII_MINIMAL_MATCH 64 //consider all pieces as reluctant
+#define REGEX_ASCII_WHOLE_MATCH 128 //match only all string, like having "^regex$"
+#define REGEX_ASCII_GROUPING_LEN_WHOLE_PIECE 256 //compute the len of a grouping as for the whole piece ( for example (a)+ when matching "aa" and referred as $1 will get string len 2 instead of last 1)
class CRegexAscii_regex;
-
-class IRegexMatcher
-{
-public:
+class CRegexAscii_piece;
+
+struct RegexAscii_pieceinfo
+{
+ union
+ {
+ CRegexAscii_piece* piece;
+ CRegexAscii_regex* group_regex;
+ };
+ int nr_matches;
+
+ RegexAscii_pieceinfo(CRegexAscii_piece* piece) {nr_matches=0;this->piece=piece;}
+ RegexAscii_pieceinfo(CRegexAscii_regex* group_regex) {nr_matches=-1;this->group_regex=group_regex;}
+};
+
+
+class IRegexAtom
+{
+protected:
+ friend class CRegexAscii_piece;
CRegexAscii_regex *regex_intern;
public:
- IRegexMatcher(CRegexAscii_regex* regex) : regex_intern(regex) {}
- virtual ~IRegexMatcher() {}
-
- virtual bool match(const char *source, int *matched_len) = 0;
-};
-
-class IRegexAtom : public IRegexMatcher
-{
-public:
- IRegexAtom(CRegexAscii_regex* regex) : IRegexMatcher(regex) {}
+ IRegexAtom(CRegexAscii_regex* regex) : regex_intern(regex) {}
virtual ~IRegexAtom() {}
+
+ virtual bool match(const char *source, int *start_from_branch, int *matched_len,
+ std::list<RegexAscii_pieceinfo>::iterator next_piece,
+ std::list<RegexAscii_pieceinfo>::iterator end_piece);
+ virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len) = 0;
+ virtual void reset_match() {}
+// virtual void restore_match() {}
};
class CRegexAscii_branch;
@@ -66,58 +86,74 @@
friend class CRegexAscii_negchargroup;
friend class CRegexAscii_wildchar;
friend class CRegexAscii_backref;
+ friend class CRegexAscii_pinstart;
public:
CRegexAscii_regex(CRegexAscii_regex *);
virtual ~CRegexAscii_regex();
bool match_anywhere(const char *source, unsigned int flags, int *match_pos, int *matched_len);
bool match_from(const char *source, unsigned int flags, int *match_pos, int *matched_len);
- virtual bool match(const char *source, int *matched_len);
//for replace $1, $2 ...
bool get_indexed_match(int index, const char **matched_source, int *matched_len);
unsigned int get_indexed_regex_count();
bool get_reachedEnd() {return reachedEnd;}
- bool set_align_begin(bool align_begin);
+public:
+ virtual bool match(const char *source, int *start_from_branch, int *matched_len,
+ std::list<RegexAscii_pieceinfo>::iterator next_piece,
+ std::list<RegexAscii_pieceinfo>::iterator end_piece);
+ virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len) {return false;}//not impl
+ virtual void reset_match();
+// virtual void restore_match();
private:
void add_branch(CRegexAscii_branch *branch);
+
+ void save_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex);
+ void load_subregex_list(std::vector<std::pair<const char*, int> > &saved_subregex);
private:
unsigned int flags;
std::list<CRegexAscii_branch*> branch_list;
- bool align_begin;
+
+ const char *source_start;
const char *matched_source;
int matched_len;
+// const char *backup_matched_source;
+// int backup_matched_len;
std::vector<CRegexAscii_regex*> subregex;//for grouping
bool reachedEnd;
};
-class CRegexAscii_branch : public IRegexMatcher
+class CRegexAscii_branch
{
friend class CRegexAscii_parser;
public:
CRegexAscii_branch(CRegexAscii_regex* regex);
~CRegexAscii_branch();
- virtual bool match(const char *source, int *matched_len);
+ bool match(const char *source, int *matched_len,
+ CRegexAscii_regex* group_regex,
+ std::list<RegexAscii_pieceinfo>::iterator next_piece,
+ std::list<RegexAscii_pieceinfo>::iterator end_piece);
+ void reset();
+// void restore();
private:
- std::list<CRegexAscii_piece*> piece_list;
+ std::list<RegexAscii_pieceinfo> piece_list;
private:
void add_piece(CRegexAscii_piece *piece);
- bool match_piece_iter_reluctant(std::list<CRegexAscii_piece*>::iterator piece_it,
- const char *source, int *matched_len);
- bool match_piece_iter_normal(std::list<CRegexAscii_piece*>::iterator piece_it,
- const char *source, int *matched_len);
};
class CRegexAscii_piece //: public IRegexMatcher
{
friend class CRegexAscii_parser;
-public:
+ friend class CRegexAscii_branch;
+
IRegexAtom *atom;
+ CRegexAscii_regex *regex_atom;
+
//quantifier
bool strict_max;
int min;
@@ -134,14 +170,35 @@
void get_quantifier(int *min, int *max, bool *strict_max);
bool get_is_reluctant();
// bool match(const char *source, int *matched_len);
+ bool match_piece(std::list<RegexAscii_pieceinfo>::iterator next_piece,
+ std::list<RegexAscii_pieceinfo>::iterator end_piece,
+ const char *source, int *matched_len);
+protected:
bool match_piece_times(const char *source,
int *piecelen,
int times,
- std::vector<int> *match_lens);
-};
-
-#define CHARGROUP_FLAGS_MULTICHAR 1
-#define CHARGROUP_FLAGS_ENDLINE 2
+ std::vector<std::pair<int,int> > *match_lens);
+ int choose_another_branch(std::vector<std::pair<int,int> > &match_lens);
+ bool match_piece_iter_reluctant(std::list<RegexAscii_pieceinfo>::iterator next_piece,
+ std::list<RegexAscii_pieceinfo>::iterator end_piece,
+ const char *source, int *matched_len);
+ bool match_piece_iter_normal(std::list<RegexAscii_pieceinfo>::iterator next_piece,
+ std::list<RegexAscii_pieceinfo>::iterator end_piece,
+ const char *source, int *matched_len);
+ bool is_regex_atom();
+};
+
+
+enum CHARGROUP_t
+{
+CHARGROUP_NO_MULTICHAR = 0,
+CHARGROUP_FLAGS_CHAR_RANGE,
+CHARGROUP_FLAGS_MULTICHAR_p,
+CHARGROUP_FLAGS_MULTICHAR_Is,
+CHARGROUP_FLAGS_MULTICHAR_OTHER,
+CHARGROUP_FLAGS_ONECHAR,
+CHARGROUP_FLAGS_ENDLINE
+};
class CRegexAscii_chargroup : public IRegexAtom
{
@@ -152,19 +209,20 @@
private:
typedef struct
{
- unsigned char flags;
+ CHARGROUP_t flags;
char c1;
char c2;
}chargroup_t;
std::list<chargroup_t> chargroup_list;
CRegexAscii_chargroup *classsub;
public:
- void addMultiChar(char c);
+ void addMultiChar(char c, CHARGROUP_t multichar_type);
void addEndLine();
void addCharRange(char c1, char c2);
+ void addOneChar(char c);
void addClassSub(CRegexAscii_chargroup* classsub);
- virtual bool match(const char *source, int *matched_len);
+ virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len);
};
class CRegexAscii_negchargroup : public CRegexAscii_chargroup
@@ -173,7 +231,7 @@
CRegexAscii_negchargroup(CRegexAscii_regex* regex);
virtual ~CRegexAscii_negchargroup();
- virtual bool match(const char *source, int *matched_len);
+ virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len);
};
class CRegexAscii_wildchar : public IRegexAtom
@@ -182,7 +240,7 @@
CRegexAscii_wildchar(CRegexAscii_regex* regex);
virtual ~CRegexAscii_wildchar();
- virtual bool match(const char *source, int *matched_len);
+ virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len);
};
class CRegexAscii_backref : public IRegexAtom
@@ -191,14 +249,29 @@
CRegexAscii_backref(CRegexAscii_regex* regex, unsigned int backref);
virtual ~CRegexAscii_backref();
- virtual bool match(const char *source, int *matched_len);
+ virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len);
private:
unsigned int backref;
};
+class CRegexAscii_pinstart : public IRegexAtom
+{
+public:
+ CRegexAscii_pinstart(CRegexAscii_regex* regex);
+
+ virtual bool match_internal(const char *source, int *start_from_branch, int *matched_len);
+};
+
class CRegexAscii_parser
{
public:
+ typedef struct
+ {
+ const unicode::code_point cp[2];//in pairs start, end
+ const unicode::code_point *ext_cp;
+ const char *group_name;
+ }block_escape_t;
+
CRegexAscii_parser();
~CRegexAscii_parser();
@@ -211,7 +284,8 @@
CRegexAscii_piece* parse_piece(const char *pattern, int *piece_len);
char myishex(char c);
bool myisdigit(char c);
- char readChar(const char *pattern, int *char_len, bool *is_multichar);
+ bool myisletterAZ(char c);
+ char readChar(const char *pattern, bool for_atom, int *char_len, CHARGROUP_t *multichar_type);
IRegexAtom* read_atom(const char *pattern, int *atom_len);
CRegexAscii_chargroup* readchargroup(const char *pattern, int *chargroup_len);
void read_quantifier(CRegexAscii_piece *piece, const char *pattern, int *quantif_len);
@@ -222,7 +296,8 @@
unsigned int flags;
};
-}}//end namespace zorba::regex_ascii
+}
+}//end namespace zorba::regex_ascii
#endif
/* vim:set et sw=2 ts=2: */
=== modified file 'src/util/string_util.cpp'
--- src/util/string_util.cpp 2011-07-07 18:48:27 +0000
+++ src/util/string_util.cpp 2011-10-28 02:34:26 +0000
@@ -121,6 +121,13 @@
return result;
}
+uint32_t hash( char const *data, size_t len ) {
+ uint32_t result = 5381;
+ for ( size_t i = 0; i < len; ++i )
+ result = (result << 5) + result + *data++;
+ return result;
+}
+
char* itoa( long long n, char *buf ) {
//
// This implementation is much faster than using sprintf(3).
=== modified file 'src/util/string_util.h'
--- src/util/string_util.h 2011-10-11 17:59:20 +0000
+++ src/util/string_util.h 2011-10-28 02:34:26 +0000
@@ -138,6 +138,17 @@
*/
#define BUILD_STRING(...) (::zorba::ztd::string_builder() << __VA_ARGS__)
+////////// String hash /////////////////////////////////////////////////////////
+
+/**
+ * Performs a hash of the given data.
+ *
+ * @param data A pointer to the start of the data.
+ * @param len The length of the data in bytes.
+ * @return Returns said hash.
+ */
+uint32_t hash( char const *data, size_t len );
+
////////// String equality /////////////////////////////////////////////////////
/**
=== modified file 'src/util/unicode_categories.cpp'
--- src/util/unicode_categories.cpp 2011-06-14 17:26:33 +0000
+++ src/util/unicode_categories.cpp 2011-10-28 02:34:26 +0000
@@ -65812,7 +65812,7 @@
{ 0x100000, 0x100000, UNICODE_Co},
};
-bool check_codepoint_category(code_point cp, UnicodeCategoriesEnum categ)
+bool check_codepoint_category(code_point cp, category categ)
{
if(cp < 0x10000)
return codepoints_categories[cp] == categ;
@@ -65824,10 +65824,10 @@
if(cp >= codepoints_categories2[i].cp1)
return codepoints_categories2[i].category == categ;
else
- return false;
+ return categ ? false : true;
}
}
- return false;
+ return categ ? false : true;
}
/*
=== modified file 'src/util/unicode_categories.h'
--- src/util/unicode_categories.h 2011-06-14 17:26:33 +0000
+++ src/util/unicode_categories.h 2011-10-28 02:34:26 +0000
@@ -22,46 +22,53 @@
namespace zorba {
namespace unicode {
-//Unicode codepoint categories, as from http://www.fileformat.info/info/unicode/category/index.htm
+///////////////////////////////////////////////////////////////////////////////
-enum UnicodeCategoriesEnum {
-UNICODE_Cc, //Other, Control
-UNICODE_Cf, //Other, Format
-UNICODE_Co, //Other, Private Use
-UNICODE_Cs, //Other, Surrogate
-UNICODE_Ll, //Letter, Lowercase
-UNICODE_Lm, //Letter, Modifier
-UNICODE_Lo, //Letter, Other
-UNICODE_Lt, //Letter, Titlecase
-UNICODE_Lu, //Letter, Uppercase
-UNICODE_Mc, //Mark, Spacing Combining
-UNICODE_Me, //Mark, Enclosing
-UNICODE_Mn, //Mark, Nonspacing
-UNICODE_Nd, //Number, Decimal Digit
-UNICODE_Nl, //Number, Letter
-UNICODE_No, //Number, Other
-UNICODE_Pc, //Punctuation, Connector
-UNICODE_Pd, //Punctuation, Dash
-UNICODE_Pe, //Punctuation, Close
-UNICODE_Pf, //Punctuation, Final quote (may behave like Ps or Pe depending on usage)
-UNICODE_Pi, //Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
-UNICODE_Po, //Punctuation, Other
-UNICODE_Ps, //Punctuation, Open
-UNICODE_Sc, //Symbol, Currency
-UNICODE_Sk, //Symbol, Modifier
-UNICODE_Sm, //Symbol, Math
-UNICODE_So, //Symbol, Other
-UNICODE_Zl, //Separator, Line
-UNICODE_Zp, //Separator, Paragraph
-UNICODE_Zs //Separator, Space
+/**
+ * Unicode codepoint categories.
+ * See: http://www.fileformat.info/info/unicode/category/
+ */
+enum category {
+ UNICODE_Cn, // Not Assigned
+ UNICODE_Cc, // Other, Control
+ UNICODE_Cf, // Other, Format
+ UNICODE_Co, // Other, Private Use
+ UNICODE_Cs, // Other, Surrogate
+ UNICODE_Ll, // Letter, Lowercase
+ UNICODE_Lm, // Letter, Modifier
+ UNICODE_Lo, // Letter, Other
+ UNICODE_Lt, // Letter, Titlecase
+ UNICODE_Lu, // Letter, Uppercase
+ UNICODE_Mc, // Mark, Spacing Combining
+ UNICODE_Me, // Mark, Enclosing
+ UNICODE_Mn, // Mark, Nonspacing
+ UNICODE_Nd, // Number, Decimal Digit
+ UNICODE_Nl, // Number, Letter
+ UNICODE_No, // Number, Other
+ UNICODE_Pc, // Punctuation, Connector
+ UNICODE_Pd, // Punctuation, Dash
+ UNICODE_Pe, // Punctuation, Close
+ UNICODE_Pf, // Punctuation, Final quote (like Ps or Pe depending on usage)
+ UNICODE_Pi, // Punctuation, Initial quote (like Ps or Pe depending on usage)
+ UNICODE_Po, // Punctuation, Other
+ UNICODE_Ps, // Punctuation, Open
+ UNICODE_Sc, // Symbol, Currency
+ UNICODE_Sk, // Symbol, Modifier
+ UNICODE_Sm, // Symbol, Math
+ UNICODE_So, // Symbol, Other
+ UNICODE_Zl, // Separator, Line
+ UNICODE_Zp, // Separator, Paragraph
+ UNICODE_Zs // Separator, Space
};
bool is_UnicodeNd(code_point cp, code_point *ret_zero);
-bool check_codepoint_category(code_point cp, UnicodeCategoriesEnum categ);
-
-}
-}
-
-#endif
+bool check_codepoint_category(code_point cp, category categ);
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace unicode
+} // namespace zorba
+
+#endif /* ZORBA_UNICODE_CATEGORIES */
/* vim:set et sw=2 ts=2: */
=== modified file 'src/util/unicode_util.cpp'
--- src/util/unicode_util.cpp 2011-07-17 00:10:56 +0000
+++ src/util/unicode_util.cpp 2011-10-28 02:34:26 +0000
@@ -22,15 +22,19 @@
#include <functional> /* for binary_function */
#include <utility> /* for pair */
-#include <unicode/normlzr.h>
-#include <unicode/ustring.h>
+#ifndef ZORBA_NO_ICU
+# include <unicode/normlzr.h>
+# include <unicode/ustring.h>
+#endif /* ZORBA_NO_ICU */
#include "cxx_util.h"
#include "unicode_util.h"
#include "utf8_util.h"
using namespace std;
+#ifndef ZORBA_NO_ICU
U_NAMESPACE_USE
+#endif /* ZORBA_NO_ICU */
namespace zorba {
namespace unicode {
@@ -2208,6 +2212,8 @@
return to_case<upper>( c );
}
+#ifndef ZORBA_NO_ICU
+
bool normalize( string const &in, normalization::type n, string *out ) {
UErrorCode status = U_ZERO_ERROR;
UNormalizationMode icu_mode;
@@ -2230,8 +2236,11 @@
return U_SUCCESS( status ) == TRUE;
}
+#endif /* ZORBA_NO_ICU */
+
bool to_string( char const *in, size_type in_len, char_type **out,
size_type *out_len ) {
+#ifndef ZORBA_NO_ICU
size_type utf16_len;
UErrorCode status = U_ZERO_ERROR;
u_strFromUTF8WithSub( // pre-flight to get utf16_len
@@ -2250,9 +2259,16 @@
}
*out = utf16_buf;
*out_len = utf16_len;
+#else
+ *out = new char_type[ in_len + 1 ];
+ *out_len = in_len;
+ ::strncpy( *out, in, *out_len );
+#endif /* ZORBA_NO_ICU */
return true;
}
+#ifndef ZORBA_NO_ICU
+
bool to_string( char const *in, size_type in_len, string *out ) {
char_type *const buf = out->getBuffer( in_len + 1 );
size_type buf_len;
@@ -2271,6 +2287,8 @@
return U_SUCCESS( status ) == TRUE;
}
+#endif /* ZORBA_NO_ICU */
+
///////////////////////////////////////////////////////////////////////////////
} // namespace unicode
=== modified file 'src/util/unicode_util.h'
--- src/util/unicode_util.h 2011-07-18 14:25:21 +0000
+++ src/util/unicode_util.h 2011-10-28 02:34:26 +0000
@@ -19,12 +19,18 @@
#include <zorba/config.h>
-#ifndef ZORBA_NO_UNICODE
-
#include <cctype>
#include <cstring>
#include <cwchar>
-#include <unicode/unistr.h>
+
+#include <zorba/internal/ztd.h>
+
+#ifdef ZORBA_NO_ICU
+# include "zorbamisc/config/stdint.h"
+# include "zorbatypes/zstring.h"
+#else
+# include <unicode/unistr.h>
+#endif /* ZORBA_NO_ICU */
#include "stl_util.h"
@@ -37,13 +43,21 @@
* The character type that can hold a Unicode character encoded in UTF-16. Do
* not assume that this is an unsigned type.
*/
-typedef UChar char_type;
+#ifdef ZORBA_NO_ICU
+ typedef char char_type;
+#else
+ typedef /* ICU's */ UChar char_type;
+#endif /* ZORBA_NO_ICU */
/**
* The type type that can hold a Unicode code-point. Do not assume that this
* is an unsigned type.
*/
-typedef UChar32 code_point;
+#ifdef ZORBA_NO_ICU
+typedef uint32_t code_point;
+#else
+typedef /* ICU's */ UChar32 code_point;
+#endif /* ZORBA_NO_ICU */
typedef int32_t size_type;
@@ -60,10 +74,17 @@
};
}
+#ifndef ZORBA_NO_ICU
/**
* A Unicode string.
*/
typedef U_NAMESPACE_QUALIFIER UnicodeString string;
+#else
+/**
+ * Since there is no ICU, just use a zstring as a "Unicode" string.
+ */
+typedef zstring string;
+#endif /* ZORBA_NO_ICU */
////////// code-point checking ////////////////////////////////////////////////
@@ -100,7 +121,7 @@
return ascii_c == c && isspace( ascii_c );
#else
return isspace( c );
-#endif
+#endif /* WIN32 */
}
/**
@@ -119,8 +140,10 @@
* @param c The code-point to check.
* @return Returns \c true only if the code-point is valid.
*/
-template<class CodePointType>
-inline bool is_valid( CodePointType c ) {
+template<typename CodePointType> inline
+typename std::enable_if<ZORBA_TR1_NS::is_integral<CodePointType>::value,
+ bool>::type
+is_valid( CodePointType c ) {
return (ztd::ge0( c ) && c <= 0x00D7FF)
|| (c >= 0x00E000 && c <= 0x00FFFD)
|| (c >= 0x010000 && c <= 0x10FFFF);
@@ -168,6 +191,7 @@
////////// normalization //////////////////////////////////////////////////////
+#ifndef ZORBA_NO_ICU
/**
* Normalizes the given string.
*
@@ -177,9 +201,11 @@
*/
ZORBA_DLL_PUBLIC
bool normalize( string const &in, normalization::type n, string *out );
+#endif /* ZORBA_NO_ICU */
////////// string conversion //////////////////////////////////////////////////
+#ifndef ZORBA_NO_ICU
/**
* Converts a single UTF-8 encoded character into a single Unicode character.
*
@@ -188,6 +214,7 @@
* @return Returns \c true only if the conversion succeeded.
*/
bool to_char( char const *in, char_type *out );
+#endif /* ZORBA_NO_ICU */
/**
* Converts a UTF-8 encoded string into a sequence of Unicode characters.
@@ -210,8 +237,15 @@
* @param out The Unicode string result.
* @return Returns \c true only if the conversion succeeded.
*/
+#ifndef ZORBA_NO_ICU
ZORBA_DLL_PUBLIC
bool to_string( char const *in, size_type in_len, string *out );
+#else
+inline bool to_string( char const *in, size_type in_len, string *out ) {
+ out->assign( in, in_len );
+ return true;
+}
+#endif /* ZORBA_NO_ICU */
/**
* Converts a C string to a Unicode string.
@@ -224,6 +258,8 @@
return to_string( in, (size_type)std::strlen( in ), out );
}
+#ifndef ZORBA_NO_ICU
+
/**
* Converts a wide-character string to a Unicode string.
*
@@ -245,6 +281,8 @@
return to_string( in, static_cast<size_type>( std::wcslen( in ) ), out );
}
+#endif /* ZORBA_NO_ICU */
+
/**
* Converts a string to a Unicode string.
*
@@ -263,13 +301,6 @@
} // namespace unicode
} // namespace zorba
-#else
-#endif /* ZORBA_NO_UNICODE */
-namespace zorba{
-namespace unicode{
-typedef int32_t size_type;
-} // namespace unicode
-} // namespace zorba
#endif /* ZORBA_UNICODE_UTIL_H */
/*
* Local variables:
=== modified file 'src/util/utf8_util.cpp'
--- src/util/utf8_util.cpp 2011-07-17 00:10:56 +0000
+++ src/util/utf8_util.cpp 2011-10-28 02:34:26 +0000
@@ -15,16 +15,16 @@
*/
#include "stdafx.h"
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
#include <unicode/ustring.h>
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
#include "cxx_util.h"
#include "utf8_util.h"
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
U_NAMESPACE_USE
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
unsigned const Mask1Byte = 0x80;
unsigned const Mask2Bytes = 0xC0;
@@ -152,7 +152,7 @@
return len;
}
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
bool to_string( unicode::char_type const *in, unicode::size_type in_len,
storage_type **out, size_type *out_len ) {
@@ -216,7 +216,7 @@
return true;
}
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
storage_type const* validate( storage_type const *s ) {
while ( *s ) {
=== modified file 'src/util/utf8_util.h'
--- src/util/utf8_util.h 2011-07-17 20:05:49 +0000
+++ src/util/utf8_util.h 2011-10-28 02:34:26 +0000
@@ -25,16 +25,20 @@
#include "ascii_util.h"
#include "cxx_util.h"
+#include "string_util.h"
#include "unicode_util.h"
#include "utf8_string.h"
#include "utf8_util_base.h"
+#include "zorbatypes/collation_manager.h"
#include "zorbautils/hashfun.h"
-#ifndef ZORBA_NO_UNICODE
-#include "zorbatypes/collation_manager.h"
-#include "zorbatypes/libicu.h"
-#endif
+#ifdef ZORBA_NO_ICU
+# include "diagnostics/assert.h"
+#else
+# include <unicode/coll.h>
+# include <unicode/sortkey.h>
+#endif /* ZORBA_NO_ICU */
namespace zorba {
namespace utf8 {
@@ -306,7 +310,7 @@
////////// Encoding conversion ////////////////////////////////////////////////
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
/**
* Converts a unicode::char_type array into a UTF-8 encoded string.
@@ -377,6 +381,8 @@
return to_string( in, u_strlen( in ), out );
}
+#endif /* ZORBA_NO_ICU */
+
/**
* Converts a unicode::string into a UTF-8 encoded string.
*
@@ -386,9 +392,16 @@
*/
template<class StringType> inline
bool to_string( unicode::string const &in, StringType *out ) {
+#ifndef ZORBA_NO_ICU
return to_string( in.getBuffer(), in.length(), out );
+#else
+ *out = in.c_str();
+ return true;
+#endif /* ZORBA_NO_ICU */
}
+#ifndef ZORBA_NO_ICU
+
//
// On Windows, UChar == wchar_t, so these functions would multiply define those
// previously.
@@ -512,7 +525,7 @@
return to_wchar_t( in.data(), in.size(), out, out_len );
}
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
////////// HTML URI ///////////////////////////////////////////////////////////
@@ -670,7 +683,7 @@
////////// Unicode normalization //////////////////////////////////////////////
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
/**
* Normalizes the Unicode characters in the string.
*
@@ -682,7 +695,7 @@
template<class InputStringType,class OutputStringType>
bool normalize( InputStringType const &in, unicode::normalization::type n,
OutputStringType *out );
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
////////// Whitespace /////////////////////////////////////////////////////////
@@ -743,7 +756,6 @@
std::reverse_copy( u_in.begin(), u_in.end(), std::back_inserter( u_out ) );
}
-#ifndef ZORBA_NO_UNICODE
/**
* Strips all diacritical marks from all characters converting them to their
* closest ASCII equivalents.
@@ -756,8 +768,6 @@
template<class InputStringType,class OutputStringType>
void strip_diacritics( InputStringType const &in, OutputStringType *out );
-#endif /* ZORBA_NO_UNICODE */
-
/**
*
*/
@@ -765,6 +775,7 @@
int compare(const StringType1 &s1, const StringType2 &s2,
const XQPCollator* collation)
{
+#ifndef ZORBA_NO_ICU
if (collation == NULL || collation->doMemCmp())
return s1.compare(s2);
@@ -775,6 +786,9 @@
unicode::to_string(s2, &us2);
return static_cast<Collator*>( collation->getCollator() )->compare(us1, us2);
+#else
+ return s1.compare(s2);
+#endif /* ZORBA_NO_ICU */
}
@@ -782,25 +796,13 @@
*
*/
template<class StringType> inline
-uint32_t hash(const StringType& s, const XQPCollator* collation = NULL)
-{
+uint32_t hash(const StringType& s, const XQPCollator* collation = NULL) {
+#ifndef ZORBA_NO_ICU
if (!collation || collation->doMemCmp())
- {
- const char* str = s.data();
- ulong len = (ulong)s.size();
- uint32_t hash = 5381;
- ulong i = 0;
- int c;
- while (i < len && (c = *str++))
- {
- hash = ((hash << 5) + hash) + c;
- ++i;
- }
- return hash;
- //return hashfun::h32((void*)(s.data()), s.size());
- }
+#endif /* ZORBA_NO_ICU */
+ return ztd::hash( s.data(), s.size() );
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
CollationKey collKey;
UErrorCode status = U_ZERO_ERROR;
@@ -818,7 +820,7 @@
return collKey.hashCode();
#else
ZORBA_ASSERT(false);
-#endif
+#endif /* ZORBA_NO_ICU */
}
///////////////////////////////////////////////////////////////////////////////
=== modified file 'src/util/utf8_util.tcc'
--- src/util/utf8_util.tcc 2011-07-15 13:33:24 +0000
+++ src/util/utf8_util.tcc 2011-10-28 02:34:26 +0000
@@ -99,7 +99,7 @@
return next_char( temp );
}
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
template<class InputStringType,class OutputStringType>
bool normalize( InputStringType const &in, unicode::normalization::type n,
@@ -123,7 +123,11 @@
template<class InputStringType,class OutputStringType>
void strip_diacritics( InputStringType const &in, OutputStringType *out ) {
InputStringType in_normalized;
+#ifndef ZORBA_NO_ICU
normalize( in, unicode::normalization::NFKD, &in_normalized );
+#else
+ in_normalized = in.c_str();
+#endif /* ZORBA_NO_ICU */
out->clear();
out->reserve( in_normalized.size() );
std::copy(
@@ -161,7 +165,7 @@
}
#endif /* WIN32 */
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
template<class InputStringType,class OutputStringType>
void to_lower( InputStringType const &in, OutputStringType *out ) {
=== modified file 'src/zorbatypes/URI.cpp'
--- src/zorbatypes/URI.cpp 2011-06-24 23:00:33 +0000
+++ src/zorbatypes/URI.cpp 2011-10-28 02:34:26 +0000
@@ -1191,8 +1191,6 @@
return is_set(Scheme) && !theScheme.empty();
}
-
-
/*******************************************************************************
********************************************************************************/
@@ -1347,7 +1345,6 @@
path = base_path.substr(0, last_slash+1);
// else
// path = "/";
-
}
// 6b - append the relative URI path
=== modified file 'src/zorbatypes/collation_manager.cpp'
--- src/zorbatypes/collation_manager.cpp 2011-06-14 17:26:33 +0000
+++ src/zorbatypes/collation_manager.cpp 2011-10-28 02:34:26 +0000
@@ -17,9 +17,9 @@
#include "common/common.h"
-#ifndef ZORBA_NO_UNICODE
-#include "zorbatypes/libicu.h"
-#endif
+#ifndef ZORBA_NO_ICU
+# include <unicode/coll.h>
+#endif /* ZORBA_NO_ICU */
#include <vector>
#include <iostream>
@@ -116,7 +116,7 @@
Collator* lCollator;
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
UErrorCode lError = U_ZERO_ERROR;
if (lTokens.size() == 2)
{
@@ -136,37 +136,37 @@
#else
lCollator = new Collator;
-#endif
+#endif /* ZORBA_NO_ICU */
if (lTokens[0].compare("PRIMARY") == 0)
{
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
lCollator->setStrength(Collator::PRIMARY);
-#endif
+#endif /* ZORBA_NO_ICU */
}
else if (lTokens[0].compare("SECONDARY") == 0)
{
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
lCollator->setStrength(Collator::SECONDARY);
-#endif
+#endif /* ZORBA_NO_ICU */
}
else if (lTokens[0].compare("TERTIARY") == 0)
{
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
lCollator->setStrength(Collator::TERTIARY);
-#endif
+#endif /* ZORBA_NO_ICU */
}
else if (lTokens[0].compare("QUATERNARY") == 0)
{
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
lCollator->setStrength(Collator::QUATERNARY);
-#endif
+#endif /* ZORBA_NO_ICU */
}
else if (lTokens[0].compare("IDENTICAL") == 0)
{
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
lCollator->setStrength(Collator::IDENTICAL);
-#endif
+#endif /* ZORBA_NO_ICU */
}
else
{
@@ -181,7 +181,7 @@
CollationFactory::createCollator()
{
Collator* lCollator;
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
UErrorCode lError = U_ZERO_ERROR;
lCollator = Collator::createInstance(Locale("en", "US"), lError);
if( U_FAILURE(lError) ) {
@@ -190,7 +190,7 @@
lCollator->setStrength(Collator::IDENTICAL);
#else
lCollator = new Collator;
-#endif
+#endif /* ZORBA_NO_ICU */
return new XQPCollator(lCollator, (std::string)"");
}
=== modified file 'src/zorbatypes/collation_manager.h'
--- src/zorbatypes/collation_manager.h 2011-06-14 17:26:33 +0000
+++ src/zorbatypes/collation_manager.h 2011-10-28 02:34:26 +0000
@@ -25,13 +25,13 @@
namespace zorba
{
-#ifdef ZORBA_NO_UNICODE
+#ifdef ZORBA_NO_ICU
-class Collator
+class Collator
{
};
-#endif
+#endif /* ZORBA_NO_ICU */
class XQPCollator
{
=== removed file 'src/zorbatypes/libicu.h'
--- src/zorbatypes/libicu.h 2011-06-14 17:26:33 +0000
+++ src/zorbatypes/libicu.h 1970-01-01 00:00:00 +0000
@@ -1,32 +0,0 @@
-/*
- * Copyright 2006-2008 The FLWOR Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-#ifndef ZORBA_LIBICU_H
-#if defined CYGWIN
-# define U_HAVE_INTTYPES 0
-# define U_HAVE_INT8_T 1
-# define U_HAVE_INT32_T 1
-# define U_HAVE_UINT32_T 1
-#endif
-
-#include <unicode/utypes.h>
-#include <unicode/coll.h>
-#include <unicode/ustring.h>
-#include <unicode/stsearch.h>
-#include <unicode/ucnv.h>
-#include <unicode/normlzr.h>
-#endif
-/* vim:set et sw=2 ts=2: */
=== modified file 'src/zorbatypes/transcoder.cpp'
--- src/zorbatypes/transcoder.cpp 2011-06-14 17:26:33 +0000
+++ src/zorbatypes/transcoder.cpp 2011-10-28 02:34:26 +0000
@@ -25,17 +25,19 @@
namespace zorba {
+///////////////////////////////////////////////////////////////////////////////
+
transcoder::transcoder( std::ostream& output_stream, bool in_utf16 ) :
os( output_stream ),
utf16( in_utf16 )
{
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
utf8_buf_len_ = 0;
utf8_char_len_ = 1;
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
}
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
void transcoder::write_utf16( char const *s, std::streamsize len ) {
unicode::char_type *u_s;
@@ -76,7 +78,9 @@
}
}
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
+
+///////////////////////////////////////////////////////////////////////////////
} // namespace zorba
/* vim:set et sw=2 ts=2: */
=== modified file 'src/zorbatypes/transcoder.h'
--- src/zorbatypes/transcoder.h 2011-06-14 17:26:33 +0000
+++ src/zorbatypes/transcoder.h 2011-10-28 02:34:26 +0000
@@ -40,21 +40,21 @@
std::ostream &os;
bool const utf16;
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
utf8::encoded_char_type utf8_buf_;
int utf8_buf_len_;
int utf8_char_len_;
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
public:
transcoder(std::ostream& output_stream, bool in_utf16);
transcoder& write( char const *s, std::streamsize n ) {
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
if ( utf16 )
write_utf16( s, n );
else
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
os.write( s, n );
return *this;
}
@@ -68,11 +68,11 @@
}
transcoder& operator<<( char ch ) {
-#ifndef ZORBA_NO_UNICODE
- if (utf16)
+#ifndef ZORBA_NO_ICU
+ if ( utf16 )
write_utf16_char(ch);
else
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
os << ch;
return *this;
}
@@ -97,10 +97,10 @@
}
private:
-#ifndef ZORBA_NO_UNICODE
+#ifndef ZORBA_NO_ICU
void write_utf16(const char* str, std::streamsize n);
void write_utf16_char(char ch);
-#endif /* ZORBA_NO_UNICODE */
+#endif /* ZORBA_NO_ICU */
};
} // namespace zorba
=== modified file 'src/zorbautils/string_util.cpp'
--- src/zorbautils/string_util.cpp 2011-06-14 17:26:33 +0000
+++ src/zorbautils/string_util.cpp 2011-10-28 02:34:26 +0000
@@ -24,16 +24,23 @@
#include "diagnostics/xquery_diagnostics.h"
using namespace std;
+#ifndef ZORBA_NO_ICU
U_NAMESPACE_USE
+#endif /* ZORBA_NO_ICU */
namespace zorba {
namespace utf8 {
+///////////////////////////////////////////////////////////////////////////////
+
size_t find( char const *s, size_t s_len, char const *ss, size_t ss_len,
- XQPCollator const *collator ) {
+ XQPCollator const *collator ) {
+#ifndef ZORBA_NO_ICU
if ( !collator || collator->doMemCmp()) {
+#endif /* ZORBA_NO_ICU */
char const *const result = ::strstr( s, ss );
return result ? result - s : zstring::npos;
+#ifndef ZORBA_NO_ICU
}
unicode::string u_s, u_ss;
@@ -54,28 +61,19 @@
}
}
return zstring::npos;
+#endif /* ZORBA_NO_ICU */
}
-size_t rfind(
- char const *s,
- size_t s_len,
- char const *ss,
- size_t ss_len,
- XQPCollator const *collator )
-{
- if ( ! collator || collator->doMemCmp())
- {
+size_t rfind( char const *s, size_t s_len, char const *ss, size_t ss_len,
+ XQPCollator const *collator ) {
+#ifndef ZORBA_NO_ICU
+ if ( ! collator || collator->doMemCmp()) {
+#endif /* ZORBA_NO_ICU */
zstring_b tmp;
tmp.wrap_memory(const_cast<char*>(s), s_len);
-
- size_t pos = tmp.rfind(ss, ss_len);
-
- //if (pos == zstring::npos)
- // return -1;
- //else
- // return pos;
- return pos;
+ return tmp.rfind(ss, ss_len);
+#ifndef ZORBA_NO_ICU
}
unicode::string u_s, u_ss;
@@ -102,6 +100,7 @@
}
return zstring::npos;
+#endif /* ZORBA_NO_ICU */
}
bool match_part( char const *in, char const *pattern, char const *flags ) {
@@ -116,6 +115,8 @@
return re.match_whole( in );
}
+///////////////////////////////////////////////////////////////////////////////
+
} // namespace utf8
} // namespace zorba
/* vim:set et sw=2 ts=2: */
=== modified file 'src/zorbautils/string_util.h'
--- src/zorbautils/string_util.h 2011-06-14 17:26:33 +0000
+++ src/zorbautils/string_util.h 2011-10-28 02:34:26 +0000
@@ -13,12 +13,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#pragma once
#ifndef ZORBA_UTILS_STRING_UTIL_H
#define ZORBA_UTILS_STRING_UTIL_H
#include <cstring>
+#include <zorba/config.h>
+
#include "diagnostics/xquery_diagnostics.h"
#include "zorbatypes/collation_manager.h"
@@ -145,9 +148,13 @@
char const *replacement, OutputStringType *out ) {
unicode::regex re;
re.compile( pattern, flags );
+#ifndef ZORBA_NO_ICU
unicode::string u_out;
return re.replace_all( in, replacement, &u_out ) &&
utf8::to_string( u_out.getBuffer(), u_out.length(), out );
+#else
+ return re.replace_all( in, replacement, out );
+#endif /* ZORBA_NO_ICU */
}
/**
@@ -175,9 +182,13 @@
OutputStringType *out ) {
unicode::regex re;
re.compile( pattern, flags );
+#ifndef ZORBA_NO_ICU
unicode::string u_out;
return re.replace_all( in, replacement, &u_out ) &&
utf8::to_string( u_out.getBuffer(), u_out.length(), out );
+#else
+ return re.replace_all( in, replacement, out );
+#endif /* ZORBA_NO_ICU */
}
/**
@@ -207,9 +218,13 @@
OutputStringType *out ) {
unicode::regex re;
re.compile( pattern, flags );
+#ifndef ZORBA_NO_ICU
unicode::string u_out;
return re.replace_all( in, replacement, &u_out ) &&
utf8::to_string( u_out.getBuffer(), u_out.length(), out );
+#else
+ return re.replace_all( in, replacement, out );
+#endif /* ZORBA_NO_ICU */
}
///////////////////////////////////////////////////////////////////////////////
@@ -217,7 +232,6 @@
} // namespace utf8
} // namespace zorba
#endif /* ZORBA_UTILS_STRING_UTIL_H */
-
/*
* Local variables:
* mode: c++
=== modified file 'test/commons/testdriver_comparator.cpp'
--- test/commons/testdriver_comparator.cpp 2011-09-12 23:22:24 +0000
+++ test/commons/testdriver_comparator.cpp 2011-10-28 02:34:26 +0000
@@ -280,6 +280,7 @@
{
// Wasn't a BOM; 'unread' it
refStream.clear();
+ refStream.clear();
refStream.seekg(0, std::ios_base::beg);
}
=== added directory 'test/rbkt/ExpQueryResults/zorba/string/Regex'
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a1.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a1.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>aa<fn:group nr="1">a</fn:group></fn:match></fn:analyze-string-result>
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a2.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a2.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a2.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>aa<fn:group nr="1"><fn:group nr="2">a</fn:group></fn:group></fn:match></fn:analyze-string-result>
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a3.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a3.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a3.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>aa<fn:group nr="1"><fn:group nr="2">a</fn:group></fn:group>c</fn:match></fn:analyze-string-result>
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a4.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a4.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a4.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>a<fn:group nr="1"><fn:group nr="2"/><fn:group nr="3">c</fn:group></fn:group></fn:match></fn:analyze-string-result>
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a5.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a5.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a5.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>aa<fn:group nr="1"><fn:group nr="2">a</fn:group><fn:group nr="3"/></fn:group>c</fn:match></fn:analyze-string-result>
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a6.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a6.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a6.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>aa<fn:group nr="1"><fn:group nr="2">a</fn:group><fn:group nr="3"/></fn:group>c</fn:match></fn:analyze-string-result>
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a7.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a7.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a7.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:match>aa<fn:group nr="1"><fn:group nr="2">a</fn:group><fn:group nr="3">a</fn:group></fn:group>c</fn:match></fn:analyze-string-result>
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a8.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a8.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a8.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:non-match>aaaa</fn:non-match><fn:match><fn:group nr="1"></fn:group>c</fn:match></fn:analyze-string-result>
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a9.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a9.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_a9.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+<fn:analyze-string-result xmlns:fn="http://www.w3.org/2005/xpath-functions"><fn:non-match>aaaa</fn:non-match><fn:match><fn:group nr="1"></fn:group>c<fn:group nr="2"></fn:group></fn:match></fn:analyze-string-result>
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m1.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m1.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m10.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m10.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m10.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m11.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m11.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m11.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m12.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m12.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m12.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m13.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m13.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m13.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m14.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m14.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m14.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m15.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m15.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m15.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m16.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m16.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m16.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m17.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m17.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m17.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m18.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m18.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m18.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m19.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m19.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m19.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m2.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m2.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m2.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m20.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m20.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m20.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m21.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m21.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m21.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m22.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m22.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m22.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m23.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m23.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m23.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m24.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m24.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m24.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m25.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m25.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m25.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m26.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m26.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m26.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m27.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m27.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m27.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m28.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m28.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m28.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m29.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m29.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m29.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m3.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m3.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m3.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m30.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m30.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m30.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m31.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m31.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m31.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m32.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m32.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m32.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m33.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m33.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m33.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m34.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m34.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m34.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m35.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m35.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m35.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m36.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m36.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m36.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m37.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m37.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m37.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m38.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m38.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m38.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m39.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m39.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m39.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m4.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m4.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m4.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m40.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m40.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m40.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m41.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m41.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m41.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m42.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m42.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m42.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m43.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m43.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m43.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m44.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m44.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m44.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m45.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m45.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m45.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m46.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m46.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m46.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m47.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m47.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m47.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m48.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m48.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m48.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m49.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m49.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m49.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m5.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m5.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m5.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m50.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m50.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m50.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m51.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m51.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m51.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m52.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m52.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m52.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m53.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m53.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m53.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m6.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m6.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m6.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m7.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m7.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m7.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m8.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m8.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m8.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m9.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m9.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_m9.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_prime1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_prime1.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_prime1.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+true false
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r1.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r1.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+ac1ac1
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r10.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r10.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r10.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+b
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r11.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r11.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r11.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
++-+-+-0-1
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r2.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r2.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r2.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+1
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r3.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r3.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r3.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+11
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r4.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r4.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r4.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+a-aba-ab
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r5.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r5.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r5.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+acbaacba
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r6.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r6.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r6.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+acaabcab
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r9.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r9.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_r9.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+11
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t1.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t1.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+ r c d r
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t2.xml.res'
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t4.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t4.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t4.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+ 0 1
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t5.xml.res'
--- test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t5.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/string/Regex/regex_t5.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+The cat sat on the mat
\ No newline at end of file
=== added file 'test/rbkt/ExpQueryResults/zorba/testdriver/bom_bug.xml.res'
--- test/rbkt/ExpQueryResults/zorba/testdriver/bom_bug.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/testdriver/bom_bug.xml.res 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+11
\ No newline at end of file
=== modified file 'test/rbkt/Queries/CMakeLists.txt'
--- test/rbkt/Queries/CMakeLists.txt 2011-10-13 12:46:50 +0000
+++ test/rbkt/Queries/CMakeLists.txt 2011-10-28 02:34:26 +0000
@@ -238,6 +238,8 @@
# EXPECTED_FAILURE (test/rbkt/zorba/file/dirname_basename ????need bugnum???)
#ENDIF ()
+# This test must fail in order to pass: it checks for the testdriver BOM bug that gives false positives.
+EXPECTED_FAILURE (test/rbkt/zorba/testdriver/bom_bug 3381121)
# --------------------------------------------------------------------------
# the list of tests that are failing but can be accepted by the commit queue
@@ -287,6 +289,32 @@
EXPECTED_FAILURE(test/rbkt/zorba/http-client/put/put3_binary_element 3391756)
EXPECTED_FAILURE(test/rbkt/zorba/http-client/post/post3_binary_element 3391756)
+IF(NOT ZORBA_NO_ICU)
+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err10 3405597)
+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err12 3405597)
+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err15 3405597)
+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err16 3405597)
+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err25 3405597)
+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_err7 3405597)
+ EXPECTED_FAILURE(test/rbkt/zorba/string/Regex/regex_m11 3405597)
+ENDIF(NOT ZORBA_NO_ICU)
+
+IF(ZORBA_NO_ICU)
+ SET_TESTS_PROPERTIES(test/rbkt/zorba/string/CodepointToStringFunc/CodepointToStringFunc1
+ test/rbkt/zorba/string/CodepointToStringFunc/UnicodeNormalization1
+ test/rbkt/zorba/utf-8/utf8inDataDe
+ test/rbkt/zorba/utf-8/utf8inDataFr
+ test/rbkt/zorba/utf-8/utf8inDataNo
+ test/rbkt/zorba/utf-8/utf8inDataRo
+ test/rbkt/zorba/http-client/send-request/http2-read-svg
+ test/rbkt/zorba/http-client/post/post2_element
+ test/rbkt/zorba/http-client/post/post3_xml
+ test/rbkt/zorba/http-client/put/put2_element
+ test/rbkt/zorba/http-client/put/put3_xml
+ test/rbkt/zorba/parsing_and_serializing/fn_serialize_04_xml_decl
+ PROPERTIES WILL_FAIL TRUE)
+ENDIF(ZORBA_NO_ICU)
+
EXPECTED_FAILURE(test/rbkt/zorba/reference/reference_5 868640)
=== added directory 'test/rbkt/Queries/zorba/string/Regex'
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a1.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a1.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a1.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaa", "(a)+")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a2.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a2.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a2.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaa", "((a))+")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a3.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a3.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a3.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaac", "((a))+?c")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a4.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a4.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a4.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("ac", "((a)|(c))+")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a5.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a5.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a5.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaac", "((a)|(c))+c")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a6.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a6.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a6.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaac", "((a)|(c))+c")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a7.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a7.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a7.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaaac", "((a)(a))+c")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a8.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a8.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a8.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaaac", "()c")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_a9.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_a9.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_a9.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:analyze-string("aaaac", "()c($)")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err1.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err1.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err1.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err1.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err1.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err1.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "+")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err10.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err10.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err10.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err10.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err10.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err10.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "\p{IsBasic-Latin}")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err11.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err11.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err11.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err11.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err11.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err11.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "\p{IsBasicLatin2}")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err12.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err12.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err12.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err12.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err12.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err12.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "\y")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err13.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err13.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err13.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err13.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err13.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err13.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "\0")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err14.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err14.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err14.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err14.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err14.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err14.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "(1)\2")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err15.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err15.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err15.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err15.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err15.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err15.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "[a-[b] ]")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err16.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err16.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err16.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err16.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err16.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err16.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "[\s-e]")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err17.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err17.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err17.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err17.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err17.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err17.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "[e-\s]")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err18.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err18.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err18.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err18.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err18.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err18.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "[eb")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err19.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err19.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err19.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err19.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err19.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err19.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+(:backref to unended group:)
+
+fn:matches("a", "(a(b(c)\2))")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err2.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err2.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err2.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err2.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err2.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err2.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "}")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err20.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err20.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err20.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0001
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err20.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err20.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err20.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+(:unknown flag:)
+
+fn:matches("a", "a", "a")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err21.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err21.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err21.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0004
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err21.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err21.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err21.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+(:$ not followed by 0-9:)
+
+fn:replace("a", "a", "$a")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err22.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err22.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err22.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0004
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err22.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err22.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err22.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+(:\ outside constructs \\ or \$:)
+
+fn:replace("a", "a", "\a")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err23.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err23.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err23.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err23.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err23.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err23.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+(:group with ?: is not used in backreferencing:)
+
+fn:matches("a", "(a(?:b)\2)")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err24.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err24.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err24.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err24.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err24.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err24.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+(:{min,max} min is bigger:)
+
+fn:matches("a", "a{3,2}")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err25.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err25.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err25.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err25.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err25.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err25.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "a^")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err3.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err3.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err3.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err3.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err3.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err3.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "{")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err4.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err4.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err4.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err4.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err4.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err4.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "?")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err5.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err5.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err5.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err5.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err5.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err5.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "*")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err7.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err7.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err7.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err7.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err7.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err7.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "^^")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err8.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err8.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err8.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err8.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err8.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err8.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "\p ")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err9.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_err9.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err9.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0002
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_err9.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_err9.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_err9.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "\P{L ")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m1.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m1.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m1.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("abracadabra", "bra")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m10.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m10.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m10.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ba", "a?b?")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m11.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m11.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m11.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ba", "[a-z-[ab]]")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m12.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m12.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m12.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaaaab", "a*ab")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m13.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m13.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m13.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaaaab", "a*?ab")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m14.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m14.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m14.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("abc", "(a|ab)c")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m15.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m15.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m15.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("bbba", "((a)|(b))*\3")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m16.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m16.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m16.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaaa", "^a*?$")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m17.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m17.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m17.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaabb", "a{1,3}ab")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m18.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m18.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m18.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,3 @@
+
+
+fn:matches("aaaa", "a{1,3}")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m19.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m19.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m19.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("baac", "(?:b)(a)\1c")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m2.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m2.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m2.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("abracadabra", "^a.*a$")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m20.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m20.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m20.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaaa", "(aaa|a){2,3}")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m21.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m21.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m21.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaaa", "(aaa|a){2,3}?")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m22.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m22.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m22.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaac", "(aaa|a){2,3}?c")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m23.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m23.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m23.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaac", "(aaa|a){2,3}c")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m24.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m24.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m24.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("aaaaab", "(a|b)*ab")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m25.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m25.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m25.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("t1t22t33", "(t.*){3}")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m26.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m26.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m26.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ac", "ab")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m27.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m27.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m27.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cat", "cat(aract|erpillar|) ")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m28.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m28.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m28.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cat", "c()a\1t")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m29.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m29.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m29.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cat", "cat(aract|erpillar|)")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m3.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m3.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m3.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("abracadabra", "^bra")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m30.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m30.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m30.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cat", "c()a\1t ")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m31.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m31.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m31.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cat", "cat(aract||erpillar)")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m32.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m32.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m32.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "|")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m33.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m33.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m33.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "^a")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m34.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m34.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m34.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "^a$")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m35.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m35.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m35.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,4 @@
+fn:matches(
+"a
+b
+c", "^b", "m")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m36.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m36.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m36.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "b$|^a")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m37.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m37.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m37.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,4 @@
+fn:matches(
+"a
+b
+c", "e$|^c$", "m")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m38.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m38.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m38.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,4 @@
+fn:matches(
+"a
+b
+c", "e$|(^c$)+", "m")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m39.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m39.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m39.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("a", "(^)a")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m4.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m4.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m4.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,6 @@
+let $poem :=
+<poem author="Wilhelm Busch"> Kaum hat dies
+ der Hahn gesehen, Fangt er auch schon an zu krahen: Kikeriki! Kikikerikih!! Tak, tak,
+ tak! - da kommen sie. </poem>
+return
+fn:matches($poem, "Kaum.*krahen")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m40.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m40.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m40.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "^+a")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m41.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m41.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m41.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "^?b")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m42.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m42.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m42.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "(c*)*")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m43.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m43.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m43.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "(c*)*?e")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m44.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m44.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m44.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "((c)*?)*?e")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m45.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m45.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m45.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("ab", "(c*){3,}e")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m46.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m46.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m46.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cabana", "(cab|caba)na")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m47.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m47.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m47.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("cabana", "((a|c)(a|a)(a|b)|(a|c)(a|a)(a|b)(a|a))na")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m48.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m48.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m48.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("abc", "^b")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m49.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m49.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m49.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("abc", "b$")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m5.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m5.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m5.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,6 @@
+let $poem :=
+<poem author="Wilhelm Busch"> Kaum hat dies
+ der Hahn gesehen, Fangt er auch schon an zu krahen: Kikeriki! Kikikerikih!! Tak, tak,
+ tak! - da kommen sie. </poem>
+return
+fn:matches($poem, "Kaum.*krahen", "s")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m50.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m50.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m50.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,2 @@
+fn:matches("abc
+def", "b.*f", "s")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m51.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m51.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m51.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,2 @@
+fn:matches("abc
+def", "b.*f", "m")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m52.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m52.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m52.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("b", "[^B]", "i")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m53.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m53.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m53.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:matches("bc d", "b c[ ]d", "x")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m6.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m6.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m6.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,6 @@
+let $poem :=
+<poem author="Wilhelm Busch"> Kaum hat dies der Hahn gesehen,
+ Fangt er auch schon an zu krahen: Kikeriki! Kikikerikih!! Tak, tak, tak! - da kommen sie.
+</poem>
+return
+fn:matches($poem, "^ Kaum.*gesehen,$", "m")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m7.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m7.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m7.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,6 @@
+let $poem :=
+<poem author="Wilhelm Busch"> Kaum hat dies der Hahn gesehen,
+ Fangt er auch schon an zu krahen: Kikeriki! Kikikerikih!! Tak, tak, tak! - da kommen sie.
+</poem>
+return
+fn:matches($poem, "^Kaum.*gesehen,$")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m8.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m8.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m8.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,7 @@
+let $poem :=
+<poem author="Wilhelm Busch"> Kaum hat dies der Hahn gesehen,
+ Fangt er auch schon an zu krahen: Kikeriki! Kikikerikih!!
+ Tak, tak, tak! - da kommen sie.
+</poem>
+return
+fn:matches($poem, "kiki", "i")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_m9.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_m9.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_m9.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,7 @@
+let $poem :=
+<poem author="Wilhelm Busch"> Kaum hat dies der Hahn gesehen,
+ Fangt er auch schon an zu krahen: Kikeriki! Kikikerikih!!
+ Tak, tak, tak! - da kommen sie.
+</poem>
+return
+fn:matches($poem, "(tak.*){3}", "i")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_prime1.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_prime1.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_prime1.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,17 @@
+
+declare function local:string-1-n($nr as xs:integer) as xs:string
+{
+ if($nr eq 0) then
+ ""
+ else
+ concat("1", local:string-1-n($nr - 1))
+};
+
+declare function local:is-prime($nr as xs:integer) as xs:boolean
+{
+ let $str1 := local:string-1-n($nr)
+ return
+ fn:not(fn:matches($str1, "^(11+)\1+$"))
+};
+
+(local:is-prime(13), local:is-prime(24))
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r1.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r1.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r1.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("acabacab", "ab", "1")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r10.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r10.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r10.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("aba", "a", "")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r11.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r11.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r11.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("a-b-c-0-1", "\p{Ll}", "+")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r2.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r2.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r2.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("aaa", "a+", "1")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r3.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r3.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r3.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("aa", "a|aa", "1")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r4.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r4.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r4.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("acabacab", "c(ab)", "-$1")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r5.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r5.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r5.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("acabacab", "(a)(b)", "$2$1")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r6.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r6.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r6.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("acabacab", "(a)(b)(a)(c)", "$3$1$2$5$4")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r7_err.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_r7_err.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r7_err.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0003
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r7_err.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r7_err.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r7_err.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("acabacab", "(a)*(b)*(a)*(c)*", "$3$1$2$5$4")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r8_err.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_r8_err.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r8_err.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0004
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r8_err.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r8_err.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r8_err.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("acabacab", "(a)(b)(a)(c)", "$$3$1$2$5$4")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_r9.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_r9.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_r9.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:replace("aaaa", "(a|aa){1,2}", "1")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_t1.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_t1.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_t1.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:tokenize("abracadabra", "(ab)|(a)")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_t2.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_t2.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_t2.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:tokenize("", "(ab)|(a)")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_t3_err.spec'
--- test/rbkt/Queries/zorba/string/Regex/regex_t3_err.spec 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_t3_err.spec 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:FORX0003
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_t3_err.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_t3_err.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_t3_err.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:tokenize("", "a*")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_t4.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_t4.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_t4.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,2 @@
+(:extract numbers:)
+fn:tokenize("x=0,y=1", "\P{Nd}+")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/string/Regex/regex_t5.xq'
--- test/rbkt/Queries/zorba/string/Regex/regex_t5.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/string/Regex/regex_t5.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+fn:tokenize("The cat sat on the mat", "\s+")
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/testdriver/bom_bug.xq'
--- test/rbkt/Queries/zorba/testdriver/bom_bug.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/testdriver/bom_bug.xq 2011-10-28 02:34:26 +0000
@@ -0,0 +1,1 @@
+1
\ No newline at end of file
=== modified file 'test/unit/CMakeLists.txt'
--- test/unit/CMakeLists.txt 2011-10-10 09:04:35 +0000
+++ test/unit/CMakeLists.txt 2011-10-28 02:34:26 +0000
@@ -64,7 +64,6 @@
test_audit.cpp
string_instantiate.cpp
streamable_string.cpp
- string_test.cpp
unique_ptr.cpp
main_sequential.cpp
datetime.cpp
@@ -74,6 +73,10 @@
staticcollectionmanager.cpp
)
+IF(NOT ZORBA_NO_ICU)
+ LIST(APPEND UNIT_TESTS_SRCS string_test.cpp)
+ENDIF(NOT ZORBA_NO_ICU)
+
IF (NOT ZORBA_NO_FULL_TEXT)
LIST(APPEND UNIT_TESTS_SRCS
stemmer.cpp
@@ -88,11 +91,6 @@
LIST(APPEND SPEC_FILES "debug_iter_serialization.cpp")
ENDIF(ZORBA_WITH_DEBUGGER)
-IF(WIN32)
- # SF#3191791
- LIST(REMOVE_ITEM UNIT_TESTS_SRCS "string_test.cpp")
-ENDIF(WIN32)
-
CREATE_TEST_SOURCELIST(UnitTests
UnitTests.cpp
${UNIT_TESTS_SRCS}
=== modified file 'test/unit/string_test.cpp'
--- test/unit/string_test.cpp 2011-08-24 12:48:09 +0000
+++ test/unit/string_test.cpp 2011-10-28 02:34:26 +0000
@@ -568,6 +568,7 @@
ASSERT_TRUE( t == s );
}
+#ifndef ZORBA_NO_ICU
template<class StringType>
static void test_to_string_from_wchar_t() {
wchar_t const w[] = L"hello";
@@ -577,6 +578,7 @@
for ( string::size_type i = 0; i < s.length(); ++i )
ASSERT_TRUE( s[i] == w[i] );
}
+#endif /* ZORBA_NO_ICU */
template<class StringType>
static void test_to_upper() {
@@ -604,6 +606,7 @@
}
}
+#ifndef ZORBA_NO_ICU
static void test_to_wchar_t() {
string const s = "hello";
wchar_t *w;
@@ -615,6 +618,7 @@
ASSERT_TRUE( w[i] == s[i] );
delete[] w;
}
+#endif /* ZORBA_NO_ICU */
static void test_trim_start() {
char const *s;
@@ -867,16 +871,20 @@
test_to_string_from_utf8<zstring>();
test_to_string_from_utf8<zstring_p>();
+#ifndef ZORBA_NO_ICU
test_to_string_from_wchar_t<string>();
test_to_string_from_wchar_t<zstring>();
test_to_string_from_wchar_t<zstring_p>();
+#endif /* ZORBA_NO_ICU */
test_to_upper<string>();
test_to_upper<zstring>();
test_to_upper<zstring_p>();
test_to_upper<String>();
+#ifndef ZORBA_NO_ICU
test_to_wchar_t();
+#endif /* ZORBA_NO_ICU */
test_trim_start();
test_trim_end();
=== modified file 'test/update/CMakeLists.txt'
--- test/update/CMakeLists.txt 2011-09-27 15:01:33 +0000
+++ test/update/CMakeLists.txt 2011-10-28 02:34:26 +0000
@@ -67,6 +67,15 @@
ENDFOREACH(TESTFILE)
+IF(ZORBA_NO_FULL_TEXT)
+ SET_TESTS_PROPERTIES(
+ test/update/zorba/store/sc1
+ test/update/zorba/store/sc2_ex
+ PROPERTIES WILL_FAIL TRUE)
+ENDIF(ZORBA_NO_FULL_TEXT)
+
+
+
IF (FOUND_XQUTS AND NOT ZORBA_TEST_W3C_TO_SUBMIT_RESULTS)
# We "don't care" that these fail
EXPECTED_FAILURE(test/update/w3c_update_testsuite/XQuery/Put/fn-put-005 3354993)