zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #17434
[Merge] lp:~zorba-coders/zorba/bug1073175 into lp:zorba
Juan Zacarias has proposed merging lp:~zorba-coders/zorba/bug1073175 into lp:zorba with lp:~paul-lucas/zorba/feature-utf8_streambuf as a prerequisite.
Requested reviews:
Sorin Marian Nasoi (sorin.marian.nasoi)
Chris Hillery (ceejatec)
Paul J. Lucas (paul-lucas)
Related bugs:
Bug #1073175 in Zorba: "FOTS: fn:unparsed-text-lines test hangs"
https://bugs.launchpad.net/zorba/+bug/1073175
For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/bug1073175/+merge/144762
Fix for bug 1073175.
Changes to fn:unparsed-text, fn:unparsed-text-available and fn:unparsed-text-lines.
They now throw the correct error when the input contains invalid UTF-8 characters and no encoding was specified. fn:unparsed-text-available now returns the correct value in this case.
--
https://code.launchpad.net/~zorba-coders/zorba/bug1073175/+merge/144762
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/runtime/sequences/pregenerated/sequences.h'
--- src/runtime/sequences/pregenerated/sequences.h 2012-10-08 12:09:36 +0000
+++ src/runtime/sequences/pregenerated/sequences.h 2013-01-24 17:53:25 +0000
@@ -1429,6 +1429,7 @@
public:
std::unique_ptr<std::istream, StreamReleaser>* theStream; //the current stream
internal::StreamResource* theStreamResource; //the current iterator
+ bool isEncoded; //Input is Encoded
FnUnparsedTextLinesIteratorState();
=== modified file 'src/runtime/sequences/sequences_impl.cpp'
--- src/runtime/sequences/sequences_impl.cpp 2013-01-11 10:18:14 +0000
+++ src/runtime/sequences/sequences_impl.cpp 2013-01-24 17:53:25 +0000
@@ -31,6 +31,8 @@
#include <zorba/util/time.h>
#include <zorba/transcode_stream.h>
+#include <util/utf8_util_base.h>
+#include <util/utf8_streambuf.h>
#include <util/fs_util.h>
#include <util/uri_util.h>
@@ -1904,6 +1906,7 @@
QueryLoc const& loc,
store::Item_t& oResult)
{
+ zstring lValidateString;
//Normalize input to handle filesystem paths, etc.
zstring lNormUri;
normalizeInputUri(aUri, aSctx, loc, &lNormUri);
@@ -1942,6 +1945,34 @@
}
transcode::attach(*lStream.get(), aEncoding.c_str());
}
+ else
+ {
+ //Verify valid utf-8 on the stream
+ std::ios::iostate const orig_exceptions = lStream.get()->exceptions();
+ utf8::streambuf utf_buf(lStream.get()->rdbuf());
+ try
+ {
+ lStream.get()->ios::rdbuf(&utf_buf);
+ lStream.get()->ios::exceptions(orig_exceptions | ios::badbit);
+
+ //Read streambuf to verify utf-8
+ char buf[1024];
+ while (lStream.get()->good())
+ {
+ lStream.get()->read(buf, sizeof(buf));
+ }
+
+ lStream.get()->ios::rdbuf(utf_buf.original());
+ lStream.get()->ios::exceptions(orig_exceptions);
+ }
+ catch (...)
+ {
+ lStream.get()->ios::rdbuf(utf_buf.original());
+ lStream.get()->ios::exceptions(orig_exceptions);
+ throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(aUri), ERROR_LOC(loc));
+ }
+ }
+
//creates stream item
GENV_ITEMFACTORY->createStreamableString(
oResult,
@@ -2046,6 +2077,7 @@
zstring encodingString("UTF-8");
zstring lNormUri;
zstring lErrorMessage;
+ const char* isInvalid;
std::auto_ptr<internal::Resource> lResource;
StreamReleaser lStreamReleaser;
std::auto_ptr<zorba::URI> lUri;
@@ -2091,8 +2123,10 @@
state->theStreamResource->setStreamReleaser(nullptr);
//check if encoding is needed
+ state->isEncoded = false;
if (transcode::is_necessary(encodingString.c_str()))
{
+ state->isEncoded = true;
if (!transcode::is_supported(encodingString.c_str()))
{
throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(uriString), ERROR_LOC(loc));
@@ -2103,8 +2137,16 @@
while (state->theStream->get()->good())
{
getline(*state->theStream->get(), streamLine);
+ if(!state->isEncoded)
+ {
+ isInvalid = utf8::validate(streamLine.c_str());
+ if (isInvalid)
+ {
+ throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(uriString), ERROR_LOC(loc));
+ }
+ }
STACK_PUSH(GENV_ITEMFACTORY->createString(result, streamLine), state);
- }
+ }
STACK_END(state);
}
=== modified file 'src/runtime/spec/sequences/sequences.xml'
--- src/runtime/spec/sequences/sequences.xml 2012-10-15 13:35:59 +0000
+++ src/runtime/spec/sequences/sequences.xml 2013-01-24 17:53:25 +0000
@@ -1306,6 +1306,7 @@
brief="the current stream"/>
<zorba:member type=" internal::StreamResource*" name="theStreamResource"
brief="the current iterator"/>
+ <zorba:member type="bool" name="isEncoded" brief="Input is Encoded"/>
</zorba:state>
<zorba:methods>
=== modified file 'test/fots_driver/cli.xq'
--- test/fots_driver/cli.xq 2013-01-24 07:06:35 +0000
+++ test/fots_driver/cli.xq 2013-01-24 17:53:25 +0000
@@ -190,6 +190,7 @@
), "
")
};
+<<<<<<< TREE
(:~
Tokenize a string that contains a comma-separated list of tokens.
@@ -210,6 +211,17 @@
variable $testCasePrefixesMsg := "'testCasePrefixes' was set to: ";
+=======
+(:~ The test cases in this list have bugs assigned and should not be run :)
+variable $exceptedTestCases := (
+"cbcl-subsequence-011", "cbcl-subsequence-012", "cbcl-subsequence-013",
+"cbcl-subsequence-014" (:see bug lp:1069794 :)
+, "re00975", "re00976", "re00976a" (:see bug lp:1070533 :)
+);
+
+(:~ The test in this list have bugs assigned already and should not be run :)
+variable $exceptedTestSets := ();
+>>>>>>> MERGE-SOURCE
switch ($mode)
Follow ups