← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~zorba-coders/zorba/bug1073175 into lp:zorba

 

Juan Zacarias has proposed merging lp:~zorba-coders/zorba/bug1073175 into lp:zorba.

Commit message:
Fix for bug 1073175. 
Changes to fn:unparsed-text, fn:unparsed-text-available and fn:unparsed-text-lines. 
They now throw the correct error when the input contains unsupported UTF-8 characters and no encoding was established. fn:unparsed-text-available now returns the correct value in this case.

Requested reviews:
  Chris Hillery (ceejatec)
  Sorin Marian Nasoi (sorin.marian.nasoi)
Related bugs:
  Bug #1073175 in Zorba: "FOTS: fn:unparsed-text-lines test hangs"
  https://bugs.launchpad.net/zorba/+bug/1073175

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/bug1073175/+merge/136488

Fix for bug 1073175. 
Changes to fn:unparsed-text, fn:unparsed-text-available and fn:unparsed-text-lines. 
They now throw the correct error when the input contains unsupported UTF-8 characters and no encoding was established. fn:unparsed-text-available now returns the correct value in this case.

-- 
https://code.launchpad.net/~zorba-coders/zorba/bug1073175/+merge/136488
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/runtime/sequences/pregenerated/sequences.h'
--- src/runtime/sequences/pregenerated/sequences.h	2012-10-08 12:09:36 +0000
+++ src/runtime/sequences/pregenerated/sequences.h	2012-11-27 18:35:30 +0000
@@ -1429,6 +1429,7 @@
 public:
   std::unique_ptr<std::istream, StreamReleaser>* theStream; //the current stream
    internal::StreamResource* theStreamResource; //the current iterator
+  bool isEncoded; //Input is Encoded
 
   FnUnparsedTextLinesIteratorState();
 

=== modified file 'src/runtime/sequences/sequences_impl.cpp'
--- src/runtime/sequences/sequences_impl.cpp	2012-10-16 14:30:02 +0000
+++ src/runtime/sequences/sequences_impl.cpp	2012-11-27 18:35:30 +0000
@@ -31,6 +31,7 @@
 #include <zorba/util/time.h>
 
 #include <zorba/transcode_stream.h>
+#include <util/utf8_util_base.h>
 
 #include <util/fs_util.h>
 #include <util/uri_util.h>
@@ -1904,6 +1905,9 @@
   QueryLoc const& loc,
   store::Item_t& oResult)
 {
+  bool lIsEncoded = false;
+  const char* lIsInvalid = false;
+  zstring lValidateString;
   //Normalize input to handle filesystem paths, etc.
   zstring lNormUri;
   normalizeInputUri(aUri, aSctx, loc, &lNormUri);
@@ -1928,18 +1932,29 @@
   //check if encoding is needed
   if (transcode::is_necessary(aEncoding.c_str()))
   {
+    lIsEncoded = true;
     if (!transcode::is_supported(aEncoding.c_str()))
     {
       throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(aUri), ERROR_LOC(loc));
     }
     transcode::attach(*lStream.get(), aEncoding.c_str());
   }
+
   //creates stream item
   GENV_ITEMFACTORY->createStreamableString(
     oResult,
     *lStream.release(),
     lStream.get_deleter()
     );
+  
+  if (!lIsEncoded)
+  { 
+    lIsInvalid = utf8::validate(oResult->getStringValue().c_str()); 
+    if (lIsInvalid)
+    {
+      throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(aUri), ERROR_LOC(loc));
+    }
+  }
 
   if (oResult.isNull())
   {
@@ -2038,6 +2053,7 @@
   zstring encodingString("UTF-8");
   zstring lNormUri;
   zstring lErrorMessage;
+  const char* isInvalid;
   std::auto_ptr<internal::Resource> lResource;
   StreamReleaser lStreamReleaser;
 
@@ -2074,8 +2090,10 @@
   state->theStreamResource->setStreamReleaser(nullptr);
 
   //check if encoding is needed
+ state->isEncoded = false;
   if (transcode::is_necessary(encodingString.c_str()))
   {
+    state->isEncoded = true;
     if (!transcode::is_supported(encodingString.c_str()))
     {
       throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(uriString), ERROR_LOC(loc));
@@ -2086,8 +2104,16 @@
   while (state->theStream->get()->good())
   {
     getline(*state->theStream->get(), streamLine);
+    if(!state->isEncoded)
+    {
+      isInvalid = utf8::validate(streamLine.c_str()); 
+      if (isInvalid)
+      {
+        throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(uriString), ERROR_LOC(loc));
+      }
+    }
     STACK_PUSH(GENV_ITEMFACTORY->createString(result, streamLine), state);
-  }
+  }  
 
   STACK_END(state);
 }

=== modified file 'src/runtime/spec/sequences/sequences.xml'
--- src/runtime/spec/sequences/sequences.xml	2012-10-15 13:35:59 +0000
+++ src/runtime/spec/sequences/sequences.xml	2012-11-27 18:35:30 +0000
@@ -1306,6 +1306,7 @@
                     brief="the current stream"/>
       <zorba:member type=" internal::StreamResource*" name="theStreamResource"
                   brief="the current iterator"/>
+      <zorba:member type="bool" name="isEncoded" brief="Input is Encoded"/>
     </zorba:state> 
     
     <zorba:methods>


Follow ups