← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~zorba-coders/zorba/bug-996084-reuse-stream into lp:zorba

 

Matthias Brantner has proposed merging lp:~zorba-coders/zorba/bug-996084-reuse-stream into lp:zorba.

Requested reviews:
  Matthias Brantner (matthias-brantner)
  Dennis Knochenwefel (dennis-knochenwefel)
  William Candillon (wcandillon)
  Chris Hillery (ceejatec)
Related bugs:
  Bug #996084 in Zorba: "crash in Streamable*Item with file module"
  https://bugs.launchpad.net/zorba/+bug/996084

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/bug-996084-reuse-stream/+merge/105025

Add the ability to create a StreamableStringItem that re-uses the stream from another Streamable*Item (in a memory-ownership-safe way).

Added a base64:decode#2 function that also does transcoding to utf-8
-- 
https://code.launchpad.net/~zorba-coders/zorba/bug-996084-reuse-stream/+merge/105025
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'ChangeLog'
--- ChangeLog	2012-05-07 23:43:04 +0000
+++ ChangeLog	2012-05-08 03:24:31 +0000
@@ -18,6 +18,7 @@
     - fn:has-children#0
     - fn:nilled#0
     - fn:path
+  * Added base64:decode#2 function which also does transcoding
   * Extended API for Python, Java, PHP and Ruby.
   * Add jvm classpath to zorbacmd and to Zorba API. Tracked by #931816
   * Added full-text module.
@@ -49,6 +50,7 @@
   * Fixed bug #912586, #912593 and #912722 (assertion failures with lax validation)
   * Fixed bug #921458 (file:read-text-lines() blocking)
   * Fixed bug #981405 (do not hoist expr containing try-catch variables out of the associated try-catch expression)
+  * Fixed bug #996084 (crash in Streamable*Item with file module)
   * Fixed bug #947627 (throw XQST0099 if more than one declarations of context item type in same module)
   * Fixed bug #980526 (no-copy rule bug due to global var being set in "distant" udf)
   * Fixed bug #949910 (has-children may be invoked on all nodes). Internally, zorba::store::Item::getChildren() now returns NULL on node classes without offspring (instead of raising an error).

=== modified file 'modules/com/zorba-xquery/www/modules/converters/base64.xq'
--- modules/com/zorba-xquery/www/modules/converters/base64.xq	2012-05-03 12:31:51 +0000
+++ modules/com/zorba-xquery/www/modules/converters/base64.xq	2012-05-08 03:24:31 +0000
@@ -25,6 +25,8 @@
  :)
 module namespace base64 = "http://www.zorba-xquery.com/modules/converters/base64";;
 
+declare namespace zerr = "http://www.zorba-xquery.com/errors";;
+
 declare namespace ver = "http://www.zorba-xquery.com/options/versioning";;
 declare option ver:module-version "2.0";
 
@@ -32,7 +34,7 @@
  : Decode a xs:base64Binary.
  :
  : The function assumes that the content after decoding is valid
- : UTF-8. 
+ : UTF-8.
  :
  : @param $base64 The xs:base64Binary item to decode
  : @return the base64 decoded value as string
@@ -41,6 +43,24 @@
 as xs:string external;
 
 (:~
+ : Decode a xs:base64Binary.
+ :
+ : The function assumes that the content after decoding has
+ : the given encoding.
+ :
+ : @param $base64 The xs:base64Binary item to decode
+ : @param $encoding The encoding of the string after base64 decoding it.
+ :
+ : @return the base64 decoded value as string
+ :
+ : @error zerr:ZXQP0006 if the given encoding is invalid or not supported.
+ :)
+declare function base64:decode(
+  $base64 as xs:base64Binary,
+  $encoding as xs:string)
+as xs:string external;
+
+(:~
  : Encode a xs:string as xs:base64Binary.
  :
  : @param $string The item whose string-value should be encoded

=== modified file 'src/functions/pregenerated/func_base64.cpp'
--- src/functions/pregenerated/func_base64.cpp	2012-05-03 12:31:51 +0000
+++ src/functions/pregenerated/func_base64.cpp	2012-05-08 03:24:31 +0000
@@ -68,6 +68,19 @@
   {
     
 
+    DECL_WITH_KIND(sctx, fn_zorba_base64_decode,
+        (createQName("http://www.zorba-xquery.com/modules/converters/base64","","decode";), 
+        GENV_TYPESYSTEM.BASE64BINARY_TYPE_ONE, 
+        GENV_TYPESYSTEM.STRING_TYPE_ONE, 
+        GENV_TYPESYSTEM.STRING_TYPE_ONE),
+        FunctionConsts::FN_ZORBA_BASE64_DECODE_2);
+
+  }
+
+
+  {
+    
+
     DECL_WITH_KIND(sctx, fn_zorba_base64_encode,
         (createQName("http://www.zorba-xquery.com/modules/converters/base64","","encode";), 
         GENV_TYPESYSTEM.STRING_TYPE_ONE, 

=== modified file 'src/functions/pregenerated/function_enum.h'
--- src/functions/pregenerated/function_enum.h	2012-05-03 12:31:51 +0000
+++ src/functions/pregenerated/function_enum.h	2012-05-08 03:24:31 +0000
@@ -36,6 +36,7 @@
   FN_RESOLVE_URI_1,
   FN_RESOLVE_URI_2,
   FN_ZORBA_BASE64_DECODE_1,
+  FN_ZORBA_BASE64_DECODE_2,
   FN_ZORBA_BASE64_ENCODE_1,
   OP_IS_SAME_NODE_2,
   OP_NODE_BEFORE_2,

=== modified file 'src/runtime/base64/base64_impl.cpp'
--- src/runtime/base64/base64_impl.cpp	2012-05-03 12:31:51 +0000
+++ src/runtime/base64/base64_impl.cpp	2012-05-08 03:24:31 +0000
@@ -15,6 +15,9 @@
  */
 #include "stdafx.h"
 
+#include <sstream>
+#include <zorba/transcode_stream.h>
+
 #include "system/globalenv.h"
 
 #include "diagnostics/xquery_diagnostics.h"
@@ -33,6 +36,7 @@
 {
   store::Item_t lItem;
   zstring lResultString;
+  zstring lEncoding("UTF-8");
   const char* lContent;
   size_t lSize;
   result = NULL;
@@ -42,21 +46,45 @@
 
   consumeNext(lItem, theChildren[0].getp(), planState);
 
+  if (theChildren.size() == 2)
+  {
+    store::Item_t lEncodingItem;
+    consumeNext(lEncodingItem, theChildren[1].getp(), planState);
+    lEncoding = lEncodingItem->getStringValue();
+
+    if (!transcode::is_supported(lEncoding.c_str()))
+    {
+      throw XQUERY_EXCEPTION(
+        zerr::ZXQP0006_UNKNOWN_ENCODING,
+        ERROR_PARAMS( lEncoding ),
+        ERROR_LOC( loc )
+      );
+    }
+  }
+
   if (lItem->isStreamable())
   {
     if (lItem->isEncoded())
     {
-      // decode and eventually transcode
       lResultString = Base64::decode(lItem->getStream());
     }
     else
     {
-      // streamable string eventually transcoding
-      GENV_ITEMFACTORY->createStreamableString(
-          result,
-          lItem->getStream(),
-          lItem->getStreamReleaser(),
-          lItem->isSeekable());
+      if (transcode::is_necessary(lEncoding.c_str()))
+      {
+        transcode::attach(lItem->getStream(), lEncoding.c_str());
+        GENV_ITEMFACTORY->createStreamableString(
+            result,
+            lItem->getStream(),
+            lItem->getStreamReleaser(),
+            lItem->isSeekable());
+      }
+      else
+      {
+        GENV_ITEMFACTORY->createSharedStreamableString(
+            result,
+            lItem);
+      }
     }
   }
   else
@@ -74,10 +102,37 @@
     {
       lResultString.insert(0, lContent, lSize);
     }
-  }
-  if (!result) // otherwise it's a streamable string already
-  {
-    GENV_ITEMFACTORY->createString(result, lResultString);
+
+    if (transcode::is_necessary(lEncoding.c_str()))
+    {
+      try
+      {
+        zstring lTranscodedString;
+        transcode::stream<std::istringstream> lTranscoder(
+            lEncoding.c_str(),
+            lResultString.c_str()
+          );
+        char buf[1024];
+        while (lTranscoder.good())
+        {
+          lTranscoder.read(buf, 1024);
+          lTranscodedString.append(buf, lTranscoder.gcount());
+        }
+        GENV_ITEMFACTORY->createString(result, lTranscodedString);
+      }
+      catch (ZorbaException& e)
+      {
+        throw XQUERY_EXCEPTION(
+          zerr::ZOSE0006_TRANSCODING_ERROR,
+          ERROR_PARAMS( e.what() ),
+          ERROR_LOC( loc )
+        );
+      }
+    }
+    else
+    {
+      GENV_ITEMFACTORY->createString(result, lResultString);
+    }
   }
   STACK_PUSH (true, state);
 

=== modified file 'src/runtime/spec/base64/base64.xml'
--- src/runtime/spec/base64/base64.xml	2012-05-03 12:31:51 +0000
+++ src/runtime/spec/base64/base64.xml	2012-05-08 03:24:31 +0000
@@ -25,8 +25,13 @@
       <zorba:param>xs:base64Binary</zorba:param>
       <zorba:output>xs:string</zorba:output>
     </zorba:signature>
+    <zorba:signature localname="decode" prefix="fn-zorba-base64">
+      <zorba:param>xs:base64Binary</zorba:param>
+      <zorba:param>xs:string</zorba:param> <!-- encoding -->
+      <zorba:output>xs:string</zorba:output>
+    </zorba:signature>
   </zorba:function>
-    
+
 </zorba:iterator>
 
 <!--

=== modified file 'src/store/api/item_factory.h'
--- src/store/api/item_factory.h	2012-05-03 12:31:51 +0000
+++ src/store/api/item_factory.h	2012-05-08 03:24:31 +0000
@@ -97,6 +97,19 @@
       bool seekable = false) = 0;
 
   /**
+   * Create a StreamableStringItem which re-uses the stream from another
+   * Streamable*Item. This will maintain a reference to the original
+   * item to ensure the stream is not cleaned up before we are done with it.
+   *
+   * It only makes sense to use this method if either (a) the dependent item's
+   * stream is seekable and hence re-usable, or (b) you are sure that the
+   * dependent item will not be utilized after this new item is created.
+   */
+  virtual bool createSharedStreamableString(
+      Item_t& result,
+      Item_t& streamble_dependent) = 0;
+
+  /**
    * Specification: [http://www.w3.org/TR/xmlschema-2/#normalizedString]
    * @param value string representation of the value
    */

=== modified file 'src/store/naive/atomic_items.cpp'
--- src/store/naive/atomic_items.cpp	2012-05-04 14:32:28 +0000
+++ src/store/naive/atomic_items.cpp	2012-05-08 03:24:31 +0000
@@ -1673,6 +1673,36 @@
 /*******************************************************************************
   class StreamableStringItem
 ********************************************************************************/
+StreamableStringItem::StreamableStringItem(
+    std::istream& aStream,
+    StreamReleaser streamReleaser,
+    bool seekable) :
+  theIstream(aStream),
+  theIsMaterialized(false),
+  theIsConsumed(false),
+  theIsSeekable(seekable),
+  theStreamReleaser(streamReleaser),
+  theStreamableDependent(nullptr)
+{
+}
+
+StreamableStringItem::StreamableStringItem(
+    store::Item_t& aStreamableDependent) :
+  theIstream(aStreamableDependent->getStream()),
+  theIsMaterialized(false),
+  theIsConsumed(false),
+  theIsSeekable(aStreamableDependent->isSeekable()),
+  theStreamReleaser(nullptr),
+  theStreamableDependent(aStreamableDependent)
+{
+  ZORBA_ASSERT(theStreamableDependent->isStreamable());
+
+  // We copied the dependent item's stream and seekable flag in the initializer
+  // above, but did NOT copy the StreamReleaser. The dependent item maintains
+  // memory ownership of the stream in this way.
+}
+
+
 void StreamableStringItem::appendStringValue(zstring& aBuf) const
 {
   if (!theIsMaterialized) 

=== modified file 'src/store/naive/atomic_items.h'
--- src/store/naive/atomic_items.h	2012-05-03 12:31:51 +0000
+++ src/store/naive/atomic_items.h	2012-05-08 03:24:31 +0000
@@ -875,6 +875,8 @@
 
   StreamReleaser theStreamReleaser;
 
+  store::Item_t theStreamableDependent;
+
 public:
   bool equals(
         store::Item const*,
@@ -922,15 +924,10 @@
   StreamableStringItem(
       std::istream& aStream,
       StreamReleaser streamReleaser,
-      bool seekable = false)
-    :
-    theIstream(aStream),
-    theIsMaterialized(false),
-    theIsConsumed(false),
-    theIsSeekable(seekable),
-    theStreamReleaser(streamReleaser)
-  {
-  }
+      bool seekable = false);
+
+  StreamableStringItem(
+      store::Item_t& aStreamableDependent);
 
   void materialize() const;
 };

=== modified file 'src/store/naive/simple_item_factory.cpp'
--- src/store/naive/simple_item_factory.cpp	2012-05-03 12:31:51 +0000
+++ src/store/naive/simple_item_factory.cpp	2012-05-08 03:24:31 +0000
@@ -157,6 +157,14 @@
   return true;
 }
 
+bool BasicItemFactory::createSharedStreamableString(
+    store::Item_t &result,
+    store::Item_t &streamable_dependent)
+{
+  result = new StreamableStringItem( streamable_dependent );
+  return true;
+}
+
 
 bool BasicItemFactory::createNormalizedString(store::Item_t& result, zstring& value)
 {

=== modified file 'src/store/naive/simple_item_factory.h'
--- src/store/naive/simple_item_factory.h	2012-05-03 12:31:51 +0000
+++ src/store/naive/simple_item_factory.h	2012-05-08 03:24:31 +0000
@@ -106,6 +106,10 @@
       StreamReleaser,
       bool seekable = false);
 
+  bool createSharedStreamableString(
+      store::Item_t& result,
+      store::Item_t& streamable_dependent);
+
   bool createBase64Binary(store::Item_t& result, xs_base64Binary value);
 
   bool createBase64Binary(

=== added file 'test/rbkt/ExpQueryResults/zorba/base64/decode_iso-8859-1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/base64/decode_iso-8859-1.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/base64/decode_iso-8859-1.xml.res	2012-05-08 03:24:31 +0000
@@ -0,0 +1,1 @@
+äöü

=== added file 'test/rbkt/ExpQueryResults/zorba/base64/decode_iso-8859-1_file.xml.res'
--- test/rbkt/ExpQueryResults/zorba/base64/decode_iso-8859-1_file.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/base64/decode_iso-8859-1_file.xml.res	2012-05-08 03:24:31 +0000
@@ -0,0 +1,1 @@
+äöü

=== added file 'test/rbkt/ExpQueryResults/zorba/base64/reuse-stream.xml.res'
--- test/rbkt/ExpQueryResults/zorba/base64/reuse-stream.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/base64/reuse-stream.xml.res	2012-05-08 03:24:31 +0000
@@ -0,0 +1,1 @@
+<file><content>f0VMRgEBAQAAAAAAAAAAAAMAAwABAAAAIPxDADQAAAD0JTMHAAAAADQAIAAHACgAKAAlAAEAAAAAAAAAAAAAAAAAAAAv22gBL9toAQUAAAAAEAAAAQAAAJTbaAGU62gBlOtoAehnBwAwpgcABgAAAAAQAAACAAAAvKBuAbywbgG8sG4BGAEAABgBAAAGAAAABAAAAAQAAAAUAQAAFAEAABQBAAAkAAAAJAAAAAQAAAAEAAAAUOV0ZKyHJwGshycBrIcnAfyBCAD8gQgABAAAAAQAAABR5XRkAAAAAAAAAAAAAAAAAAAAAAAAAAAGAAAABAAAAFLldGSU22gBlOtoAZTraAFs5AUAbOQFAAQAAAABAAAABAAAABQAAAADAAAAR05VANMOFlHRDhyfmkw9H3+lYuAncwGhG0AAAAYDAAAAIAAAEgAAABBkRVIkEIgUIwhRukiBHLCOBAAEAgggoSCCg0CkED5YAAAGFAIgABkACJAAiQAwAFCQEwAQIABBAIIFYAIACAASAAZARACAABkTpAQB5EQIYEF0IBIAIgAhciYEoEICZGgAAQAABCAAkBACZAJAAQQUEhGAIIAAKBULIJYAAABRAKFV4AIWgQAEIYgAAIAAQECAMgBGiCIGiRAEkAABAMAIgEQAAAQAACgIJYAAAEAAAAGAAgtCpFwEAAZRQACAABAICAASiBBFgRhIAkEBMAApJchACQZDAIAAgABIAKQEgKEACGE05FgCAAAgACEAgEAAQoxkUAEAiBAGAAkCFiBoISRFFCBEBMV3gARBgBAosUJBQQAEEDIAUUCBAABAEAhgAEAAAAAAACApACIBBgHKkAWBQAAEAAAAAACDURggAkEEEIlEgIEADQZwAAATC5AQNYxCABABAAAAAAEAlQD0WGD3gAAAIEDKCCAQQMGNIAAQAAAACECCAADAbMAWABABaRAAUFQCgAAAgxJmDAEIAiIZCmIAIAIEgAESCSAjETEQIws2A8AIgAEAAAAAkGkAiEAKDEYEBRGJhFEAIACICAIBKMAGAAAHIAQBqIgAUAECgAAAAhoEER4BASMBBhEIIQGCAFbg8IiBYAgCAQgAgEBwCAAAIAEAQATGAj+QQaAIIQAIAAShAgg2AKQAAAQBkCoASAAAFAAgAYFqAQDCcoOEg0sAIhKAABQAAgpAAghAAIQgIBAkAAQBDA4gRAACAAIgAPBAUgBDARCAggB0GQBaEAg1Mk8CABEAAAQgBBgAECIAvEIhFCRAIBgAAEALyBEQAhACQoIKwAABkBiAVM4QAQlFgBAS4AASAIIkAZEkEgAsBMCABSCAKACAIBMAD4JSLEyAAGQBBqATAUECQUgACQGBGA==</content><md5>f027633a677f42caf1544f6913bc2dab</md5></file>
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/base64/decode_iso-8859-1.xq'
--- test/rbkt/Queries/zorba/base64/decode_iso-8859-1.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/base64/decode_iso-8859-1.xq	2012-05-08 03:24:31 +0000
@@ -0,0 +1,3 @@
+import module namespace b = "http://www.zorba-xquery.com/modules/converters/base64";;
+
+b:decode(xs:base64Binary("5Pb8Cg=="), "ISO-8859-1")

=== added file 'test/rbkt/Queries/zorba/base64/decode_iso-8859-1_file.xq'
--- test/rbkt/Queries/zorba/base64/decode_iso-8859-1_file.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/base64/decode_iso-8859-1_file.xq	2012-05-08 03:24:31 +0000
@@ -0,0 +1,5 @@
+import module namespace f = "http://expath.org/ns/file";;
+import module namespace b = "http://www.zorba-xquery.com/modules/converters/base64";;
+
+b:decode(xs:base64Binary(f:read-text(resolve-uri("iso-8859-1.txt"))), "ISO-8859-1")
+

=== added file 'test/rbkt/Queries/zorba/base64/iso-8859-1.txt'
--- test/rbkt/Queries/zorba/base64/iso-8859-1.txt	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/base64/iso-8859-1.txt	2012-05-08 03:24:31 +0000
@@ -0,0 +1,1 @@
+5Pb8Cg==
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/base64/reuse-stream.xq'
--- test/rbkt/Queries/zorba/base64/reuse-stream.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/base64/reuse-stream.xq	2012-05-08 03:24:31 +0000
@@ -0,0 +1,16 @@
+(: From bug 996084. We need to ensure that a streamable base64 item can be re-used. :)
+
+import module namespace base64="http://www.zorba-xquery.com/modules/converters/base64";;
+import module namespace hash = "http://www.zorba-xquery.com/modules/cryptography/hash";;
+import module namespace file = "http://expath.org/ns/file";;
+
+declare variable $filename as xs:anyURI := resolve-uri("decoded");
+
+let $data as xs:base64Binary := file:read-binary ($filename)
+let $md5 as xs:string := hash:md5(base64:decode($data))
+
+return 
+  <file>
+    <content>{$data}</content>
+    <md5>{$md5}</md5>
+  </file>


Follow ups