← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~zorba-coders/zorba/ft-base64Binary into lp:zorba

 

Matthias Brantner has proposed merging lp:~zorba-coders/zorba/ft-base64Binary into lp:zorba.

Requested reviews:
  Zorba Coders (zorba-coders)

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/ft-base64Binary/+merge/94074

more efficient implementation of xs:base64Binary items
-- 
https://code.launchpad.net/~zorba-coders/zorba/ft-base64Binary/+merge/94074
Your team Zorba Coders is requested to review the proposed merge of lp:~zorba-coders/zorba/ft-base64Binary into lp:zorba.
=== modified file 'ChangeLog'
--- ChangeLog	2012-02-21 19:20:00 +0000
+++ ChangeLog	2012-02-22 01:40:29 +0000
@@ -20,6 +20,7 @@
   * Fixed bug #911585 (management of variables during eval)
   * Fixed bug #866423 (fn:empty and fn:exists iterators must reset their input in
     case of early-out)
+  * More efficient implementation for base64Binary items
   * Added index management function to the C++ api's StaticCollectionManager.
   * Fixed bug #872288 (reset recursive flag during node rename)
   * Fixed bug #905041 (allow for the default element and function namespaces to be

=== modified file 'include/zorba/item.h'
--- include/zorba/item.h	2012-01-11 17:30:25 +0000
+++ include/zorba/item.h	2012-02-22 01:40:29 +0000
@@ -363,6 +363,14 @@
   std::istream&
   getStream();
 
+  /**
+   * Returns true if the contents of a binary item are already encoded
+   * 
+   * @return true if the content is already encoded, false otherwise
+   */
+  bool
+  isEncoded() const;
+
   /** \brief Returns the name of the collection this node is stored in.
    *
    * @return The name of the collection or 0 if the given item is not

=== modified file 'include/zorba/item_factory.h'
--- include/zorba/item_factory.h	2012-01-11 17:30:25 +0000
+++ include/zorba/item_factory.h	2012-02-22 01:40:29 +0000
@@ -149,6 +149,25 @@
       virtual Item 
       createBase64Binary(const unsigned char* aBinData, size_t aLength) = 0;
 
+      /** \brief Creates a streamable Base64Binary Item
+       *         see [http://www.w3.org/TR/xmlschema-2/#base64Binary]
+       *
+       * @param stream An istream whence to read the binary's content.
+       * @param streamReleaser A function pointer which is invoked once
+       *        the StreamableBase64Binary is destroyed. Normally this function
+       *        will delete the std::istream object passed to it.
+       * @param seekable true if the given stream is seekable
+       * @param encoded true if the contents of the given stream are already
+       *        base64 encoded
+       * @return The streamable Base64Binary Item
+       */
+      virtual Item
+      createStreamableBase64Binary(
+          std::istream &stream,
+          StreamReleaser streamReleaser,
+          bool seekable = false,
+          bool encoded = false) = 0;
+
       /** \brief Creates a Boolean Item
        *         see [http://www.w3.org/TR/xmlschema-2/#bool]
        *

=== modified file 'modules/com/zorba-xquery/www/modules/converters/base64.xq'
--- modules/com/zorba-xquery/www/modules/converters/base64.xq	2011-08-01 10:18:53 +0000
+++ modules/com/zorba-xquery/www/modules/converters/base64.xq	2012-02-22 01:40:29 +0000
@@ -28,14 +28,17 @@
 declare namespace ver = "http://www.zorba-xquery.com/options/versioning";
 declare option ver:module-version "2.0";
 
-
 (:~
  : Decode a xs:base64Binary.
  :
+ : The function assumes that the content after decoding is valid
+ : UTF-8. 
+ :
  : @param $base64 The xs:base64Binary item to decode
- : @return the decoded xs:base64Binary item as string
+ : @return the base64 decoded value as string
  :)
-declare function base64:decode($base64 as xs:base64Binary) as xs:string external;
+declare function base64:decode($base64 as xs:base64Binary)
+as xs:string external;
 
 (:~
  : Encode a xs:string as xs:base64Binary.

=== modified file 'modules/org/expath/ns/file.xq.src/file.cpp'
--- modules/org/expath/ns/file.xq.src/file.cpp	2012-02-16 14:11:02 +0000
+++ modules/org/expath/ns/file.xq.src/file.cpp	2012-02-22 01:40:29 +0000
@@ -144,19 +144,14 @@
   // actual read
   Item lItem;
   try {
-    std::ifstream lInStream;
-    lFile->openInputStream(lInStream, true, false);
-
-    std::stringstream lStrStream;
-    char lBuf[1024];
-    while (!lInStream.eof()) {
-      lInStream.read(lBuf, 1024);
-      lStrStream.write(lBuf, lInStream.gcount());
-    }  
-
-    String lContent(lStrStream.str());
-    String lEncodedContent = encoding::Base64::encode(lContent);
-    lItem = theModule->getItemFactory()->createBase64Binary(lEncodedContent.data(), lEncodedContent.size());
+    std::unique_ptr<std::ifstream> lInStream;
+    lInStream.reset( new std::ifstream() );
+    lFile->openInputStream(*lInStream.get(), true, false);
+
+    lItem = theModule->getItemFactory()->createStreamableBase64Binary(
+        *lInStream.release(), &FileModule::streamReleaser, true
+      );
+
   } catch (ZorbaException& ze) {
     std::stringstream lSs;
     lSs << "An unknown error occured: " << ze.what() << "Can not read file";

=== modified file 'modules/org/expath/ns/file.xq.src/file_function.cpp'
--- modules/org/expath/ns/file.xq.src/file_function.cpp	2012-02-16 14:11:02 +0000
+++ modules/org/expath/ns/file.xq.src/file_function.cpp	2012-02-22 01:40:29 +0000
@@ -26,6 +26,7 @@
 #include <zorba/user_exception.h>
 #include <zorba/util/path.h>
 #include <zorba/xquery_functions.h>
+#include <zorba/singleton_item_sequence.h>
 #include <zorba/zorba.h>
 
 #include "file_module.h"
@@ -255,10 +256,25 @@
 
     // if this is a binary write
     if (lBinary) {
-      Zorba_SerializerOptions lOptions;
-      lOptions.ser_method = ZORBA_SERIALIZATION_METHOD_BINARY;
-      Serializer_t lSerializer = Serializer::createSerializer(lOptions);
-      lSerializer->serialize(aArgs[1], lOutStream);
+      Item lBinaryItem;
+      Iterator_t lContentSeq = aArgs[1]->getIterator();
+      lContentSeq->open();
+      while (lContentSeq->next(lBinaryItem))
+      {
+        if (lBinaryItem.isStreamable() && !lBinaryItem.isEncoded())
+        {
+          lOutStream << lBinaryItem.getStream().rdbuf();
+        }
+        else
+        {
+          Zorba_SerializerOptions lOptions;
+          lOptions.ser_method = ZORBA_SERIALIZATION_METHOD_BINARY;
+          Serializer_t lSerializer = Serializer::createSerializer(lOptions);
+          SingletonItemSequence lSeq(lBinaryItem);
+          lSerializer->serialize(&lSeq, lOutStream);
+        }
+
+      }
     }
     // if we only write text
     else {

=== modified file 'src/api/item.cpp'
--- src/api/item.cpp	2012-02-02 09:56:52 +0000
+++ src/api/item.cpp	2012-02-22 01:40:29 +0000
@@ -470,6 +470,17 @@
   // TODO: throw exception
 }
 
+bool
+Item::isEncoded() const
+{
+  ITEM_TRY
+    SYNC_CODE(AutoLock lock(GENV_STORE.getGlobalLock(), Lock::READ);)
+
+    return m_item->isEncoded();
+  ITEM_CATCH
+  // TODO: throw exception
+}
+
 Item
 Item::getCollectionName() const
 {

=== modified file 'src/api/itemfactoryimpl.cpp'
--- src/api/itemfactoryimpl.cpp	2012-01-11 17:30:25 +0000
+++ src/api/itemfactoryimpl.cpp	2012-02-22 01:40:29 +0000
@@ -212,13 +212,32 @@
   std::stringstream lSs;
   while (aEncodedStream.good()) 
   {
-    lSs.put(aEncodedStream.get());
+    char c = aEncodedStream.get();
+    if (aEncodedStream.good())
+    {
+      lSs.put(c);
+    }
   }
   std::string lContent = lSs.str();
   return createBase64Binary(lContent.c_str(), lContent.size());
 }
 
 
+Item
+ItemFactoryImpl::createStreamableBase64Binary(
+    std::istream &stream,
+    StreamReleaser streamReleaser,
+    bool seekable,
+    bool encoded)
+{
+  store::Item_t lItem;
+  theItemFactory->createStreamableBase64Binary(
+      lItem, stream, streamReleaser, seekable, encoded
+    );
+  return &*lItem;
+}
+
+
 Item ItemFactoryImpl::createBoolean(bool aValue)
 {
   store::Item_t lItem;

=== modified file 'src/api/itemfactoryimpl.h'
--- src/api/itemfactoryimpl.h	2012-01-11 17:30:25 +0000
+++ src/api/itemfactoryimpl.h	2012-02-22 01:40:29 +0000
@@ -69,6 +69,13 @@
       virtual Item 
       createBase64Binary(const unsigned char* aBinData, size_t aLength);
 
+      virtual Item
+      createStreamableBase64Binary(
+          std::istream &stream,
+          StreamReleaser streamReleaser,
+          bool seekable = false,
+          bool encoded = false);
+
       virtual Item 
       createBoolean(bool aValue);
     

=== modified file 'src/api/options.cpp'
--- src/api/options.cpp	2012-01-11 17:30:25 +0000
+++ src/api/options.cpp	2012-02-22 01:40:29 +0000
@@ -63,6 +63,7 @@
     else if (strcmp(value, "html") == 0) ser_method = ZORBA_SERIALIZATION_METHOD_HTML;
     else if (strcmp(value, "xhtml") == 0) ser_method = ZORBA_SERIALIZATION_METHOD_XHTML;
     else if (strcmp(value, "text") == 0) ser_method = ZORBA_SERIALIZATION_METHOD_TEXT;
+    else if (strcmp(value, "binary") == 0) ser_method = ZORBA_SERIALIZATION_METHOD_BINARY;
     else
     {
       ; // TODO signal errors for incorrect values?

=== modified file 'src/api/serialization/serializer.cpp'
--- src/api/serialization/serializer.cpp	2012-01-11 17:30:25 +0000
+++ src/api/serialization/serializer.cpp	2012-02-22 01:40:29 +0000
@@ -368,22 +368,50 @@
 void serializer::emitter::emit_streamable_item(store::Item* item)
 {
   // Streamable item
-  char buffer[1024];
-  int rollover = 0;
-  std::streambuf *  pbuf;
-  std::streamsize   read_bytes;
-  std::istream& is = item->getStream();
-
-  // read bytes and do string expansion
-  do
-  {
-    //std::istream::read uses a try/catch internally so the Zorba_Exception is lost: that is why we are using std::streambuf::sgetn
-    pbuf = is.rdbuf();
-    read_bytes = pbuf->sgetn(buffer + rollover, 1024 - rollover);
-    rollover = emit_expanded_string(buffer, static_cast<zstring::size_type>(read_bytes + rollover));
-    memmove(buffer, buffer + 1024 - rollover, rollover);
-  }
-  while (read_bytes > 0);
+  store::SchemaTypeCode lTypeCode = item->getTypeCode();
+
+  switch (lTypeCode)
+  {
+  case store::XS_STRING:
+  {
+    char buffer[1024];
+    int rollover = 0;
+    std::streambuf *  pbuf;
+    std::streamsize   read_bytes;
+    std::istream& is = item->getStream();
+
+    // read bytes and do string expansion
+    do
+    {
+      //std::istream::read uses a try/catch internally so the Zorba_Exception is lost: that is why we are using std::streambuf::sgetn
+      pbuf = is.rdbuf();
+      read_bytes = pbuf->sgetn(buffer + rollover, 1024 - rollover);
+      rollover = emit_expanded_string(buffer, static_cast<zstring::size_type>(read_bytes + rollover));
+      memmove(buffer, buffer + 1024 - rollover, rollover);
+    }
+    while (read_bytes > 0);
+    break;
+  }
+  case store::XS_BASE64BINARY:
+  {
+    if (item->isEncoded())
+    {
+      std::istream& is = item->getStream();
+      char buf[1024];
+      while (is.good())
+      {
+        is.read(buf, 1024);
+        tr.write(buf, is.gcount());
+      }
+    }
+    else
+    {
+      tr << item->getStringValue();
+    }
+    break;
+  }
+  default: assert(false);
+  }
 
 }
 
@@ -1866,30 +1894,38 @@
 ********************************************************************************/
 void serializer::binary_emitter::emit_item(store::Item* item)
 {
-  xs_base64Binary lValue;
-
-  // First assume the item is a base64Binary item and try to get its value.
-  try
-  {
-    lValue = item->getBase64BinaryValue();
-  }
-  catch (...)
-  {
-    // If this fails, then just get the string value of the item and convert
-    // it to base64
-    zstring lStringValue;
-    item->getStringValue2(lStringValue);
-    Base64::encode(lStringValue, lValue);
-  }
-
-  std::vector<char> lDecodedData;
-  lValue.decode(lDecodedData);
-
-  for (std::vector<char>::const_iterator lIter = lDecodedData.begin();
-       lIter != lDecodedData.end();
-       ++lIter)
-  {
-    tr << *lIter;
+  if (item->isStreamable())
+  {
+    std::istream& stream = item->getStream();
+    if (item->isEncoded())
+    {
+      tr << Base64::decode(stream);
+    }
+    else
+    {
+      char buf[1024];
+      while (!stream.eof())
+      {
+        stream.read(buf, 1024);
+        tr.write(buf, stream.gcount());
+      }
+    }
+  }
+  else
+  {
+    char* value;
+    size_t len = item->getBase64BinaryValue(value);
+
+    if (item->isEncoded())
+    {
+      std::stringstream tmp;
+      tmp.write(value, len);
+      tr << Base64::decode(tmp);
+    }
+    else
+    {
+      tr.write(value, len);
+    }
   }
 }
 

=== modified file 'src/runtime/base64/base64_impl.cpp'
--- src/runtime/base64/base64_impl.cpp	2011-06-14 17:26:33 +0000
+++ src/runtime/base64/base64_impl.cpp	2012-02-22 01:40:29 +0000
@@ -22,37 +22,74 @@
 
 #include "runtime/base64/base64.h"
 
-
 #include "store/api/item.h"
 #include "store/api/item_factory.h"
 
 namespace zorba {
 
-bool Base64DecodeIterator::nextImpl(store::Item_t& result, PlanState& planState) const
+bool Base64DecodeIterator::nextImpl(
+    store::Item_t& result,
+    PlanState& planState) const
 {
   store::Item_t lItem;
-  Base64 lDecodedData;
   zstring lResultString;
+  char* lContent;
+  size_t lSize;
+  result = NULL;
 
   PlanIteratorState *state;
   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
 
-  if (consumeNext(lItem, theChildren[0].getp(), planState))
-  {
-    lDecodedData = lItem->getBase64BinaryValue();
-    lResultString = lDecodedData.decode().str();
+  consumeNext(lItem, theChildren[0].getp(), planState);
+
+  if (lItem->isStreamable())
+  {
+    if (lItem->isEncoded())
+    {
+      // decode and eventually transcode
+      lResultString = Base64::decode(lItem->getStream());
+    }
+    else
+    {
+      // streamable string eventually transcoding
+      GENV_ITEMFACTORY->createStreamableString(
+          result,
+          lItem->getStream(),
+          lItem->getStreamReleaser(),
+          lItem->isSeekable());
+    }
+  }
+  else
+  {
+    lSize = lItem->getBase64BinaryValue(lContent);
+
+    if (lItem->isEncoded())
+    {
+      std::vector<char> encoded(lContent, lContent+lSize);
+      std::vector<char> decoded;
+      Base64::decode(encoded, decoded);
+      lResultString.insert(0, &decoded[0], decoded.size());
+    }
+    else
+    {
+      lResultString.insert(0, lContent, lSize);
+    }
+  }
+  if (!result) // otherwise it's a streamable string already
+  {
     GENV_ITEMFACTORY->createString(result, lResultString);
-    STACK_PUSH (true, state);
   }
+  STACK_PUSH (true, state);
 
   STACK_END (state);
 }
 
 
-bool Base64EncodeIterator::nextImpl(store::Item_t& result, PlanState& planState) const
+bool Base64EncodeIterator::nextImpl(
+    store::Item_t& result,
+    PlanState& planState) const
 {
   store::Item_t lItem;
-  Base64        lBase64;
   zstring       lTmpString;
 
   PlanIteratorState* state;
@@ -61,17 +98,12 @@
   if (consumeNext(lItem, theChildren[0].getp(), planState)) 
   {
     lItem->getStringValue2(lTmpString);
-    Base64::encode(lTmpString, lBase64);
-    if (GENV_ITEMFACTORY->createBase64Binary(result, lBase64)) 
-    {
-      STACK_PUSH (true, state);
-    }
-    else
-    {
-      throw XQUERY_EXCEPTION(
-        zerr::ZXQP0025_ITEM_CREATION_FAILED, ERROR_LOC( loc )
-      );
-    } 
+    // create a base64Binary item
+    // the content is the non-encoded string
+    GENV_ITEMFACTORY->createBase64Binary(
+          result, lTmpString.c_str(), lTmpString.size(), false
+        );
+    STACK_PUSH (true, state);
   }
   STACK_END (state);
 }

=== modified file 'src/store/api/item.h'
--- src/store/api/item.h	2012-02-07 15:53:23 +0000
+++ src/store/api/item.h	2012-02-22 01:40:29 +0000
@@ -274,7 +274,14 @@
 
   /** Accessor for xs:base64Binary
    */
-  virtual xs_base64Binary getBase64BinaryValue() const;
+  virtual size_t getBase64BinaryValue(char*& data) const;
+
+  /**
+   * Checks whether a base64 item's content is already encoded
+   *
+   * @return true only if it is.
+   */
+  virtual bool isEncoded() const;
 
   /** Accessor for xs:boolean
    */

=== modified file 'src/store/api/item_factory.h'
--- src/store/api/item_factory.h	2011-12-21 14:40:33 +0000
+++ src/store/api/item_factory.h	2012-02-22 01:40:29 +0000
@@ -205,6 +205,31 @@
   virtual bool createBase64Binary(Item_t& result, xs_base64Binary value) = 0;
 
   /**
+   * Specification: [http://www.w3.org/TR/xmlschema-2/#base64Binary]
+   * creates a base64Binary item with the given content
+   * the encoded flag specifies whether the given content is already
+   * base64 encoded or not.
+   */
+  virtual bool createBase64Binary(
+      Item_t& result,
+      const char* value,
+      size_t size,
+      bool encoded) = 0;
+
+  /**
+   * Specification: [http://www.w3.org/TR/xmlschema-2/#base64Binary]
+   * the encoded flag specifies whether the given content is already
+   * base64 encoded or not.
+   */
+  virtual bool createStreamableBase64Binary(
+      Item_t& result,
+      std::istream&,
+      StreamReleaser,
+      bool seekable = false,
+      bool encoded = false) = 0;
+
+
+  /**
    * Specification: [http://www.w3.org/TR/xmlschema-2/#bool]
    * @param value
    */

=== modified file 'src/store/naive/atomic_items.cpp'
--- src/store/naive/atomic_items.cpp	2012-02-15 10:25:02 +0000
+++ src/store/naive/atomic_items.cpp	2012-02-22 01:40:29 +0000
@@ -3059,6 +3059,45 @@
 /*******************************************************************************
   class Base64BinaryItem
 ********************************************************************************/
+bool
+Base64BinaryItem::equals(
+      const store::Item* other,
+      long timezone,
+      const XQPCollator* aCollation) const
+{
+  if (isEncoded() == other->isEncoded())
+  {
+    char* this_data, *other_data;
+    size_t this_size = getBase64BinaryValue(this_data);
+    size_t other_size = other->getBase64BinaryValue(other_data);
+    return this_size == other_size &&
+      memcmp(this_data, other_data, this_size) == 0;
+  }
+  else
+  {
+    return getStringValue().compare(other->getStringValue()) == 0;
+  }
+}
+
+
+uint32_t
+Base64BinaryItem::hash(long timezone, const XQPCollator* aCollation) const
+{
+  // always need to hash on the string-value because otherwise
+  // a base64 item that is encoded would have a different hash-value
+  // than a base64 item that is decoded but represents the same binary content
+  return utf8::hash(getStringValue(), aCollation);
+}
+
+
+size_t
+Base64BinaryItem::getBase64BinaryValue(char*& data) const
+{
+  data = const_cast<char*>(&theValue[0]);
+  return theValue.size();
+}
+
+
 store::Item* Base64BinaryItem::getType() const
 {
   return GET_STORE().theSchemaTypeNames[store::XS_BASE64BINARY];
@@ -3067,19 +3106,51 @@
 
 zstring Base64BinaryItem::getStringValue() const
 {
-  return theValue.str();
+  if (theIsEncoded)
+  {
+    zstring tmp(&theValue[0], theValue.size());
+    return tmp;
+  }
+  else
+  {
+    std::vector<char> encoded;
+    encoded.reserve(theValue.size());
+    Base64::encode(theValue, encoded);
+    zstring tmp(&encoded[0], encoded.size());
+    return tmp;
+  }
 }
 
 
 void Base64BinaryItem::getStringValue2(zstring& val) const
 {
-  val = theValue.str();
+  if (theIsEncoded)
+  {
+    val.insert(0, &theValue[0], theValue.size());
+  }
+  else
+  {
+    std::vector<char> encoded;
+    encoded.reserve(theValue.size());
+    Base64::encode(theValue, encoded);
+    val.insert(0, &encoded[0], encoded.size());
+  }
 }
 
 
 void Base64BinaryItem::appendStringValue(zstring& buf) const
 {
-  buf += theValue.str();
+  if (theIsEncoded)
+  {
+    buf.insert(buf.size(), &theValue[0], theValue.size());
+  }
+  else
+  {
+    std::vector<char> encoded;
+    encoded.reserve(theValue.size());
+    Base64::encode(theValue, encoded);
+    buf.insert(buf.size(), &encoded[0], encoded.size());
+  }
 }
 
 
@@ -3092,9 +3163,155 @@
 }
 
 
-uint32_t Base64BinaryItem::hash(long timezone, const XQPCollator* aCollation) const
-{
-  return theValue.hash();
+/*******************************************************************************
+  class StreamableBase64BinaryItem
+********************************************************************************/
+zstring StreamableBase64BinaryItem::getStringValue() const
+{
+  if (!theIsMaterialized)
+  {
+    materialize();
+  }
+  return Base64BinaryItem::getStringValue();
+}
+
+
+void StreamableBase64BinaryItem::getStringValue2(zstring& val) const
+{
+  if (!theIsMaterialized)
+  {
+    materialize();
+  }
+  Base64BinaryItem::getStringValue2(val);
+}
+
+
+void StreamableBase64BinaryItem::appendStringValue(zstring& buf) const
+{
+  if (!theIsMaterialized)
+  {
+    materialize();
+  }
+  Base64BinaryItem::appendStringValue(buf);
+}
+
+
+zstring StreamableBase64BinaryItem::show() const
+{
+  if (!theIsMaterialized)
+  {
+    materialize();
+  }
+  zstring res("xs:base64Binary(");
+  appendStringValue(res);
+  res += ")";
+  return res;
+}
+
+
+uint32_t
+StreamableBase64BinaryItem::hash(long timezone, const XQPCollator* aCollation) const
+{
+  if (!theIsMaterialized)
+  {
+    materialize();
+  }
+  return Base64BinaryItem::hash(timezone, aCollation);
+}
+
+
+size_t
+StreamableBase64BinaryItem::getBase64BinaryValue(char*& data) const
+{
+  if (!theIsMaterialized)
+  {
+    materialize();
+  }
+  return Base64BinaryItem::getBase64BinaryValue(data);
+}
+
+
+bool StreamableBase64BinaryItem::isStreamable() const
+{
+  return true;
+}
+
+
+bool StreamableBase64BinaryItem::isSeekable() const
+{
+  return theIsSeekable;
+}
+
+
+StreamReleaser StreamableBase64BinaryItem::getStreamReleaser()
+{
+  return theStreamReleaser;
+}
+
+
+void StreamableBase64BinaryItem::setStreamReleaser(StreamReleaser aReleaser)
+{
+  theStreamReleaser = aReleaser;
+}
+
+
+std::istream& StreamableBase64BinaryItem::getStream()
+{
+  // a non-seekable stream can only be consumed once
+  // we raise an error if getStream is called twice
+  // if a query requires a stream to be consumed more than once,
+  // the query needs to make sure that the stream is explicitly
+  // materialized before
+  if (!theIsSeekable && theIsConsumed) 
+  {
+    throw ZORBA_EXCEPTION( zerr::ZSTR0055_STREAMABLE_STRING_CONSUMED );
+  }
+  else
+  {
+    // if the stream is seekable, we seek to the beginning.
+    // We are not using theIstream.seekg because the USER_ERROR that is thrown
+    // by Zorba is lost possibly in an internal try/catch of the seekg
+    std::streambuf * pbuf;
+    pbuf = theIstream.rdbuf();
+    pbuf->pubseekoff(0, std::ios::beg);
+  }
+  theIsConsumed = true;
+  return theIstream;
+}
+
+
+void StreamableBase64BinaryItem::materialize() const
+{
+  StreamableBase64BinaryItem* const s
+    = const_cast<StreamableBase64BinaryItem*>(this);
+  std::istream& lStream = s->getStream();
+
+  s->theIsMaterialized = true;
+  s->theIsConsumed = true;
+
+  if (isSeekable())
+  {
+    lStream.seekg(0, std::ios::end);
+    size_t len = lStream.tellg();
+    lStream.seekg(0, std::ios::beg);
+    s->theValue.reserve(len);
+    char buf[1024];
+    while (lStream.good())
+    {
+      lStream.read(buf, 1024);
+      s->theValue.insert(s->theValue.end(), buf, buf+lStream.gcount());
+    }
+  }
+  else
+  {
+    char buf[4048];
+    while (lStream.good())
+    {
+      lStream.read(buf, 4048);
+      s->theValue.reserve(s->theValue.size() + lStream.gcount());
+      s->theValue.insert(s->theValue.end(), buf, buf+lStream.gcount());
+    }
+  }
 }
 
 

=== modified file 'src/store/naive/atomic_items.h'
--- src/store/naive/atomic_items.h	2012-01-26 19:56:14 +0000
+++ src/store/naive/atomic_items.h	2012-02-22 01:40:29 +0000
@@ -20,6 +20,7 @@
 #include <zorba/config.h>
 #include <iostream>
 #include <vector>
+#include <cstring>
 
 #include <zorba/streams.h>
 #ifndef ZORBA_NO_FULL_TEXT
@@ -153,7 +154,10 @@
 
   const zstring& getString() const { return theBaseItem->getString(); }
 
-  xs_base64Binary getBase64BinaryValue() const { return theBaseItem->getBase64BinaryValue(); }
+  size_t getBase64BinaryValue(char*& c) const
+  {
+    return theBaseItem->getBase64BinaryValue(c);
+  }
 
   xs_hexBinary getHexBinaryValue() const { return theBaseItem->getHexBinaryValue(); }
 
@@ -2218,30 +2222,115 @@
   friend class BasicItemFactory;
 
 protected:
-  xs_base64Binary theValue;
+  std::vector<char> theValue;
+  bool        theIsEncoded;
 
 protected:
-  Base64BinaryItem(xs_base64Binary aValue) : theValue(aValue) {}
+  Base64BinaryItem(bool aIsEncoded)
+    : theIsEncoded(aIsEncoded) {}
 
-  Base64BinaryItem() {}
+  Base64BinaryItem(const char* aValue, size_t aSize, bool aIsEncoded = true)
+    : theIsEncoded(aIsEncoded)
+  {
+    theValue.reserve(aSize);
+    theValue.insert(theValue.begin(), aValue, aValue + aSize);
+  }
 
 public:
-  xs_base64Binary getBase64BinaryValue() const { return theValue; }
+  size_t getBase64BinaryValue(char*& data) const;
 
   store::SchemaTypeCode getTypeCode() const { return store::XS_BASE64BINARY; }
 
   store::Item* getType() const;
 
+  bool isEncoded() const { return theIsEncoded; }
+
   uint32_t hash(long timezone = 0, const XQPCollator* aCollation = 0) const;
 
   bool equals(
         const store::Item* other,
         long timezone = 0,
-        const XQPCollator* aCollation = 0 ) const
-  {
-    return theValue.equal(other->getBase64BinaryValue());
+        const XQPCollator* aCollation = 0 ) const;
+
+  zstring getStringValue() const;
+
+  void getStringValue2(zstring& val) const;
+
+  void appendStringValue(zstring& buf) const;
+
+  zstring show() const;
+  
+protected:
+  // used in hash doing simple xor of the data
+  struct hash_functor
+  {
+    uint32_t hash_value;
+
+    void operator() (char c)
+    {
+      hash_value ^= (uint32_t) c;
+    }
+  };
+};
+
+
+/*******************************************************************************
+  class StreamableBase64BinaryItem
+********************************************************************************/
+class StreamableBase64BinaryItem : public Base64BinaryItem
+{
+  friend class BasicItemFactory;
+
+protected:
+  std::istream & theIstream;
+
+  bool theIsMaterialized;
+  bool theIsConsumed;
+  bool theIsSeekable;
+
+  StreamReleaser theStreamReleaser;
+
+protected:
+  StreamableBase64BinaryItem(
+      std::istream& aStream,
+      StreamReleaser streamReleaser,
+      bool seekable = false,
+      bool is_encoded = false)
+    : Base64BinaryItem(is_encoded),
+      theIstream(aStream),
+      theIsMaterialized(false),
+      theIsConsumed(false),
+      theIsSeekable(seekable),
+      theStreamReleaser(streamReleaser)
+  {}
+
+  void materialize() const;
+
+public:
+  virtual ~StreamableBase64BinaryItem()
+  {
+    if (theStreamReleaser) 
+    {
+      theStreamReleaser(&theIstream);
+    }
   }
 
+  bool isStreamable() const;
+
+  bool isSeekable() const;
+
+  std::istream& getStream();
+
+  StreamReleaser getStreamReleaser();
+
+  void setStreamReleaser(StreamReleaser aReleaser);
+
+  size_t getBase64BinaryValue(char*&) const;
+
+  store::SchemaTypeCode getTypeCode() const { return store::XS_BASE64BINARY; }
+
+  uint32_t hash(long timezone = 0, const XQPCollator* aCollation = 0) const;
+
   zstring getStringValue() const;
 
   void getStringValue2(zstring& val) const;

=== modified file 'src/store/naive/item.cpp'
--- src/store/naive/item.cpp	2012-02-15 10:25:02 +0000
+++ src/store/naive/item.cpp	2012-02-22 01:40:29 +0000
@@ -430,7 +430,7 @@
 /**
  * Accessor for xs:base64Binary
  */
-xs_base64Binary Item::getBase64BinaryValue() const
+size_t Item::getBase64BinaryValue(char*&) const
 {
   throw ZORBA_EXCEPTION(
     zerr::ZSTR0040_TYPE_ERROR,
@@ -441,6 +441,24 @@
   );
 }
 
+
+/**
+ * Checks whether a base64 item's content is already encoded
+ *
+ * @return true only if it is.
+ */
+bool Item::isEncoded() const
+{
+  throw ZORBA_EXCEPTION(
+    zerr::ZSTR0040_TYPE_ERROR,
+    ERROR_PARAMS(
+      ZED( OperationNotDef_23 ), "Item::isEncoded()",
+      getType()->getStringValue()
+    )
+  );
+}
+
+
 /**
  * Accessor for xs:boolean
  */

=== modified file 'src/store/naive/simple_item_factory.cpp'
--- src/store/naive/simple_item_factory.cpp	2012-02-15 10:25:02 +0000
+++ src/store/naive/simple_item_factory.cpp	2012-02-22 01:40:29 +0000
@@ -997,9 +997,36 @@
 }
 
 
-bool BasicItemFactory::createBase64Binary(store::Item_t& result, xs_base64Binary value)
-{
-  result = new Base64BinaryItem(value);
+bool BasicItemFactory::createBase64Binary(
+    store::Item_t& result,
+    xs_base64Binary value)
+{
+  const std::vector<char>& data = value.getData();
+  result = new Base64BinaryItem(&data[0], data.size(), true);
+  return true;
+}
+
+bool BasicItemFactory::createBase64Binary(
+    store::Item_t& result,
+    const char* value,
+    size_t size,
+    bool encoded)
+{
+  result = new Base64BinaryItem(value, size, encoded);
+  return true;
+}
+
+
+bool BasicItemFactory::createStreamableBase64Binary(
+    store::Item_t& result,
+    std::istream& aStream,
+    StreamReleaser aReleaser,
+    bool seekable,
+    bool encoded)
+{
+  result = new StreamableBase64BinaryItem(
+      aStream, aReleaser, seekable, encoded
+    );
   return true;
 }
 

=== modified file 'src/store/naive/simple_item_factory.h'
--- src/store/naive/simple_item_factory.h	2011-12-21 14:40:33 +0000
+++ src/store/naive/simple_item_factory.h	2012-02-22 01:40:29 +0000
@@ -107,6 +107,19 @@
 
   bool createBase64Binary(store::Item_t& result, xs_base64Binary value);
 
+  bool createBase64Binary(
+      store::Item_t& result,
+      const char* value,
+      size_t size,
+      bool encoded);
+
+  bool createStreamableBase64Binary(
+      store::Item_t& result,
+      std::istream&,
+      StreamReleaser,
+      bool seekable = false,
+      bool encoded = false);
+
   bool createBoolean(store::Item_t& result, xs_boolean value);
 
 

=== modified file 'src/types/casting.cpp'
--- src/types/casting.cpp	2012-01-30 15:23:21 +0000
+++ src/types/casting.cpp	2012-02-22 01:40:29 +0000
@@ -1267,7 +1267,18 @@
 
 T1_TO_T2(b64, hxB)
 {
-  return aFactory->createHexBinary(result, xs_hexBinary(aItem->getBase64BinaryValue()));
+  char* c;
+  size_t s = aItem->getBase64BinaryValue(c);
+  Base64 tmp;
+  if (aItem->isEncoded())
+  {
+    Base64::parseString(c, s, tmp);
+  }
+  else
+  {
+    Base64::encode((const unsigned char*)c, s, tmp);
+  }
+  return aFactory->createHexBinary(result, xs_hexBinary(tmp));
 }
 
 

=== modified file 'src/zorbaserialization/zorba_class_serializer.cpp'
--- src/zorbaserialization/zorba_class_serializer.cpp	2012-01-11 17:30:25 +0000
+++ src/zorbaserialization/zorba_class_serializer.cpp	2012-02-22 01:40:29 +0000
@@ -673,8 +673,31 @@
          
       else if(name_of_type == "base64Binary")
       {
-        SERIALIZE_REF_FIELD(xs_base64Binary, value, getBase64BinaryValue());
-        FINALIZE_SERIALIZE(createBase64Binary, (result, value_in));
+        if (ar.is_serializing_out())
+        {
+          char* c;
+          size_t s = obj->getBase64BinaryValue(c);
+          if (obj->isEncoded())
+          {
+            Base64 tmp;
+            Base64::parseString(c, s, tmp);
+            ar.dont_allow_delay();
+            ar & tmp;
+          }
+          else
+          {
+            Base64 tmp((const unsigned char*)c, s);
+            ar.dont_allow_delay();
+            ar & tmp;
+          }
+        }
+        else
+        {
+          ar.dont_allow_delay();
+          Base64 tmp;
+          ar & tmp;
+          FINALIZE_SERIALIZE(createBase64Binary, (result, tmp));
+        }
       }
       else if(name_of_type == "hexBinary")
       {

=== modified file 'src/zorbatypes/binary.cpp'
--- src/zorbatypes/binary.cpp	2011-06-14 17:26:33 +0000
+++ src/zorbatypes/binary.cpp	2012-02-22 01:40:29 +0000
@@ -306,6 +306,16 @@
 }
 
 
+Base64::Base64(const unsigned char *bin_data, size_t len)
+{
+  std::vector<char> tmp;
+  tmp.reserve(len);
+  tmp.insert(tmp.begin(), (const char*)bin_data, ((const char*)bin_data) + len);
+  theData.reserve(len);
+  encode(tmp, theData);
+}
+
+
 void Base64::serialize(::zorba::serialization::Archiver& ar)
 {
   ar & theData;

=== added directory 'test/rbkt/ExpQueryResults/zorba/base64'
=== added file 'test/rbkt/ExpQueryResults/zorba/base64/binary_1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/base64/binary_1.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/base64/binary_1.xml.res	2012-02-22 01:40:29 +0000
@@ -0,0 +1,1 @@
+true

=== added file 'test/rbkt/ExpQueryResults/zorba/base64/file_read_1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/base64/file_read_1.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/base64/file_read_1.xml.res	2012-02-22 01:40:29 +0000
@@ -0,0 +1,1 @@
+true true f0VMRgEBAQAAAAAAAAAAAAMAAwABAAAAIPxDADQAAAD0JTMHAAAAADQAIAAHACgAKAAlAAEAAAAAAAAAAAAAAAAAAAAv22gBL9toAQUAAAAAEAAAAQAAAJTbaAGU62gBlOtoAehnBwAwpgcABgAAAAAQAAACAAAAvKBuAbywbgG8sG4BGAEAABgBAAAGAAAABAAAAAQAAAAUAQAAFAEAABQBAAAkAAAAJAAAAAQAAAAEAAAAUOV0ZKyHJwGshycBrIcnAfyBCAD8gQgABAAAAAQAAABR5XRkAAAAAAAAAAAAAAAAAAAAAAAAAAAGAAAABAAAAFLldGSU22gBlOtoAZTraAFs5AUAbOQFAAQAAAABAAAABAAAABQAAAADAAAAR05VANMOFlHRDhyfmkw9H3+lYuAncwGhG0AAAAYDAAAAIAAAEgAAABBkRVIkEIgUIwhRukiBHLCOBAAEAgggoSCCg0CkED5YAAAGFAIgABkACJAAiQAwAFCQEwAQIABBAIIFYAIACAASAAZARACAABkTpAQB5EQIYEF0IBIAIgAhciYEoEICZGgAAQAABCAAkBACZAJAAQQUEhGAIIAAKBULIJYAAABRAKFV4AIWgQAEIYgAAIAAQECAMgBGiCIGiRAEkAABAMAIgEQAAAQAACgIJYAAAEAAAAGAAgtCpFwEAAZRQACAABAICAASiBBFgRhIAkEBMAApJchACQZDAIAAgABIAKQEgKEACGE05FgCAAAgACEAgEAAQoxkUAEAiBAGAAkCFiBoISRFFCBEBMV3gARBgBAosUJBQQAEEDIAUUCBAABAEAhgAEAAAAAAACApACIBBgHKkAWBQAAEAAAAAACDURggAkEEEIlEgIEADQZwAAATC5AQNYxCABABAAAAAAEAlQD0WGD3gAAAIEDKCCAQQMGNIAAQAAAACECCAADAbMAWABABaRAAUFQCgAAAgxJmDAEIAiIZCmIAIAIEgAESCSAjETEQIws2A8AIgAEAAAAAkGkAiEAKDEYEBRGJhFEAIACICAIBKMAGAAAHIAQBqIgAUAECgAAAAhoEER4BASMBBhEIIQGCAFbg8IiBYAgCAQgAgEBwCAAAIAEAQATGAj+QQaAIIQAIAAShAgg2AKQAAAQBkCoASAAAFAAgAYFqAQDCcoOEg0sAIhKAABQAAgpAAghAAIQgIBAkAAQBDA4gRAACAAIgAPBAUgBDARCAggB0GQBaEAg1Mk8CABEAAAQgBBgAECIAvEIhFCRAIBgAAEALyBEQAhACQoIKwAABkBiAVM4QAQlFgBAS4AASAIIkAZEkEgAsBMCABSCAKACAIBMAD4JSLEyAAGQBBqATAUECQUgACQGBGA==

=== added directory 'test/rbkt/Queries/zorba/base64'
=== added file 'test/rbkt/Queries/zorba/base64/binary_1.xq'
--- test/rbkt/Queries/zorba/base64/binary_1.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/base64/binary_1.xq	2012-02-22 01:40:29 +0000
@@ -0,0 +1,1 @@
+xs:string(xs:base64Binary("Wm9yYmEgaXMgR3JlYXQhIMOkw7bDvA==")) eq "Wm9yYmEgaXMgR3JlYXQhIMOkw7bDvA==" (: casting the literal to xs:base64Binary and back to xs:string must give back the same text :)

=== added file 'test/rbkt/Queries/zorba/base64/decoded'
Binary files test/rbkt/Queries/zorba/base64/decoded	1970-01-01 00:00:00 +0000 and test/rbkt/Queries/zorba/base64/decoded	2012-02-22 01:40:29 +0000 differ
=== added file 'test/rbkt/Queries/zorba/base64/decoded-text'
--- test/rbkt/Queries/zorba/base64/decoded-text	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/base64/decoded-text	2012-02-22 01:40:29 +0000
@@ -0,0 +1,1 @@
+Zorba is Great! äöü
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/base64/encoded'
--- test/rbkt/Queries/zorba/base64/encoded	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/base64/encoded	2012-02-22 01:40:29 +0000
@@ -0,0 +1,1 @@
+f0VMRgEBAQAAAAAAAAAAAAMAAwABAAAAIPxDADQAAAD0JTMHAAAAADQAIAAHACgAKAAlAAEAAAAAAAAAAAAAAAAAAAAv22gBL9toAQUAAAAAEAAAAQAAAJTbaAGU62gBlOtoAehnBwAwpgcABgAAAAAQAAACAAAAvKBuAbywbgG8sG4BGAEAABgBAAAGAAAABAAAAAQAAAAUAQAAFAEAABQBAAAkAAAAJAAAAAQAAAAEAAAAUOV0ZKyHJwGshycBrIcnAfyBCAD8gQgABAAAAAQAAABR5XRkAAAAAAAAAAAAAAAAAAAAAAAAAAAGAAAABAAAAFLldGSU22gBlOtoAZTraAFs5AUAbOQFAAQAAAABAAAABAAAABQAAAADAAAAR05VANMOFlHRDhyfmkw9H3+lYuAncwGhG0AAAAYDAAAAIAAAEgAAABBkRVIkEIgUIwhRukiBHLCOBAAEAgggoSCCg0CkED5YAAAGFAIgABkACJAAiQAwAFCQEwAQIABBAIIFYAIACAASAAZARACAABkTpAQB5EQIYEF0IBIAIgAhciYEoEICZGgAAQAABCAAkBACZAJAAQQUEhGAIIAAKBULIJYAAABRAKFV4AIWgQAEIYgAAIAAQECAMgBGiCIGiRAEkAABAMAIgEQAAAQAACgIJYAAAEAAAAGAAgtCpFwEAAZRQACAABAICAASiBBFgRhIAkEBMAApJchACQZDAIAAgABIAKQEgKEACGE05FgCAAAgACEAgEAAQoxkUAEAiBAGAAkCFiBoISRFFCBEBMV3gARBgBAosUJBQQAEEDIAUUCBAABAEAhgAEAAAAAAACApACIBBgHKkAWBQAAEAAAAAACDURggAkEEEIlEgIEADQZwAAATC5AQNYxCABABAAAAAAEAlQD0WGD3gAAAIEDKCCAQQMGNIAAQAAAACECCAADAbMAWABABaRAAUFQCgAAAgxJmDAEIAiIZCmIAIAIEgAESCSAjETEQIws2A8AIgAEAAAAAkGkAiEAKDEYEBRGJhFEAIACICAIBKMAGAAAHIAQBqIgAUAECgAAAAhoEER4BASMBBhEIIQGCAFbg8IiBYAgCAQgAgEBwCAAAIAEAQATGAj+QQaAIIQAIAAShAgg2AKQAAAQBkCoASAAAFAAgAYFqAQDCcoOEg0sAIhKAABQAAgpAAghAAIQgIBAkAAQBDA4gRAACAAIgAPBAUgBDARCAggB0GQBaEAg1Mk8CABEAAAQgBBgAECIAvEIhFCRAIBgAAEALyBEQAhACQoIKwAABkBiAVM4QAQlFgBAS4AASAIIkAZEkEgAsBMCABSCAKACAIBMAD4JSLEyAAGQBBqATAUECQUgACQGBGA==
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/base64/encoded-text'
--- test/rbkt/Queries/zorba/base64/encoded-text	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/base64/encoded-text	2012-02-22 01:40:29 +0000
@@ -0,0 +1,1 @@
+Wm9yYmEgaXMgR3JlYXQhIMOkw7bDvA==
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/base64/file_read_1.xq'
--- test/rbkt/Queries/zorba/base64/file_read_1.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/base64/file_read_1.xq	2012-02-22 01:40:29 +0000
@@ -0,0 +1,10 @@
+import module namespace f = "http://expath.org/ns/file";
+(: Reads the binary fixture "decoded" twice and checks it against the base64 text stored in "encoded". :)
+variable $enc-file-name := resolve-uri("encoded");
+variable $dec-file-name := resolve-uri("decoded");
+variable $base64-1 := f:read-binary($dec-file-name);
+variable $base64-2 := f:read-binary($dec-file-name); (: same file again, compared with $base64-1 below :)
+variable $ref-result := f:read-text($enc-file-name);
+
+xs:string($base64-1) eq $ref-result, $base64-1 eq $base64-2,
+fn:serialize($base64-1)

=== added file 'test/rbkt/Queries/zorba/base64/file_read_2.xq'
--- test/rbkt/Queries/zorba/base64/file_read_2.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/base64/file_read_2.xq	2012-02-22 01:40:29 +0000
@@ -0,0 +1,11 @@
+import module namespace f = "http://expath.org/ns/file";
+import module namespace b = "http://www.zorba-xquery.com/modules/converters/base64";
+(: Checks b:encode / b:decode against the text fixtures "decoded-text" and "encoded-text". :)
+variable $enc-file-name := resolve-uri("encoded-text");
+variable $dec-file-name := resolve-uri("decoded-text");
+variable $encoded := f:read-text($enc-file-name);
+variable $decoded := f:read-text($dec-file-name);
+
+$encoded eq xs:string(b:encode($decoded)),
+xs:base64Binary($encoded) eq b:encode($decoded),
+b:decode(xs:base64Binary($encoded)) eq $decoded


Follow ups