← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~zorba-coders/zorba/feature-transcode_streambuf into lp:zorba

 

Paul J. Lucas has proposed merging lp:~zorba-coders/zorba/feature-transcode_streambuf into lp:zorba.

Requested reviews:
  Paul J. Lucas (paul-lucas)
  Matthias Brantner (matthias-brantner)
Related bugs:
  Bug #867159 in Zorba: "fn:match fails if the string is non-utf8"
  https://bugs.launchpad.net/zorba/+bug/867159

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/feature-transcode_streambuf/+merge/91980

Added transcode_streambuf.
-- 
https://code.launchpad.net/~zorba-coders/zorba/feature-transcode_streambuf/+merge/91980
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'ChangeLog'
--- ChangeLog	2012-02-07 14:26:42 +0000
+++ ChangeLog	2012-02-08 06:02:21 +0000
@@ -28,6 +28,9 @@
   * zerr is not predeclared anymore to be http://www.zorba-xquery.com/errors
   * Add new XQuery interface for the PHP bindings.
   * Added API method Item::getNamespaceBindings().
+  * Added a transcoding streambuffer to the API which allows transcoding arbitrary encodings
+    from and to UTF-8
+  * file:read-text is able to handle arbitrary encodings (fixes bug #867159)
   * Fixed bug #917981 (disallow declaring same module twice).
   * Added API method StaticContext::getNamespaceBindings() (see bug #905035)
   * Deprecated StaticContext:getNamespaceURIByPrefix()

=== added file 'include/zorba/internal/proxy.h'
--- include/zorba/internal/proxy.h	1970-01-01 00:00:00 +0000
+++ include/zorba/internal/proxy.h	2012-02-08 06:02:21 +0000
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ZORBA_INTERNAL_PROXY_H
+#define ZORBA_INTERNAL_PROXY_H
+
+namespace zorba {
+namespace internal {
+namespace ztd {
+
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * \internal
+ * A %proxy<T> is-a \c T that also contains a T* -- a pointer to the original.
+ */
+template<class OriginalType>
+class proxy : public OriginalType {
+public:
+  proxy( OriginalType *p ) : original_( p ) { }
+
+  OriginalType* original() const {
+    return original_;
+  }
+private:
+  OriginalType *original_;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace ztd
+} // namespace internal
+} // namespace zorba
+#endif  /* ZORBA_INTERNAL_PROXY_H */
+/* vim:set et sw=2 ts=2: */

=== modified file 'include/zorba/pregenerated/diagnostic_list.h'
--- include/zorba/pregenerated/diagnostic_list.h	2011-12-21 14:40:33 +0000
+++ include/zorba/pregenerated/diagnostic_list.h	2012-02-08 06:02:21 +0000
@@ -684,6 +684,8 @@
 
 extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZOSE0005_DLL_LOAD_FAILED;
 
+extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZOSE0006_TRANSCODING_ERROR;
+
 extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZSTR0001_INDEX_ALREADY_EXISTS;
 
 extern ZORBA_DLL_PUBLIC ZorbaErrorCode ZSTR0002_INDEX_DOES_NOT_EXIST;

=== added file 'include/zorba/transcode_stream.h'
--- include/zorba/transcode_stream.h	1970-01-01 00:00:00 +0000
+++ include/zorba/transcode_stream.h	2012-02-08 06:02:21 +0000
@@ -0,0 +1,213 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ZORBA_TRANSCODE_STREAM_API_H
+#define ZORBA_TRANSCODE_STREAM_API_H
+
+#include <stdexcept>
+#include <streambuf>
+#include <string>
+
+#include <zorba/config.h>
+#include <zorba/internal/proxy.h>
+#include <zorba/internal/unique_ptr.h>
+
+namespace zorba {
+
+typedef internal::ztd::proxy<std::streambuf> proxy_streambuf;
+
+namespace transcode {
+
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * A %transcode::streambuf is-a std::streambuf for transcoding character
+ * encodings from/to UTF-8 on-the-fly.
+ *
+ * To use it, replace a stream's streambuf:
+ * \code
+ *  istream is;
+ *  // ...
+ *  transcode::streambuf tbuf( "ISO-8859-1", is.rdbuf() );
+ *  is.ios::rdbuf( &tbuf );
+ * \endcode
+ * Note that the %transcode::streambuf must exist for as long as it's being used
+ * by the stream.  If you are replacing the streambuf for a stream you did not
+ * create, you should set it back to the original streambuf:
+ * \code
+ *  void f( ostream &os ) {
+ *    transcode::streambuf tbuf( "ISO-8859-1", os.rdbuf() );
+ *    try {
+ *      os.ios::rdbuf( &tbuf );
+ *      // ...
+ *    }
+ *    catch ( ... ) {
+ *      os.ios::rdbuf( tbuf.orig_streambuf() );
+ *      throw;
+ *    }
+ *  }
+ * \endcode
+ *
+ * While %transcode::streambuf does support seeking, the positions are relative
+ * to the original byte stream.
+ */
+class ZORBA_DLL_PUBLIC streambuf : public std::streambuf {
+public:
+  /**
+   * Constructs a %transcode::streambuf.
+   *
+   * @param charset The name of the character encoding to convert from/to.
+   * @param orig The original streambuf to read/write from/to.
+   * @throws std::invalid_argument if either \a charset is not supported or
+   * \a orig is null.
+   */
+  streambuf( char const *charset, std::streambuf *orig );
+
+  /**
+   * Destructs a %transcode::streambuf.
+   */
+  ~streambuf();
+
+  /**
+   * Gets the original streambuf.
+   *
+   * @return said streambuf.
+   */
+  std::streambuf* orig_streambuf() const {
+    return proxy_buf_->original();
+  }
+
+protected:
+  void imbue( std::locale const& );
+  pos_type seekoff( off_type, std::ios_base::seekdir, std::ios_base::openmode );
+  pos_type seekpos( pos_type, std::ios_base::openmode );
+  std::streambuf* setbuf( char_type*, std::streamsize );
+  std::streamsize showmanyc();
+  int sync();
+  int_type overflow( int_type );
+  int_type pbackfail( int_type );
+  int_type uflow();
+  int_type underflow();
+  std::streamsize xsgetn( char_type*, std::streamsize );
+  std::streamsize xsputn( char_type const*, std::streamsize );
+
+private:
+  std::unique_ptr<proxy_streambuf> proxy_buf_;
+
+  // forbid
+  streambuf( streambuf const& );
+  streambuf& operator=( streambuf const& );
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * A %transcode::stream is used to wrap a C++ standard I/O stream with a
+ * transcode::streambuf so that transcoding and the management of the streambuf
+ * happens automatically.
+ *
+ * @tparam StreamType The I/O stream class type to wrap. It must be a concrete
+ * stream class.
+ */
+template<class StreamType>
+class stream : public StreamType {
+public:
+  /**
+   * Constructs a %transcode::stream.
+   *
+   * @param charset The name of the character encoding to convert from/to.
+   * @throws std::invalid_argument if \a charset is not supported.
+   */
+  stream( char const *charset ) :
+    tbuf_( charset, this->rdbuf() )
+  {
+    init();
+  }
+
+  /**
+   * Constructs a %transcode::stream.
+   *
+   * @tparam StreamArgType The type of the first argument of \a StreamType's
+   * constructor.
+   * @param charset The name of the character encoding to convert from/to.
+   * @param stream_arg The argument to pass as the first argument to
+   * \a StreamType's constructor.
+   * @throws std::invalid_argument if \a charset is not supported.
+   */
+  template<typename StreamArgType>
+  stream( char const *charset, StreamArgType stream_arg ) :
+    StreamType( stream_arg ),
+    tbuf_( charset, this->rdbuf() )
+  {
+    init();
+  }
+
+  /**
+   * Constructs a %transcode::stream.
+   *
+   * @tparam StreamArgType The type of the first argument of \a StreamType's
+   * constructor.
+   * @param charset The name of the character encoding to convert from/to.
+   * @param stream_arg The argument to pass as the first argument to
+   * \a StreamType's constructor.
+   * @param mode The open-mode to pass to \a StreamType's constructor.
+   * @throws std::invalid_argument if \a charset is not supported.
+   */
+  template<typename StreamArgType>
+  stream( char const *charset, StreamArgType stream_arg,
+          std::ios_base::openmode mode ) :
+    StreamType( stream_arg, mode ),
+    tbuf_( charset, this->rdbuf() )
+  {
+    init();
+  }
+
+private:
+  streambuf tbuf_;
+
+  void init() {
+    this->std::ios::rdbuf( &tbuf_ );
+  }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Checks whether it would be necessary to transcode from the given character
+ * encoding to UTF-8.
+ *
+ * @param charset The name of the character encoding to check.
+ * @return \c true only if it would be necessary to transcode from the given
+ * character encoding to UTF-8.
+ */
+ZORBA_DLL_PUBLIC
+bool is_necessary( char const *charset );
+
+/**
+ * Checks whether the given character set is supported for transcoding.
+ *
+ * @param charset The name of the character encoding to check.
+ * @return \c true only if the character encoding is supported.
+ */
+ZORBA_DLL_PUBLIC
+bool is_supported( char const *charset );
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace transcode
+} // namespace zorba
+#endif  /* ZORBA_TRANSCODE_STREAM_API_H */
+/* vim:set et sw=2 ts=2: */

=== modified file 'modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.cpp'
--- modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.cpp	2011-07-29 08:12:36 +0000
+++ modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.cpp	2012-02-08 06:02:21 +0000
@@ -21,6 +21,7 @@
 #include <iostream>
 #include <cassert>
 #ifndef WIN32
+#include <cerrno>
 #include <sys/time.h>
 #endif /* WIN32 */
 
@@ -32,349 +33,347 @@
 using namespace std;
 
 namespace zorba {
-  namespace curl {
-    
-    ///////////////////////////////////////////////////////////////////////////////
-    
+namespace curl {
+  
+///////////////////////////////////////////////////////////////////////////////
+  
 #define ZORBA_CURL_ASSERT(expr)                         \
-do {                                                  \
-if ( CURLcode const code##__LINE__ = (expr) )       \
-throw exception( #expr, "", code##__LINE__ );     \
-} while (0)
-    
+  do {                                                  \
+    if ( CURLcode const code##__LINE__ = (expr) )       \
+      throw exception( #expr, "", code##__LINE__ );     \
+  } while (0)
+
 #define ZORBA_CURLM_ASSERT(expr)                        \
-do {                                                  \
-if ( CURLMcode const code##__LINE__ = (expr) )      \
-if ( code##__LINE__ != CURLM_CALL_MULTI_PERFORM ) \
-throw exception( #expr, "", code##__LINE__ );   \
-} while (0)
-
-    exception::exception( char const *function, char const *uri, char const *msg ) :
-    std::exception(), theMessage(msg)
-    {
-    }
-    
-    exception::exception( char const *function, char const *uri, CURLcode code ) :
-    std::exception(), theMessage(curl_easy_strerror(code))
-    {
-    }
-    
-    exception::exception( char const *function, char const *uri, CURLMcode code ) :
-    std::exception(), theMessage(curl_multi_strerror(code))
-    {
-    }
-    
-    const char* exception::what() const throw() {
-      return theMessage;
-    }
-
-    
-    ///////////////////////////////////////////////////////////////////////////////
-    
-    CURL* create( char const *uri, write_fn_t fn, void *data ) {
-      //
-      // Having cURL initialization wrapped by a class and using a singleton static
-      // instance guarantees that cURL is initialized exactly once before use and
-      // and also is cleaned-up at program termination (when destructors for static
-      // objects are called).
-      //
-      struct curl_initializer {
-        curl_initializer() {
-          ZORBA_CURL_ASSERT( curl_global_init( CURL_GLOBAL_ALL ) );
-        }
-        ~curl_initializer() {
-          curl_global_cleanup();
-        }
-      };
-      static curl_initializer initializer;
-      
-      CURL *const curl = curl_easy_init();
-      if ( !curl )
-        throw exception( "curl_easy_init()", uri, "" );
-      
-      try {
-        ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_URL, uri ) );
-        ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_WRITEDATA, data ) );
-        ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_WRITEFUNCTION, fn ) );
-        
-        // Tells cURL to follow redirects. CURLOPT_MAXREDIRS is by default set to -1
-        // thus cURL will do an infinite number of redirects.
-        ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_FOLLOWLOCATION, 1 ) );
-        
+  do {                                                  \
+    if ( CURLMcode const code##__LINE__ = (expr) )      \
+      if ( code##__LINE__ != CURLM_CALL_MULTI_PERFORM ) \
+        throw exception( #expr, "", code##__LINE__ );   \
+  } while (0)
+
+exception::exception( char const *function, char const *uri, char const *msg ) :
+  std::exception(), msg_( msg ? msg : "" ) // std::string from a null pointer is undefined; use "" instead
+{
+}
+
+exception::exception( char const *function, char const *uri, CURLcode code ) :
+  std::exception(),
+  msg_( curl_easy_strerror( code ) )
+{
+}
+
+exception::exception( char const *function, char const *uri, CURLMcode code ) :
+  std::exception(),
+  msg_( curl_multi_strerror( code ) )
+{
+}
+
+exception::~exception() throw() {
+  // out-of-line since it's virtual
+}
+
+const char* exception::what() const throw() {
+  return msg_.c_str();
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+CURL* create( char const *uri, write_fn_t fn, void *data ) {
+  //
+  // Having cURL initialization wrapped by a class and using a singleton static
+  // instance guarantees that cURL is initialized exactly once before use and
+  // also is cleaned-up at program termination (when destructors for static
+  // objects are called).
+  //
+  struct curl_initializer {
+    curl_initializer() {
+      ZORBA_CURL_ASSERT( curl_global_init( CURL_GLOBAL_ALL ) );
+    }
+    ~curl_initializer() {
+      curl_global_cleanup();
+    }
+  };
+  static curl_initializer initializer;
+  
+  CURL *const curl = curl_easy_init();
+  if ( !curl )
+    throw exception( "curl_easy_init()", uri, "" );
+  
+  try {
+    ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_URL, uri ) );
+    ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_WRITEDATA, data ) );
+    ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_WRITEFUNCTION, fn ) );
+    
+    // Tells cURL to follow redirects. CURLOPT_MAXREDIRS is by default set to -1
+    // thus cURL will do an infinite number of redirects.
+    ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_FOLLOWLOCATION, 1 ) );
+    
 #ifndef ZORBA_VERIFY_PEER_SSL_CERTIFICATE
-        ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_SSL_VERIFYPEER, 0 ) );
-        //
-        // CURLOPT_SSL_VERIFYHOST is left default, value 2, meaning verify that the
-        // Common Name or Subject Alternate Name field in the certificate matches
-        // the name of the server.
-        //
-        // Tested with https://www.npr.org/rss/rss.php?id=1001
-        // About using SSL certs in curl: http://curl.haxx.se/docs/sslcerts.html
+    ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_SSL_VERIFYPEER, 0 ) );
+    //
+    // CURLOPT_SSL_VERIFYHOST is left default, value 2, meaning verify that the
+    // Common Name or Subject Alternate Name field in the certificate matches
+    // the name of the server.
+    //
+    // Tested with https://www.npr.org/rss/rss.php?id=1001
+    // About using SSL certs in curl: http://curl.haxx.se/docs/sslcerts.html
 #else
 # ifdef WIN32
-        // set the root CA certificates file path
-        if ( GENV.g_curl_root_CA_certificates_path[0] )
-          ZORBA_CURL_ASSERT(
-                            curl_easy_setopt(
-                                             curl, CURLOPT_CAINFO, GENV.g_curl_root_CA_certificates_path
-                                             )
-                            );
+    // set the root CA certificates file path
+    if ( GENV.g_curl_root_CA_certificates_path[0] )
+      ZORBA_CURL_ASSERT(
+        curl_easy_setopt(
+          curl, CURLOPT_CAINFO, GENV.g_curl_root_CA_certificates_path
+        )
+      );
 # endif /* WIN32 */
 #endif /* ZORBA_VERIFY_PEER_SSL_CERTIFICATE */
-        
-        //
-        // Some servers don't like requests that are made without a user-agent
-        // field, so we provide one.
-        //
-        ZORBA_CURL_ASSERT(
-                          curl_easy_setopt( curl, CURLOPT_USERAGENT, "libcurl-agent/1.0" )
-                          );
-        
-        return curl;
-      }
-      catch ( ... ) {
-        destroy( curl );
-        throw;
-      }
-    }
-    
-    void destroy( CURL *curl ) {
-      if ( curl ) {
-        curl_easy_reset( curl );
-        curl_easy_cleanup( curl );
-      }
-    }
-    
-    ///////////////////////////////////////////////////////////////////////////////
-    
-    streambuf::streambuf() : theInformer(0), theOwnInformer(false) {
-#ifdef WIN32
-      theDummySocket = socket(AF_INET, SOCK_DGRAM, 0);
-      if (theDummySocket == CURL_SOCKET_BAD || theDummySocket == INVALID_SOCKET) {
-        std::cerr << "creating the socket failed" << std::endl;
-      }
-#endif
-      init();
-    }
-    
-    streambuf::streambuf( char const *uri ) : theInformer(0), theOwnInformer(false) {
-#ifdef WIN32
-      theDummySocket = socket(AF_INET, SOCK_DGRAM, 0);
-      if (theDummySocket == CURL_SOCKET_BAD || theDummySocket == INVALID_SOCKET) {
-        std::cerr << "creating the socket failed" << std::endl;
-      }
-#endif
-      init();
-      open( uri );
-    }
-    
-    int streambuf::multi_perform() {
-      underflow();
-      CURLMsg* msg;
-      int msgInQueue;
-      int error = 0;
-      while ((msg = curl_multi_info_read(curlm_, &msgInQueue))) {
-        if (msg->msg == CURLMSG_DONE) {
-          error = msg->data.result;
-        }
-      }
-      return error;
-    }
-    
-    streambuf::streambuf( CURL* aCurl) : theInformer(0), theOwnInformer(false) {
-#ifdef WIN32
-      theDummySocket = socket(AF_INET, SOCK_DGRAM, 0);
-      if (theDummySocket == CURL_SOCKET_BAD || theDummySocket == INVALID_SOCKET) {
-        std::cerr << "creating the socket failed" << std::endl;
-      }
-#endif
-      init();
-      curl_ = aCurl;
-      ZORBA_CURL_ASSERT( curl_easy_setopt( aCurl, CURLOPT_WRITEDATA, this ) );
-      ZORBA_CURL_ASSERT( curl_easy_setopt( aCurl, CURLOPT_WRITEFUNCTION, curl_write_callback ) );
-      
-      init_curlm();
-    }
-    
-    streambuf::~streambuf() {
-      free( buf_ );
-      close();
-#ifdef WIN32
-      closesocket(theDummySocket);
-#endif
-      // If we have been assigned memory ownership of theInformer, delete it now.
-      if (theOwnInformer)
-        delete theInformer;
-    }
-    
-    void streambuf::close() {
-      if ( curl_ ) {
-        if ( curlm_ ) {
-          curl_multi_remove_handle( curlm_, curl_ );
-          curl_multi_cleanup( curlm_ );
-          curlm_ = 0;
-        }
-        destroy( curl_ );
-        curl_ = 0;
-      }
-    }
-    
-    void streambuf::curl_read() {
-      buf_len_ = 0;
-      while ( curl_running_ && !buf_len_ ) {
-        fd_set fd_read, fd_write, fd_except;
-        FD_ZERO( &fd_read );
-        FD_ZERO( &fd_write );
-        FD_ZERO( &fd_except );
-        int max_fd = -1;
-#ifdef WIN32
-        // Windows does not like a call to select where all arguments are 0. So
-        // we just add a dummy socket to make the call to select happy.
-        FD_SET (theDummySocket, &fd_read);
-#endif
-        ZORBA_CURLM_ASSERT(
-                           curl_multi_fdset( curlm_, &fd_read, &fd_write, &fd_except, &max_fd )
-                           );
-        
-        //
-        // Note that the fopen.c sample code is unnecessary at best or wrong at
-        // worst; see: http://curl.haxx.se/mail/lib-2011-05/0011.html
-        //
-        timeval timeout;
-        long curl_timeout_ms;
-        ZORBA_CURLM_ASSERT( curl_multi_timeout( curlm_, &curl_timeout_ms ) );
-        if ( curl_timeout_ms > 0 ) {
-          timeout.tv_sec  = curl_timeout_ms / 1000;
-          timeout.tv_usec = curl_timeout_ms % 1000 * 1000;
-        } else {
-          //
-          // From curl_multi_timeout(3):
-          //
-          //    Note: if libcurl returns a -1 timeout here, it just means that
-          //    libcurl currently has no stored timeout value. You must not wait
-          //    too long (more than a few seconds perhaps) before you call
-          //    curl_multi_perform() again.
-          //
-          // So we just pick some not-too-long default.
-          //
-          timeout.tv_sec  = 1;
-          timeout.tv_usec = 0;
-        }
-        
-        switch ( select( max_fd + 1, &fd_read, &fd_write, &fd_except, &timeout ) ) {
-          case -1:                          // select error
-#ifdef WIN32
-            std::cout << "Error = " << WSAGetLastError() << std::endl;
-#endif
-            throw exception( "select()", "" );
-          case 0:                           // timeout
-            // no break;
-          default:
-            CURLMcode code;
-            do {
-              code = curl_multi_perform( curlm_, &curl_running_ );
-            } while ( code == CURLM_CALL_MULTI_PERFORM );
-            ZORBA_CURLM_ASSERT( code );
-        }
-      }
-      if (theInformer) {
-        theInformer->afterRead();
-      }
-    }
-    
-    size_t streambuf::curl_write_callback( void *ptr, size_t size, size_t nmemb,
-                                          void *data ) {
-      size *= nmemb;
-      streambuf *const that = static_cast<streambuf*>( data );
-      
-      std::streamoff buf_free = that->buf_capacity_ - that->buf_len_;
-      if (that->theInformer) {
-        that->theInformer->beforeRead();
-      }
-      if ( size > buf_free ) {
-        std::streamoff new_capacity = that->buf_capacity_ + size - buf_free;
-        if ( void *const new_buf = realloc( that->buf_, static_cast<size_t>(new_capacity) ) ) {
-          that->buf_ = static_cast<char*>( new_buf );
-          that->buf_capacity_ = new_capacity;
-        } else
-          throw exception( "realloc()", "" );
-      }
-      ::memcpy( that->buf_ + that->buf_len_, ptr, size );
-      that->buf_len_ += size;
-      return size;
-    }
-    
-    void streambuf::init() {
-      buf_ = 0;
-      buf_capacity_ = 0;
-      buf_len_ = 0;
-      curl_ = 0;
-      curlm_ = 0;
-      curl_running_ = 0;
-    }
-    
-    void streambuf::init_curlm() {
-      //
-      // Lie about cURL running initially so the while-loop in curl_read() will run
-      // at least once.
-      //
-      curl_running_ = 1;
-      
-      //
-      // Set the "get" pointer to the end (gptr() == egptr()) so a call to
-      // underflow() and initial data read will be triggered.
-      //
-      buf_len_ = buf_capacity_;
-      setg( buf_, buf_ + buf_len_, buf_ + buf_capacity_ );
-      
-      //
-      // Clean-up has to be done here with try/catch (as opposed to relying on the
-      // destructor) because open() can be called from the constructor.  If an
-      // exception is thrown, the constructor will not have completed, hence the
-      // object will not have been fully constructed; therefore the destructor will
-      // not be called.
-      //
-      try {
-        if ( !(curlm_ = curl_multi_init()) )
-          throw exception( "curl_multi_init()", "" );
-        try {
-          ZORBA_CURLM_ASSERT( curl_multi_add_handle( curlm_, curl_ ) );
-        }
-        catch ( ... ) {
-          curl_multi_cleanup( curlm_ );
-          curlm_ = 0;
-          throw;
-        }
-      }
-      catch ( ... ) {
-        destroy( curl_ );
-        curl_ = 0;
-        throw;
-      }
-    }
-    
-    void streambuf::open( char const *uri ) {
-      curl_ = create( uri, curl_write_callback, this );
-      
-      init_curlm();
-    }
-    
-    streamsize streambuf::showmanyc() {
-      return egptr() - gptr();
-    }
-    
-    streambuf::int_type streambuf::underflow() {
-      while ( true ) {
-        if ( gptr() < egptr() )
-          return traits_type::to_int_type( *gptr() );
-        curl_read();
-        if ( !buf_len_ )
-          return traits_type::eof();
-        setg( buf_, buf_, buf_ + buf_len_ );
-      }
-    }
-    
-    ///////////////////////////////////////////////////////////////////////////////
-    
-  } // namespace curl
+    
+    //
+    // Some servers don't like requests that are made without a user-agent
+    // field, so we provide one.
+    //
+    ZORBA_CURL_ASSERT(
+      curl_easy_setopt( curl, CURLOPT_USERAGENT, "libcurl-agent/1.0" )
+    );
+    
+    return curl;
+  }
+  catch ( ... ) {
+    destroy( curl );
+    throw;
+  }
+}
+
+void destroy( CURL *curl ) {
+  if ( curl ) {
+    curl_easy_reset( curl );
+    curl_easy_cleanup( curl );
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+streambuf::streambuf() {
+  init();
+}
+
+streambuf::streambuf( char const *uri ) {
+  init();
+  open( uri );
+}
+
+streambuf::streambuf( CURL *curl ) {
+  init();
+  curl_ = curl;
+  ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_WRITEDATA, this ) );
+  ZORBA_CURL_ASSERT( curl_easy_setopt( curl, CURLOPT_WRITEFUNCTION, curl_write_callback ) );
+  init_curlm();
+}
+
+streambuf::~streambuf() {
+  free( buf_ );
+  close();
+#ifdef WIN32
+  closesocket( dummy_socket_ );
+#endif
+  // If we have been assigned memory ownership of theInformer, delete it now.
+  if ( theOwnInformer )
+    delete theInformer;
+}
+
+void streambuf::close() {
+  if ( curl_ ) {
+    if ( curlm_ ) {
+      curl_multi_remove_handle( curlm_, curl_ );
+      curl_multi_cleanup( curlm_ );
+      curlm_ = 0;
+    }
+    destroy( curl_ );
+    curl_ = 0;
+  }
+}
+
+void streambuf::curl_read() {
+  buf_len_ = 0;
+  while ( curl_running_ && !buf_len_ ) {
+    fd_set fd_read, fd_write, fd_except;
+    FD_ZERO( &fd_read );
+    FD_ZERO( &fd_write );
+    FD_ZERO( &fd_except );
+    int max_fd = -1;
+#ifdef WIN32
+    //
+    // Windows does not like a call to select where all arguments are 0, so we
+    // just add a dummy socket to make the call to select happy.
+    //
+    FD_SET( dummy_socket_, &fd_read );
+#endif /* WIN32 */
+    ZORBA_CURLM_ASSERT(
+      curl_multi_fdset( curlm_, &fd_read, &fd_write, &fd_except, &max_fd )
+    );
+    
+    //
+    // Note that the fopen.c sample code is unnecessary at best or wrong at
+    // worst; see: http://curl.haxx.se/mail/lib-2011-05/0011.html
+    //
+    timeval timeout;
+    long curl_timeout_ms;
+    ZORBA_CURLM_ASSERT( curl_multi_timeout( curlm_, &curl_timeout_ms ) );
+    if ( curl_timeout_ms > 0 ) {
+      timeout.tv_sec  = curl_timeout_ms / 1000;
+      timeout.tv_usec = curl_timeout_ms % 1000 * 1000;
+    } else {
+      //
+      // From curl_multi_timeout(3):
+      //
+      //    Note: if libcurl returns a -1 timeout here, it just means that
+      //    libcurl currently has no stored timeout value. You must not wait
+      //    too long (more than a few seconds perhaps) before you call
+      //    curl_multi_perform() again.
+      //
+      // So we just pick some not-too-long default.
+      //
+      timeout.tv_sec  = 1;
+      timeout.tv_usec = 0;
+    }
+    
+    switch ( select( max_fd + 1, &fd_read, &fd_write, &fd_except, &timeout ) ) {
+      case -1:                          // select error
+#ifdef WIN32
+        char err_buf[8];
+        sprintf( err_buf, "%d", WSAGetLastError() );
+        throw exception( "select()", "", err_buf );
+#else
+        throw exception( "select()", "", strerror( errno ) );
+#endif
+      case 0:                           // timeout
+        // no break;
+      default:
+        CURLMcode code;
+        do {
+          code = curl_multi_perform( curlm_, &curl_running_ );
+        } while ( code == CURLM_CALL_MULTI_PERFORM );
+        ZORBA_CURLM_ASSERT( code );
+    }
+  }
+  if ( theInformer )
+    theInformer->afterRead();
+}
+
+size_t streambuf::curl_write_callback( void *ptr, size_t size, size_t nmemb,
+                                       void *data ) {
+  size *= nmemb;
+  streambuf *const that = static_cast<streambuf*>( data );
+  
+  if ( that->theInformer )
+    that->theInformer->beforeRead();
+
+  size_t const buf_free = that->buf_capacity_ - that->buf_len_;
+  if ( size > buf_free ) {
+    streamoff new_capacity = that->buf_capacity_ + size - buf_free;
+    if ( void *const new_buf =
+          realloc( that->buf_, static_cast<size_t>( new_capacity ) ) ) {
+      that->buf_ = static_cast<char*>( new_buf );
+      that->buf_capacity_ = new_capacity;
+    } else
+      throw exception( "realloc()", "" );
+  }
+  ::memcpy( that->buf_ + that->buf_len_, ptr, size );
+  that->buf_len_ += size;
+  return size;
+}
+
+void streambuf::init() {
+  buf_ = 0;
+  buf_capacity_ = 0;
+  buf_len_ = 0;
+  curl_ = 0;
+  curlm_ = 0;
+  curl_running_ = 0;
+  theInformer = 0;
+  theOwnInformer = false;
+#ifdef WIN32
+  dummy_socket_ = socket( AF_INET, SOCK_DGRAM, 0 );
+  if ( dummy_socket_ == CURL_SOCKET_BAD || dummy_socket_ == INVALID_SOCKET )
+    throw exception( "socket()", "" );
+#endif /* WIN32 */
+}
+
+void streambuf::init_curlm() {
+  //
+  // Lie about cURL running initially so the while-loop in curl_read() will run
+  // at least once.
+  //
+  curl_running_ = 1;
+  
+  //
+  // Set the "get" pointer to the end (gptr() == egptr()) so a call to
+  // underflow() and initial data read will be triggered.
+  //
+  buf_len_ = buf_capacity_;
+  setg( buf_, buf_ + buf_len_, buf_ + buf_capacity_ );
+  
+  //
+  // Clean-up has to be done here with try/catch (as opposed to relying on the
+  // destructor) because open() can be called from the constructor.  If an
+  // exception is thrown, the constructor will not have completed, hence the
+  // object will not have been fully constructed; therefore the destructor will
+  // not be called.
+  //
+  try {
+    if ( !(curlm_ = curl_multi_init()) )
+      throw exception( "curl_multi_init()", "" );
+    try {
+      ZORBA_CURLM_ASSERT( curl_multi_add_handle( curlm_, curl_ ) );
+    }
+    catch ( ... ) {
+      curl_multi_cleanup( curlm_ );
+      curlm_ = 0;
+      throw;
+    }
+  }
+  catch ( ... ) {
+    destroy( curl_ );
+    curl_ = 0;
+    throw;
+  }
+}
+
+int streambuf::multi_perform() {
+  underflow();
+  CURLMsg *msg;
+  int msgInQueue;
+  int error = 0;
+  while ( (msg = curl_multi_info_read( curlm_, &msgInQueue )) ) {
+    if ( msg->msg == CURLMSG_DONE )
+      error = msg->data.result;
+  }
+  return error;
+}
+
+void streambuf::open( char const *uri ) {
+  curl_ = create( uri, curl_write_callback, this );
+  
+  init_curlm();
+}
+
+streamsize streambuf::showmanyc() {
+  return egptr() - gptr();
+}
+
+streambuf::int_type streambuf::underflow() {
+  while ( true ) {
+    if ( gptr() < egptr() )
+      return traits_type::to_int_type( *gptr() );
+    curl_read();
+    if ( !buf_len_ )
+      return traits_type::eof();
+    setg( buf_, buf_, buf_ + buf_len_ );
+  }
+}
+  
+///////////////////////////////////////////////////////////////////////////////
+  
+} // namespace curl
 } // namespace zorba
+/* vim:set et sw=2 ts=2: */

=== modified file 'modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.h'
--- modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.h	2011-07-29 08:12:36 +0000
+++ modules/com/zorba-xquery/www/modules/http-client.xq.src/curl_stream_buffer.h	2012-02-08 06:02:21 +0000
@@ -19,154 +19,175 @@
 
 #include <zorba/config.h>
 
+#include <exception>
 #include <istream>
-#include <exception>
 #include <streambuf>
+#include <string>
 #include <curl/curl.h>
 
 namespace zorba {
-  
-  namespace http_client {
-    class InformDataRead;
-  }
-  
-  namespace curl {
-    
-    class exception : public std::exception {
-    public:
-      exception( char const *function, char const *uri, char const *msg = 0 );
-      exception( char const *function, char const *uri, CURLcode code );
-      exception( char const *function, char const *uri, CURLMcode code );
-    public:
-      virtual const char* what() const throw();
-    private:
-      const char* theMessage;
-    };
-    
-    
-    
-    ////////// create & destroy ///////////////////////////////////////////////////
-    
-    /**
-     * The signature type of cURL's write function callback.
-     */
-    typedef size_t (*write_fn_t)( void*, size_t, size_t, void* );
-    
-    /**
-     * Creates a new, initialized cURL instance.
-     *
-     * @throws exception upon failure.
-     */
-    CURL* create( char const *uri, write_fn_t fn, void *data );
-    
-    /**
-     * Destroys a cURL instance.
-     *
-     * @param instance A cURL instance.  If \c NULL, does nothing.
-     */
-    void destroy( CURL *instance );
-    
-    ////////// streambuf //////////////////////////////////////////////////////////
-    
-    /**
-     * A curl::streambuf is-a std::streambuf for streaming the contents of URI
-     * using cURL.  However, do not use this class directly.  Use uri::streambuf
-     * instead.
-     */
-    class streambuf : public std::streambuf {
-    public:
-      /**
-       * Constructs a %streambuf.
-       */
-      streambuf();
-      
-      /**
-       * Constructs a %streambuf and opens a connection to the server hosting the
-       * given URI for subsequent streaming.
-       *
-       * @param uri The URI to stream.
-       */
-      streambuf( char const *uri );
-      
-      /**
-       * In case we already have a curl object, which was set up somewhere else, we
-       * take it here as an arument. This takes ownership over the object.
-       */
-      streambuf( CURL* aCurl );
-      
-      /**
-       * Destroys a %streambuf.
-       */
-      ~streambuf();
-      
-      /**
-       * Opens a connection to the server hosting the given URI for subsequent
-       * streaming.
-       *
-       * @param uri The URI to stream.
-       * @throws exception upon failure.
-       */
-      void open( char const *uri );
-      
-      /**
-       * Tests whether the buffer is open.
-       *
-       * @return Returns \c true only if the buffer is open.
-       */
-      bool is_open() const {
-        return !!curl_;
-      }
-      
-      /**
-       * Closes this %streambuf.
-       */
-      void close();
-      
-      /**
-       * Provide a InformDataRead that will get callbacks about read events.
-       */
-      void setInformer(::zorba::http_client::InformDataRead* aInformer) { theInformer = aInformer; }
-
-      /**
-       * Specify whether this streambuf has memory ownership over the
-       * InformDataRead it has been passed. You can use this if, for example,
-       * the lifetime of the streambuf will extend past the lifetime of the
-       * object which created the InformDataRead.
-       */
-      void setOwnInformer(bool aOwnInformer) { theOwnInformer = aOwnInformer; }
-      
-      int multi_perform();
-      
-    protected:
-      // inherited
-      std::streamsize showmanyc();
-      int_type underflow();
-      
-    private:
-      void curl_read();
-      static size_t curl_write_callback( void*, size_t, size_t, void* );
-      
-      void init();
-      void init_curlm();
-      
-      char *buf_;
-      std::streamsize buf_capacity_;
-      std::streamoff buf_len_;
-      
-      CURL *curl_;
-      CURLM *curlm_;
-      int curl_running_;
-      ::zorba::http_client::InformDataRead* theInformer;
-      bool theOwnInformer;
-      
-      // forbid
-      streambuf( streambuf const& );
-      streambuf& operator=( streambuf const& );
+
+namespace http_client {
+  class InformDataRead;
+}
+
+namespace curl {
+
+///////////////////////////////////////////////////////////////////////////////
+
+class exception : public std::exception {
+public:
+  exception( char const *function, char const *uri, char const *msg = 0 );
+  exception( char const *function, char const *uri, CURLcode code );
+  exception( char const *function, char const *uri, CURLMcode code );
+  ~exception() throw();
+
+  virtual const char* what() const throw();
+
+private:
+  std::string msg_;
+};
+
+////////// create & destroy ///////////////////////////////////////////////////
+
+/**
+  * The signature type of cURL's write function callback.
+  */
+typedef size_t (*write_fn_t)( void*, size_t, size_t, void* );
+
+/**
+  * Creates a new, initialized cURL instance.
+  *
+  * @throws exception upon failure.
+  */
+CURL* create( char const *uri, write_fn_t fn, void *data );
+
+/**
+  * Destroys a cURL instance.
+  *
+  * @param instance A cURL instance.  If \c NULL, does nothing.
+  */
+void destroy( CURL *instance );
+
+////////// streambuf //////////////////////////////////////////////////////////
+
+/**
+  * A curl::streambuf is-a std::streambuf for streaming the contents of URI
+  * using cURL.  However, do not use this class directly.  Use uri::streambuf
+  * instead.
+  */
+class streambuf : public std::streambuf {
+public:
+  /**
+   * Constructs a %streambuf.
+   */
+  streambuf();
+
+  /**
+   * Constructs a %streambuf and opens a connection to the server hosting the
+   * given URI for subsequent streaming.
+   *
+   * @param uri The URI to stream.
+   */
+  streambuf( char const *uri );
+
+  /**
+   * Constructs a %streambuf using an existing CURL object.
+   *
+   * @param curl The CURL object to use.  This %streambuf takes ownership of
+   * it.
+   */
+  streambuf( CURL *curl );
+
+  /**
+   * Destroys a %streambuf.
+   */
+  ~streambuf();
+
+  /**
+   * Opens a connection to the server hosting the given URI for subsequent
+   * streaming.
+   *
+   * @param uri The URI to stream.
+   * @throws exception upon failure.
+   */
+  void open( char const *uri );
+
+  /**
+   * Tests whether the buffer is open.
+   *
+   * @return Returns \c true only if the buffer is open.
+   */
+  bool is_open() const {
+    return !!curl_;
+  }
+
+  /**
+   * Closes this %streambuf.
+   */
+  void close();
+
+  /**
+   * Gets the CURL object in use.
+   *
+   * @return Returns said CURL object.
+   */
+  CURL* curl() const {
+    return curl_;
+  }
+
+  /**
+   * Provides an InformDataRead that will get callbacks about read events.
+   */
+  void setInformer( http_client::InformDataRead *aInformer ) {
+    theInformer = aInformer;
+  }
+
+  /**
+   * Specify whether this streambuf has memory ownership over the
+   * InformDataRead it has been passed. You can use this if, for example,
+   * the lifetime of the streambuf will extend past the lifetime of the
+   * object which created the InformDataRead.
+   */
+  void setOwnInformer( bool aOwnInformer ) {
+    theOwnInformer = aOwnInformer;
+  }
+
+  int multi_perform();
+
+protected:
+  // inherited
+  std::streamsize showmanyc();
+  int_type underflow();
+
+private:
+  void curl_read();
+  static size_t curl_write_callback( void*, size_t, size_t, void* );
+
+  void init();
+  void init_curlm();
+
+  char *buf_;
+  std::streamsize buf_capacity_;
+  std::streamoff buf_len_;
+
+  CURL *curl_;
+  CURLM *curlm_;
+  int curl_running_;
+  http_client::InformDataRead *theInformer;
+  bool theOwnInformer;
+
+  // forbid
+  streambuf( streambuf const& );
+  streambuf& operator=( streambuf const& );
 #ifdef WIN32
-      SOCKET theDummySocket;
-#endif
-    };
-    
-  } // namespace curl
+  SOCKET dummy_socket_;
+#endif /* WIN32 */
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace curl
 } // namespace zorba
 #endif /* ZORBA_CURL_UTIL_H */
+/* vim:set et sw=2 ts=2: */

=== modified file 'modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.cpp'
--- modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.cpp	2011-07-29 08:12:36 +0000
+++ modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.cpp	2012-02-08 06:02:21 +0000
@@ -26,12 +26,44 @@
 #include <zorba/error.h>
 #include <zorba/xquery_exception.h>
 #include <zorba/xquery_functions.h>
+#include <zorba/transcode_stream.h>
 
 #include "http_response_parser.h"
 #include "http_request_handler.h"
 #include "curl_stream_buffer.h"
 
-namespace zorba { namespace http_client {
+namespace zorba {
+
+static bool parse_content_type( std::string const &s, std::string *mime_type,
+                                std::string *charset ) {
+  // Splits a Content-Type value; returns true iff a media type was present.
+  std::string::size_type pos = s.find( ';' );
+  *mime_type = s.substr( 0, pos );
+  if ( pos != std::string::npos ) {
+    // NOTE(review): any first parameter is treated as the charset -- confirm.
+    // Parse: charset="?XXXXX"?[ (comment)]
+    // Search for '=' only after the ';' so the media type is never scanned.
+    if ( (pos = s.find( '=', pos )) != std::string::npos ) {
+      std::string t = s.substr( pos + 1 );
+      if ( !t.empty() ) {
+        if ( t[0] == '"' ) {
+          t.erase( 0, 1 );
+          if ( (pos = t.find( '"' )) != std::string::npos )
+            t.erase( pos );
+        } else if ( (pos = t.find_first_of( " ;" )) != std::string::npos ) {
+          t.erase( pos );  // drop a trailing comment or further parameters
+        }
+        *charset = t;
+      }
+    }
+  } else {
+    // The HTTP/1.1 spec says that the default charset is ISO-8859-1.
+    *charset = "ISO-8859-1";
+  }
+  return !mime_type->empty();  // was missing: flowing off the end is UB
+}
+
+namespace http_client {
   
   HttpResponseParser::HttpResponseParser(RequestHandler& aHandler, CURL* aCurl,
                                          ErrorThrower& aErrorThrower,
@@ -60,19 +92,30 @@
     if (lCode)
       return lCode; 
     if (!theStatusOnly) {
-      std::auto_ptr<std::istream> lStream(new std::istream(theStreamBuffer));
+
+      if (!theOverridenContentType.empty()) {
+        parse_content_type(
+          theOverridenContentType, &theCurrentContentType, &theCurrentCharset
+        );
+      }
+
+      std::auto_ptr<std::istream> lStream;
+      if ( transcode::is_necessary( theCurrentCharset.c_str() ) ) {
+        lStream.reset(
+          new transcode::stream<std::istream>(
+            theCurrentCharset.c_str(), theStreamBuffer
+          )
+        );
+      } else
+        lStream.reset(new std::istream(theStreamBuffer));
+
       Item lItem;
-      if (theOverridenContentType != "") {
-        theCurrentContentType = theOverridenContentType;
-      }
       if (theCurrentContentType == "text/xml" ||
           theCurrentContentType == "application/xml" ||
           theCurrentContentType == "text/xml-external-parsed-entity" ||
           theCurrentContentType == "application/xml-external-parsed-entity" ||
           theCurrentContentType.find("+xml") == theCurrentContentType.size()-4) {
         lItem = createXmlItem(*lStream.get());
-      } else if (theCurrentContentType.find("text/html") == 0) {
-        lItem = createTextItem(lStream.release());
       } else if (theCurrentContentType.find("text/") == 0) {
         lItem = createTextItem(lStream.release());
       } else {
@@ -106,8 +149,8 @@
     }
     theInsideRead = true;
     theHandler.beginResponse(theStatus, theMessage);
-    std::vector<std::pair<std::string, std::string> >::iterator lIter;
-    for (lIter = theHeaders.begin(); lIter != theHeaders.end(); ++lIter) {
+    for ( headers_type::const_iterator
+          lIter = theHeaders.begin(); lIter != theHeaders.end(); ++lIter) {
       theHandler.header(lIter->first, lIter->second);
     }
     if (!theStatusOnly)
@@ -120,23 +163,20 @@
 
   void HttpResponseParser::registerHandler()
   {
-    curl_easy_setopt(theCurl, CURLOPT_HEADERFUNCTION,
-      &HttpResponseParser::headerfunction);
+    curl_easy_setopt(theCurl, CURLOPT_HEADERFUNCTION, &curl_headerfunction);
     curl_easy_setopt(theCurl, CURLOPT_HEADERDATA, this);
   }
 
-  size_t HttpResponseParser::headerfunction(void *ptr,
-                                            size_t size,
-                                            size_t nmemb,
-                                            void *stream)
+  size_t HttpResponseParser::curl_headerfunction( void *ptr, size_t size,
+                                                  size_t nmemb, void *data )
   {
     size_t lSize = size*nmemb;
     size_t lResult = lSize;
-    HttpResponseParser* lParser = static_cast<HttpResponseParser*>(stream);
+    HttpResponseParser* lParser = static_cast<HttpResponseParser*>(data);
     if (lParser->theInsideRead) {
       lParser->theHandler.endBody();
+      lParser->theInsideRead = false;
     }
-    lParser->theInsideRead = false;
     const char* lDataChar = (const char*) ptr;
     while (lSize != 0 && (lDataChar[lSize - 1] == 10
           || lDataChar[lSize - 1] == 13)) {
@@ -173,7 +213,9 @@
     }
     String lNameS = fn::lower_case( lName );
     if (lNameS == "content-type") {
-      lParser->theCurrentContentType = lValue.substr(0, lValue.find(';'));
+      parse_content_type(
+        lValue, &lParser->theCurrentContentType, &lParser->theCurrentCharset
+      );
     } else if (lNameS == "content-id") {
       lParser->theId = lValue;
     } else if (lNameS == "content-description") {
@@ -184,7 +226,7 @@
     return lResult;
   }
 
-  void HttpResponseParser::parseStatusAndMessage(std::string aHeader)
+  void HttpResponseParser::parseStatusAndMessage(std::string const &aHeader)
   {
     std::string::size_type lPos = aHeader.find(' ');
     assert(lPos != std::string::npos);
@@ -215,7 +257,12 @@
   static void streamReleaser(std::istream* aStream)
   {
     // This istream contains our curl stream buffer, so we have to delete it too
-    delete aStream->rdbuf();
+    std::streambuf *const sbuf = aStream->rdbuf();
+    if ( transcode::streambuf *tbuf =
+          dynamic_cast<transcode::streambuf*>( sbuf ) )
+      delete tbuf->orig_streambuf();
+    else
+      delete sbuf;
     delete aStream;
   }
 
@@ -265,4 +312,7 @@
       return Item(); 
     }
   }
-}}
+
+} // namespace http_client
+} // namespace zorba
+/* vim:set et sw=2 ts=2: */

=== modified file 'modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.h'
--- modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.h	2011-07-29 08:12:36 +0000
+++ modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.h	2012-02-08 06:02:21 +0000
@@ -31,6 +31,7 @@
 namespace curl {
   class streambuf;
 }
+
 namespace http_client {
   class RequestHandler;
 
@@ -40,7 +41,9 @@
     CURL* theCurl;
     ErrorThrower& theErrorThrower;
     std::string theCurrentContentType;
-    std::vector<std::pair<std::string, std::string> > theHeaders;
+    std::string theCurrentCharset;
+    typedef std::vector<std::pair<std::string, std::string> > headers_type;
+    headers_type theHeaders;
     int theStatus;
     std::string theMessage;
     zorba::curl::streambuf* theStreamBuffer;
@@ -74,15 +77,16 @@
     virtual void afterRead();
   private:
     void registerHandler();
-    void parseStatusAndMessage(std::string aHeader);
+    void parseStatusAndMessage(std::string const &aHeader);
     Item createXmlItem(std::istream& aStream);
     Item createHtmlItem(std::istream& aStream);
     Item createTextItem(std::istream* aStream);
     Item createBase64Item(std::istream& aStream);
-  public: //Handler
-    static size_t headerfunction( void *ptr, size_t size, size_t nmemb,
-      void *stream);
+
+    static size_t curl_headerfunction( void*, size_t, size_t, void* );
   };
-}} // namespace zorba, http_client
+
+} // namespace http_client
+} // namespace zorba
 
 #endif //HTTP_RESPONSE_PARSER_H

=== modified file 'modules/com/zorba-xquery/www/modules/pregenerated/errors.xq'
--- modules/com/zorba-xquery/www/modules/pregenerated/errors.xq	2011-12-21 14:40:33 +0000
+++ modules/com/zorba-xquery/www/modules/pregenerated/errors.xq	2012-02-08 06:02:21 +0000
@@ -664,6 +664,10 @@
 
 (:~
 :)
+declare variable $zerr:ZOSE0006 as xs:QName := fn:QName($zerr:NS, "zerr:ZOSE0006");
+
+(:~
+:)
 declare variable $zerr:ZSTR0001 as xs:QName := fn:QName($zerr:NS, "zerr:ZSTR0001");
 
 (:~

=== modified file 'modules/org/expath/ns/file.xq.src/file.cpp'
--- modules/org/expath/ns/file.xq.src/file.cpp	2011-07-22 08:12:31 +0000
+++ modules/org/expath/ns/file.xq.src/file.cpp	2012-02-08 06:02:21 +0000
@@ -28,6 +28,7 @@
 #include <zorba/singleton_item_sequence.h>
 #include <zorba/util/path.h>
 #include <zorba/user_exception.h>
+#include <zorba/transcode_stream.h>
 
 #include "file_module.h"
 
@@ -188,6 +189,7 @@
 {
   String lFileStr = getFilePathString(aArgs, 0);
   File_t lFile = File::createFile(lFileStr.c_str());
+  String lEncoding("UTF-8");
 
   // preconditions
   if (!lFile->exists()) {
@@ -198,18 +200,30 @@
   }
 
   if (aArgs.size() == 2) {
-    // since Zorba currently only supports UTF-8 we only call this function
-    // to reject any other encoding requested bu the user
-    getEncodingArg(aArgs, 1);
+    lEncoding = getEncodingArg(aArgs, 1);
   }
   
-  std::auto_ptr<StreamableItemSequence> lSeq(new StreamableItemSequence());
-  lFile->openInputStream(*lSeq->theStream, false, true);
-
-  lSeq->theItem = theModule->getItemFactory()->createStreamableString(
-      *lSeq->theStream, &StreamableItemSequence::streamReleaser);
-
-  return ItemSequence_t(lSeq.release());
+  zorba::Item lResult;
+  std::unique_ptr<std::ifstream> lInStream;
+  if ( transcode::is_necessary( lEncoding.c_str() ) )
+  {
+    try {
+      lInStream.reset( new transcode::stream<std::ifstream>(lEncoding.c_str()) );
+    } catch (std::invalid_argument const& e)
+    {
+      raiseFileError("FOFL0006", "Unsupported encoding", lEncoding.c_str());
+    }
+  }
+  else
+  {
+    lInStream.reset( new std::ifstream() );
+  }
+  lFile->openInputStream(*lInStream.get(), false, true);
+  lResult = theModule->getItemFactory()->createStreamableString(
+      *lInStream.release(), &FileModule::streamReleaser
+    );
+  return ItemSequence_t(new SingletonItemSequence(lResult));
+
 }
 
 //*****************************************************************************
@@ -722,3 +736,4 @@
 extern "C" DLL_EXPORT zorba::ExternalModule* createModule() {
   return new zorba::filemodule::FileModule();
 }
+/* vim:set et sw=2 ts=2: */

=== modified file 'modules/org/expath/ns/file.xq.src/file_function.cpp'
--- modules/org/expath/ns/file.xq.src/file_function.cpp	2011-07-13 01:56:45 +0000
+++ modules/org/expath/ns/file.xq.src/file_function.cpp	2012-02-08 06:02:21 +0000
@@ -141,11 +141,6 @@
     arg_iter->close();
   }
 
-  if (!(lEncoding == "UTF-8" || lEncoding == "UTF8")) {
-    // the rest are not supported encodings
-    raiseFileError("FOFL0006", "Unsupported encoding", lEncoding.c_str());
-  }
-
   return lEncoding;
 }
 

=== modified file 'modules/org/expath/ns/file.xq.src/file_function.h'
--- modules/org/expath/ns/file.xq.src/file_function.h	2011-07-22 08:12:31 +0000
+++ modules/org/expath/ns/file.xq.src/file_function.h	2012-02-08 06:02:21 +0000
@@ -25,7 +25,9 @@
 
 #include <fstream>
 
-namespace zorba { namespace filemodule {
+namespace zorba {
+
+  namespace filemodule {
 
   class FileModule;
 
@@ -136,18 +138,12 @@
             next(Item& aResult);
         };
 
-        Item           theItem;
-        std::ifstream* theStream;
+        Item                 theItem;
+        std::ifstream*       theStream;
 
         StreamableItemSequence() 
           : theStream(new std::ifstream()) {}
 
-        static void
-        streamReleaser(std::istream* stream)
-        {
-          delete stream;
-        }
-
         Iterator_t  getIterator()
         {
           return new InternalIterator(this);

=== modified file 'modules/org/expath/ns/file.xq.src/file_module.cpp'
--- modules/org/expath/ns/file.xq.src/file_module.cpp	2011-06-08 18:37:56 +0000
+++ modules/org/expath/ns/file.xq.src/file_module.cpp	2012-02-08 06:02:21 +0000
@@ -17,11 +17,10 @@
 #include "file.h"
 #include "file_module.h"
 #include "file_function.h"
+#include <cassert>
 
 namespace zorba { namespace filemodule {
 
-  ItemFactory* FileModule::theFactory = 0;
-
   const char* FileModule::theNamespace = "http://expath.org/ns/file";;
 
 
@@ -39,9 +38,7 @@
 {
   ExternalFunction*& lFunc = theFunctions[aLocalname];
   if (!lFunc) {
-    if (1 == 0) {
-
-    } else if (aLocalname == "create-directory") {
+    if (aLocalname == "create-directory") {
       lFunc = new CreateDirectoryFunction(this);
     } else if (aLocalname == "delete-file-impl") {
       lFunc = new DeleteFileImplFunction(this);

=== modified file 'modules/org/expath/ns/file.xq.src/file_module.h'
--- modules/org/expath/ns/file.xq.src/file_module.h	2011-06-08 18:37:56 +0000
+++ modules/org/expath/ns/file.xq.src/file_module.h	2012-02-08 06:02:21 +0000
@@ -27,7 +27,7 @@
 class FileModule : public ExternalModule
 {
 private:
-  static ItemFactory* theFactory;
+  mutable ItemFactory* theFactory;
 
 public:
   static const char* theNamespace;
@@ -43,10 +43,17 @@
   };
   
   typedef std::map<String, ExternalFunction*, ltstr> FuncMap_t;
-
   FuncMap_t theFunctions;
-  
+
 public:
+  static void
+  streamReleaser(std::istream* stream)
+  {
+    delete stream;
+  }
+
+  FileModule() : theFactory(0) {}
+
   virtual ~FileModule();
   
   virtual String
@@ -58,10 +65,10 @@
   virtual void
   destroy();
 
-  static ItemFactory*
-  getItemFactory()
+  ItemFactory*
+  getItemFactory() const
   {
-    if(!theFactory)
+    if (!theFactory)
     {
       theFactory = Zorba::getInstance(0)->getItemFactory();
     }

=== modified file 'src/api/CMakeLists.txt'
--- src/api/CMakeLists.txt	2011-08-31 13:17:59 +0000
+++ src/api/CMakeLists.txt	2012-02-08 06:02:21 +0000
@@ -55,6 +55,7 @@
     zorba_functions.cpp
     annotationimpl.cpp
     auditimpl.cpp
+    transcode_streambuf.cpp
     )
 
 IF (NOT ZORBA_NO_FULL_TEXT)

=== added file 'src/api/transcode_streambuf.cpp'
--- src/api/transcode_streambuf.cpp	1970-01-01 00:00:00 +0000
+++ src/api/transcode_streambuf.cpp	2012-02-08 06:02:21 +0000
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <zorba/transcode_stream.h>
+
+#include "util/transcode_streambuf.h"
+
+using namespace std;
+
+namespace zorba {
+namespace transcode {
+
+///////////////////////////////////////////////////////////////////////////////
+
+streambuf::streambuf( char const *charset, std::streambuf *orig ) :
+  proxy_buf_( new internal::transcode::streambuf( charset, orig ) )
+{
+}
+
+streambuf::~streambuf() {
+  // out-of-line since it's virtual
+}
+
+void streambuf::imbue( std::locale const &loc ) {
+  proxy_buf_->pubimbue( loc );
+}
+
+streambuf::pos_type streambuf::seekoff( off_type o, ios_base::seekdir d,
+                                        ios_base::openmode m ) {
+  return proxy_buf_->pubseekoff( o, d, m );
+}
+
+streambuf::pos_type streambuf::seekpos( pos_type p, ios_base::openmode m ) {
+  return proxy_buf_->pubseekpos( p, m );
+}
+
+std::streambuf* streambuf::setbuf( char_type *p, streamsize s ) {
+  proxy_buf_->pubsetbuf( p, s );
+  return this;
+}
+
+streamsize streambuf::showmanyc() {
+  return proxy_buf_->in_avail();
+}
+
+int streambuf::sync() {
+  return proxy_buf_->pubsync();
+}
+
+streambuf::int_type streambuf::overflow( int_type c ) {
+  return proxy_buf_->sputc( c );
+}
+
+streambuf::int_type streambuf::pbackfail( int_type c ) {
+  return traits_type::eq_int_type( c, traits_type::eof() ) ?  // eof: plain unget
+    proxy_buf_->sungetc() : proxy_buf_->sputbackc( traits_type::to_char_type( c ) );
+}
+streambuf::int_type streambuf::uflow() {
+  return proxy_buf_->sbumpc();
+}
+
+streambuf::int_type streambuf::underflow() {
+  return proxy_buf_->sgetc();
+}
+
+streamsize streambuf::xsgetn( char_type *to, streamsize size ) {
+  return proxy_buf_->sgetn( to, size );
+}
+
+streamsize streambuf::xsputn( char_type const *from,
+                                       streamsize size ) {
+  return proxy_buf_->sputn( from, size );
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+bool is_necessary( char const *charset ) {
+  return internal::transcode::streambuf::is_necessary( charset );
+}
+
+bool is_supported( char const *charset ) {
+  return internal::transcode::streambuf::is_supported( charset );
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace transcode
+} // namespace zorba
+/* vim:set et sw=2 ts=2: */

=== modified file 'src/diagnostics/diagnostic_en.xml'
--- src/diagnostics/diagnostic_en.xml	2011-12-21 14:40:33 +0000
+++ src/diagnostics/diagnostic_en.xml	2012-02-08 06:02:21 +0000
@@ -2188,6 +2188,10 @@
       <value>"$1": error loading dynamic library${: 2}</value>
     </diagnostic>
 
+    <diagnostic code="ZOSE0006" name="TRANSCODING_ERROR">
+      <value>stream transcoding error ($1)</value>
+    </diagnostic>
+
     <!--////////// Zorba Store Errors //////////////////////////////////////-->
 
     <diagnostic code="ZSTR0001" name="INDEX_ALREADY_EXISTS">

=== modified file 'src/diagnostics/pregenerated/diagnostic_list.cpp'
--- src/diagnostics/pregenerated/diagnostic_list.cpp	2011-12-21 14:40:33 +0000
+++ src/diagnostics/pregenerated/diagnostic_list.cpp	2012-02-08 06:02:21 +0000
@@ -1004,6 +1004,9 @@
 ZorbaErrorCode ZOSE0005_DLL_LOAD_FAILED( "ZOSE0005" );
 
 
+ZorbaErrorCode ZOSE0006_TRANSCODING_ERROR( "ZOSE0006" );
+
+
 ZorbaErrorCode ZSTR0001_INDEX_ALREADY_EXISTS( "ZSTR0001" );
 
 

=== modified file 'src/diagnostics/pregenerated/dict_en.cpp'
--- src/diagnostics/pregenerated/dict_en.cpp	2011-12-21 14:40:33 +0000
+++ src/diagnostics/pregenerated/dict_en.cpp	2012-02-08 06:02:21 +0000
@@ -337,6 +337,7 @@
   { "ZOSE0003", "stream read failure" },
   { "ZOSE0004", "${\"1\": }I/O error${: 2}" },
   { "ZOSE0005", "\"$1\": error loading dynamic library${: 2}" },
+  { "ZOSE0006", "stream transcoding error ($1)" },
   { "ZSTR0001", "\"$1\": index already exists" },
   { "ZSTR0002", "\"$1\": index does not exist" },
   { "ZSTR0003", "\"$1\": partial key insertion into index \"$2\"" },

=== modified file 'src/unit_tests/CMakeLists.txt'
--- src/unit_tests/CMakeLists.txt	2012-02-02 09:56:52 +0000
+++ src/unit_tests/CMakeLists.txt	2012-02-08 06:02:21 +0000
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-                                  
 
 SET(UNIT_TEST_SRCS
   string_instantiate.cpp
@@ -28,10 +27,9 @@
     tokenizer.cpp)
 ENDIF (NOT ZORBA_NO_FULL_TEXT)
 
-IF(ZORBA_WITH_DEBUGGER)
-  LIST(APPEND UNIT_TEST_SRCS
-#    test_debugger_protocol.cpp
-  )
-ENDIF(ZORBA_WITH_DEBUGGER)
+IF (NOT ZORBA_NO_UNICODE)
+  LIST (APPEND UNIT_TEST_SRCS
+    test_icu_streambuf.cpp)
+ENDIF (NOT ZORBA_NO_UNICODE)
 
 # vim:set et sw=2 tw=2:

=== added file 'src/unit_tests/test_icu_streambuf.cpp'
--- src/unit_tests/test_icu_streambuf.cpp	1970-01-01 00:00:00 +0000
+++ src/unit_tests/test_icu_streambuf.cpp	2012-02-08 06:02:21 +0000
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+#include "util/transcode_streambuf.h"
+
+using namespace std;
+using namespace zorba;
+
+#define COPYRIGHT_ISO     "\xA9"
+#define COPYRIGHT_UTF8    "\xC2\xA9"
+
+#define ONE_THIRD_UTF8    "\xE2\x85\x93"
+#define ONE_THIRD_UTF16BE "\x21\x53"
+
+struct test {
+  char const *ext_charset;
+  char const *ext_str;
+  int ext_len;
+  char const *utf8_str;
+};
+
+static test const tests[] = {
+  /*  0 */ { "ISO-8859-1", "Copyright " COPYRIGHT_ISO " 2011", 0, "Copyright " COPYRIGHT_UTF8 " 2011" },
+  /*  1 */ { "UTF-16BE", ONE_THIRD_UTF16BE "\0 \0c\0u\0p", 10, ONE_THIRD_UTF8 " cup" },
+  { 0, 0, 0, 0 }
+};
+
+static string make_ext_str( test const *t ) {
+  if ( t->ext_len )
+    return string( t->ext_str, t->ext_len );
+  return string( t->ext_str );
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+static int failures;
+
+static bool assert_true( int no, char const *expr, int line, bool result ) {
+  if ( !result ) {
+    cout << '#' << no << " FAILED, line " << line << ": " << expr << endl;
+    ++failures;
+  }
+  return result;
+}
+
+static void print_exception( int no, char const *expr, int line,
+                             std::exception const &e ) {
+  assert_true( no, expr, line, false );
+  cout << "+ exception: " << e.what() << endl;
+}
+
+#define ASSERT_TRUE( NO, EXPR ) assert_true( NO, #EXPR, __LINE__, !!(EXPR) )
+
+#define ASSERT_TRUE_AND_NO_EXCEPTION( NO, EXPR ) \
+  try { ASSERT_TRUE( NO, EXPR ); } \
+  catch ( std::exception const &e ) { print_exception( NO, #EXPR, __LINE__, e ); }
+
+///////////////////////////////////////////////////////////////////////////////
+
+static bool test_getline( test const *t ) {
+  string const ext_str( make_ext_str( t ) );
+  istringstream iss( ext_str );
+  icu_streambuf xbuf( t->ext_charset, iss.rdbuf() );
+  iss.ios::rdbuf( &xbuf );
+
+  char utf8_buf[ 1024 ];
+  iss.getline( utf8_buf, sizeof utf8_buf );
+  if ( iss.gcount() ) {
+    string const utf8_str( utf8_buf );
+    return utf8_str == t->utf8_str;
+  }
+  return false;
+}
+
+static bool test_read( test const *t ) {
+  string const ext_str( make_ext_str( t ) );
+  istringstream iss( ext_str );
+  icu_streambuf xbuf( t->ext_charset, iss.rdbuf() );
+  iss.ios::rdbuf( &xbuf );
+
+  char utf8_buf[ 1024 ];
+  iss.read( utf8_buf, sizeof utf8_buf );
+  if ( iss.gcount() ) {
+    string const utf8_str( utf8_buf, iss.gcount() );
+    return utf8_str == t->utf8_str;
+  }
+  return false;
+}
+
+static bool test_insertion( test const *t ) {
+  ostringstream oss;
+  icu_streambuf xbuf( t->ext_charset, oss.rdbuf() );
+  oss.ios::rdbuf( &xbuf );
+
+  oss << t->utf8_str << flush;
+  string const ext_str( oss.str() );
+
+  string const expected_ext_str( make_ext_str( t ) );
+  return ext_str == expected_ext_str;
+}
+
+static bool test_put( test const *t ) {
+  ostringstream oss;
+  icu_streambuf xbuf( t->ext_charset, oss.rdbuf() );
+  oss.ios::rdbuf( &xbuf );
+
+  for ( char const *c = t->utf8_str; *c; ++c )
+    oss.put( *c );
+  string const ext_str( oss.str() );
+
+  string const expected_ext_str( make_ext_str( t ) );
+  return ext_str == expected_ext_str;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+namespace zorba {
+namespace UnitTests {
+
+int test_icu_streambuf( int, char*[] ) {
+  int test_no = 0;
+  for ( test const *t = tests; t->utf8_str; ++t, ++test_no ) {
+    ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_getline( t ) );
+    ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_read( t ) );
+    ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_insertion( t ) );
+    ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_put( t ) );
+  }
+  cout << failures << " test(s) failed\n";
+  return failures ? 1 : 0;
+}
+
+} // namespace UnitTests
+} // namespace zorba
+/* vim:set et sw=2 ts=2: */

=== modified file 'src/unit_tests/unit_test_list.h'
--- src/unit_tests/unit_test_list.h	2012-02-02 09:56:52 +0000
+++ src/unit_tests/unit_test_list.h	2012-02-08 06:02:21 +0000
@@ -17,6 +17,8 @@
 #ifndef ZORBA_UNIT_TEST_LIST_H
 #define ZORBA_UNIT_TEST_LIST_H
 
+#include <iostream>
+
 #include <zorba/config.h>
 
 namespace zorba {
@@ -34,6 +36,9 @@
     /**
      * ADD NEW UNIT TESTS HERE
      */
+#ifndef ZORBA_NO_UNICODE
+    int test_icu_streambuf( int, char*[] );
+#endif /* ZORBA_NO_UNICODE */
 
     void initializeTestList();
   };

=== modified file 'src/unit_tests/unit_tests.cpp'
--- src/unit_tests/unit_tests.cpp	2012-02-02 09:56:52 +0000
+++ src/unit_tests/unit_tests.cpp	2012-02-08 06:02:21 +0000
@@ -39,6 +39,9 @@
     void initializeTestList() {
       libunittests["string"] = test_string;
       libunittests["uri"] = runUriTest;
+#ifndef ZORBA_NO_UNICODE
+      libunittests["icu_streambuf"] = test_icu_streambuf;
+#endif /* ZORBA_NO_UNICODE */
       libunittests["unique_ptr"] = test_unique_ptr;
 #ifndef ZORBA_NO_FULL_TEXT
       libunittests["stemmer"] = test_stemmer;

=== modified file 'src/util/CMakeLists.txt'
--- src/util/CMakeLists.txt	2011-07-18 14:25:21 +0000
+++ src/util/CMakeLists.txt	2012-02-08 06:02:21 +0000
@@ -39,7 +39,12 @@
 ENDIF(ZORBA_WITH_FILE_ACCESS)
 
 IF(ZORBA_NO_UNICODE)
-  LIST(APPEND UTIL_SRCS regex_ascii.cpp)
+  LIST(APPEND UTIL_SRCS
+    regex_ascii.cpp
+    passthru_streambuf.cpp)
+ELSE(ZORBA_NO_UNICODE)
+  LIST(APPEND UTIL_SRCS
+    icu_streambuf.cpp)
 ENDIF(ZORBA_NO_UNICODE)
 
 HEADER_GROUP_SUBFOLDER(UTIL_SRCS fx)

=== added file 'src/util/icu_streambuf.cpp'
--- src/util/icu_streambuf.cpp	1970-01-01 00:00:00 +0000
+++ src/util/icu_streambuf.cpp	2012-02-08 06:02:21 +0000
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Set to 1 to trace the streambuf entry points with printf().
+#define ZORBA_DEBUG_ICU_STREAMBUF 0
+
+// The macro is always defined, so its value (not its mere existence) must be
+// tested; <stdio.h> is only needed for the debug printf() calls.
+#if ZORBA_DEBUG_ICU_STREAMBUF
+# include <stdio.h>
+#endif
+
+#include <algorithm>
+#include <cassert>
+
+#include <zorba/diagnostic_list.h>
+
+#include "diagnostics/assert.h"
+#include "diagnostics/diagnostic.h"
+#include "diagnostics/zorba_exception.h"
+#include "util/cxx_util.h"
+#include "util/string_util.h"
+#include "util/utf8_util.h"
+
+#include "icu_streambuf.h"
+
+using namespace std;
+
+namespace zorba {
+
+// Size of the stack buffer overflow() uses for the external bytes produced
+// while transcoding a single UTF-8 byte.
+int const Small_External_Buf_Size = 6;
+// Size of the stack buffers xsgetn() and xsputn() use for bulk transfers.
+int const Large_External_Buf_Size = 4096;
+
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Marks the pivot buffer as empty by moving both cursors back to its start.
+ */
+inline void icu_streambuf::buf_type_base::reset() {
+  pivot_target_ = pivot_buf_;
+  pivot_source_ = pivot_buf_;
+}
+
+/**
+ * Makes the get area empty: it spans g_.utf8_char_, but the read position is
+ * placed at its end so the next read has to call underflow().
+ */
+inline void icu_streambuf::resetg() {
+  char_type *const buf_end = g_.utf8_char_ + sizeof g_.utf8_char_;
+  setg( g_.utf8_char_, buf_end, buf_end );
+}
+
+/**
+ * Constructs an %icu_streambuf that wraps \a orig.
+ *
+ * No converters are opened when is_necessary() reports that \a charset needs
+ * no transcoding.
+ *
+ * @param charset The name of the external character encoding.
+ * @param orig The streambuf to read/write the external bytes from/to.
+ * @throws std::invalid_argument if \a orig is null or create_conv() rejects
+ * \a charset.
+ */
+icu_streambuf::icu_streambuf( char const *charset, streambuf *orig ) :
+  proxy_streambuf( orig ),
+  no_conv_( !is_necessary( charset ) ),
+  external_conv_( no_conv_ ? nullptr : create_conv( charset ) ),
+  utf8_conv_( no_conv_ ? nullptr : create_conv( "UTF-8" ) )
+{
+  if ( !orig ) {
+    // The destructor doesn't run when a constructor throws, so the converters
+    // opened by the initializers above have to be released here.
+    if ( external_conv_ )
+      ucnv_close( external_conv_ );
+    if ( utf8_conv_ )
+      ucnv_close( utf8_conv_ );
+    throw invalid_argument( "null streambuf" );
+  }
+  resetg();
+}
+
+/**
+ * Closes whichever converters the constructor opened (both are null when no
+ * conversion was set up).
+ */
+icu_streambuf::~icu_streambuf() {
+  UConverter *const convs[] = { external_conv_, utf8_conv_ };
+  for ( unsigned i = 0; i < sizeof convs / sizeof convs[0]; ++i )
+    if ( convs[i] )
+      ucnv_close( convs[i] );
+}
+
+/**
+ * Discards all transcoding state: resets both converters, empties both pivot
+ * buffers and empties the get area.  Does nothing in pass-through mode.
+ */
+void icu_streambuf::clear() {
+  if ( no_conv_ )
+    return;
+  ucnv_reset( external_conv_ );
+  ucnv_reset( utf8_conv_ );
+  g_.reset();
+  p_.reset();
+  resetg();
+}
+
+/**
+ * Opens an ICU converter for \a charset and installs the "stop" callbacks for
+ * both conversion directions.
+ *
+ * @param charset The name of the character encoding.
+ * @return The newly opened converter; the caller must ucnv_close() it.
+ * @throws std::invalid_argument (carrying \a charset) if the converter could
+ * not be opened or configured.
+ */
+UConverter* icu_streambuf::create_conv( char const *charset ) {
+  UErrorCode status = U_ZERO_ERROR;
+  UConverter *const cnv = ucnv_open( charset, &status );
+  // All three calls share one status, which is inspected only afterwards.
+  ucnv_setFromUCallBack(
+    cnv, UCNV_FROM_U_CALLBACK_STOP, nullptr, nullptr, nullptr, &status
+  );
+  ucnv_setToUCallBack(
+    cnv, UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, &status
+  );
+  if ( cnv && !U_FAILURE( status ) )
+    return cnv;
+
+  if ( cnv )
+    ucnv_close( cnv );
+  throw invalid_argument( charset );
+}
+
+/**
+ * Reports whether data in \a charset has to be transcoded to become UTF-8.
+ *
+ * @param charset The name of the character encoding to check.
+ * @return \c false only if ucnv_compareNames() considers \a charset equal to
+ * "ASCII" or "UTF-8".
+ */
+bool icu_streambuf::is_necessary( char const *charset ) {
+  //
+  // NOTE(review): the original comment here claimed that "US-ASCII" needs no
+  // explicit check because ICU knows about aliases.  ucnv_compareNames() looks
+  // like a lenient string comparison rather than an alias lookup, so
+  // "US-ASCII" is presumably still reported as needing transcoding -- confirm.
+  //
+  bool const differs_from_ascii = ucnv_compareNames( charset, "ASCII" ) != 0;
+  bool const differs_from_utf8 = ucnv_compareNames( charset, "UTF-8" ) != 0;
+  return differs_from_ascii && differs_from_utf8;
+}
+
+/**
+ * Reports whether a converter for \a charset can be created, by creating one
+ * and closing it again immediately.
+ */
+bool icu_streambuf::is_supported( char const *charset ) {
+  try {
+    UConverter *const probe = create_conv( charset );
+    ucnv_close( probe );
+  }
+  catch ( invalid_argument const& ) {
+    return false;
+  }
+  return true;
+}
+
+/**
+ * Drops all transcoding state, then seeks the original streambuf; offsets
+ * refer to the original byte stream.
+ */
+icu_streambuf::pos_type icu_streambuf::seekoff( off_type o, ios_base::seekdir d,
+                                                ios_base::openmode m ) {
+  clear();
+  pos_type const new_pos = original()->pubseekoff( o, d, m );
+  return new_pos;
+}
+
+/**
+ * Drops all transcoding state, then seeks the original streambuf; positions
+ * refer to the original byte stream.
+ */
+icu_streambuf::pos_type icu_streambuf::seekpos( pos_type p,
+                                                ios_base::openmode m ) {
+  clear();
+  pos_type const new_pos = original()->pubseekpos( p, m );
+  return new_pos;
+}
+
+/**
+ * Hands the buffer to the original streambuf.  That call's result is
+ * discarded; this object is always returned.
+ */
+streambuf* icu_streambuf::setbuf( char_type *p, streamsize s ) {
+  (void)original()->pubsetbuf( p, s );
+  return this;
+}
+
+/**
+ * Synchronizes the original streambuf and returns its result.
+ */
+int icu_streambuf::sync() {
+  int const rc = original()->pubsync();
+  return rc;
+}
+
+/**
+ * Writes one UTF-8 byte.  In pass-through mode the byte goes straight to the
+ * original streambuf; otherwise it is fed to the converter and whatever
+ * external bytes result (none while a multi-byte character is incomplete) are
+ * written to the original streambuf.
+ *
+ * @param c The byte to write, or EOF.
+ * @return \a c on success; EOF if \a c is EOF or the original streambuf did
+ * not accept all of the output.
+ * @throws ZorbaException (ZOSE0006) from to_external() on a conversion error.
+ */
+icu_streambuf::int_type icu_streambuf::overflow( int_type c ) {
+#if ZORBA_DEBUG_ICU_STREAMBUF
+  printf( "overflow()\n" );
+#endif
+  // EOF carries no character: test for it before the pass-through branch so
+  // that it is never truncated to a char and written.
+  if ( traits_type::eq_int_type( c, traits_type::eof() ) )
+    return traits_type::eof();
+
+  if ( no_conv_ )
+    return original()->sputc( traits_type::to_char_type( c ) );
+
+  char_type const utf8_byte = traits_type::to_char_type( c );
+  char_type const *from = &utf8_byte;
+  char_type const *const from_end = from + 1;
+  char ebuf[ Small_External_Buf_Size ];
+  bool wrote = false;
+
+  while ( true ) {
+    char *to = ebuf;
+    // false means ebuf filled up before all pending output was delivered;
+    // keep draining instead of relying on an assert that NDEBUG removes.
+    bool const done = to_external( &from, from_end, &to, ebuf + sizeof ebuf );
+    streamsize const n = to - ebuf;
+    if ( n ) {
+      if ( original()->sputn( ebuf, n ) != n )
+        return traits_type::eof();
+      wrote = true;
+    }
+    if ( done )
+      break;
+    if ( !n )                           // no progress: don't spin forever
+      return traits_type::eof();
+  }
+  if ( wrote )
+    p_.reset();
+
+  return c;
+}
+
+/**
+ * Converts UTF-8 bytes to the external encoding, pivoting through p_'s
+ * UChar buffer.
+ *
+ * @param from In: start of the UTF-8 input; out: first unconsumed byte.
+ * @param from_end One past the end of the UTF-8 input.
+ * @param to In: start of the output buffer; out: one past the last byte
+ * written.
+ * @param to_end One past the end of the output buffer.
+ * @param flush Passed to ICU as its flush flag.
+ * @return \c false if ICU reported a truncated character or a full output
+ * buffer; \c true otherwise.
+ * @throws ZorbaException (ZOSE0006) for any other ICU failure.
+ */
+bool icu_streambuf::to_external( char_type const **from,
+                                 char_type const *from_end, char **to,
+                                 char const *to_end, bool flush ) {
+  // The limit is one past the last UChar.  sizeof yields bytes, so it has to
+  // be divided by the element size; otherwise ICU may write past the array.
+  UChar *const pivot_limit =
+    p_.pivot_buf_ + sizeof p_.pivot_buf_ / sizeof p_.pivot_buf_[0];
+
+  UErrorCode err = U_ZERO_ERROR;
+  ucnv_convertEx(
+    external_conv_, utf8_conv_, to, to_end, from, from_end,
+    p_.pivot_buf_, &p_.pivot_source_, &p_.pivot_target_,
+    pivot_limit,
+    /*reset*/ false, flush, &err
+  );
+  if ( err == U_TRUNCATED_CHAR_FOUND || err == U_BUFFER_OVERFLOW_ERROR )
+    return false;
+  if ( U_FAILURE( err ) )
+    throw ZORBA_EXCEPTION(
+      zerr::ZOSE0006_TRANSCODING_ERROR, ERROR_PARAMS( u_errorName( err ) )
+    );
+  return true;
+}
+
+/**
+ * Converts bytes in the external encoding to UTF-8, pivoting through g_'s
+ * UChar buffer.
+ *
+ * @param from In: start of the external input; out: first unconsumed byte.
+ * @param from_end One past the end of the external input.
+ * @param to In: start of the UTF-8 output buffer; out: one past the last
+ * byte written.
+ * @param to_end One past the end of the output buffer.
+ * @param flush Passed to ICU as its flush flag.
+ * @return \c false if ICU reported a truncated character or a full output
+ * buffer; \c true otherwise.
+ * @throws ZorbaException (ZOSE0006) for any other ICU failure.
+ */
+bool icu_streambuf::to_utf8( char const **from, char const *from_end,
+                             char_type **to, char_type const *to_end,
+                             bool flush ) {
+  // The limit is one past the last UChar.  sizeof yields bytes, so it has to
+  // be divided by the element size; otherwise ICU may write past the array.
+  UChar *const pivot_limit =
+    g_.pivot_buf_ + sizeof g_.pivot_buf_ / sizeof g_.pivot_buf_[0];
+
+  UErrorCode err = U_ZERO_ERROR;
+  ucnv_convertEx(
+    utf8_conv_, external_conv_, to, to_end, from, from_end,
+    g_.pivot_buf_, &g_.pivot_source_, &g_.pivot_target_,
+    pivot_limit,
+    /*reset*/ false, flush, &err
+  );
+  if ( err == U_TRUNCATED_CHAR_FOUND || err == U_BUFFER_OVERFLOW_ERROR )
+    return false;
+  if ( U_FAILURE( err ) )
+    throw ZORBA_EXCEPTION(
+      zerr::ZOSE0006_TRANSCODING_ERROR, ERROR_PARAMS( u_errorName( err ) )
+    );
+  return true;
+}
+
+/**
+ * Makes the next UTF-8 byte available without consuming it.
+ *
+ * In pass-through mode this simply peeks at the original streambuf.
+ * Otherwise, when the get area is exhausted, external bytes are pulled from
+ * the original streambuf one at a time until the converter emits some UTF-8
+ * output, which then becomes the new get area.
+ *
+ * @return The next byte, or EOF if the original streambuf ran out first.
+ */
+icu_streambuf::int_type icu_streambuf::underflow() {
+#if ZORBA_DEBUG_ICU_STREAMBUF
+  printf( "underflow()\n" );
+#endif
+  if ( no_conv_ )
+    return original()->sgetc();
+
+  if ( gptr() < egptr() )               // still have converted bytes
+    return traits_type::to_int_type( *gptr() );
+
+  utf8::storage_type *out = g_.utf8_char_;
+  utf8::storage_type const *const out_end = out + sizeof g_.utf8_char_;
+
+  do {
+    int_type const next = original()->sbumpc();
+    if ( traits_type::eq_int_type( next, traits_type::eof() ) )
+      return traits_type::eof();
+
+    char const ext_byte = traits_type::to_char_type( next );
+    char const *in = &ext_byte;
+
+    to_utf8( &in, in + 1, &out, out_end );
+  } while ( out == g_.utf8_char_ );     // nothing emitted yet: need more input
+
+  setg( g_.utf8_char_, g_.utf8_char_, out );
+  g_.reset();
+  return traits_type::to_int_type( *gptr() );
+}
+
+/**
+ * Bulk read: fills \a to with up to \a size UTF-8 bytes.
+ *
+ * In pass-through mode the request is forwarded unchanged.  Otherwise any
+ * bytes still in the get area are copied first, then external bytes are read
+ * from the original streambuf in chunks and converted directly into \a to.
+ *
+ * @param to The buffer to receive the UTF-8 bytes.
+ * @param size The capacity of \a to in bytes.
+ * @return The number of UTF-8 bytes stored in \a to.
+ */
+streamsize icu_streambuf::xsgetn( char_type *to, streamsize size ) {
+#if ZORBA_DEBUG_ICU_STREAMBUF
+  printf( "xsgetn()\n" );
+#endif
+  if ( no_conv_ )
+    return original()->sgetn( to, size );
+
+  streamsize return_size = 0;
+  char_type *const to_end = to + size;
+
+  if ( streamsize const gsize = egptr() - gptr() ) {
+    // must first get any chars in g_.utf8_char_
+    streamsize const n = min( gsize, size );
+    traits_type::copy( to, gptr(), n );
+    gbump( n );
+    to += n;
+    size -= n, return_size += n;
+  }
+
+  while ( size > 0 ) {
+    char ebuf[ Large_External_Buf_Size ];
+    // Never read more external bytes than there is room left in `to`.
+    // NOTE(review): the UTF-8 form can be longer than the external form, and
+    // to_utf8()'s return value (false on a full output buffer) is ignored
+    // below.  Presumably the unconverted remainder stays in the pivot buffer /
+    // converter until the next call -- confirm it can't be lost when the
+    // original streambuf is already at EOF.
+    streamsize const get = min( (streamsize)(sizeof ebuf), size );
+    if ( streamsize const got = original()->sgetn( ebuf, get ) ) {
+      char const *from = ebuf;
+      char_type const *const to_orig = to;
+      // Peek to find out whether this was the last chunk, in which case the
+      // converter is told to flush.
+      int_type const peek = original()->sgetc();
+      bool const flush = traits_type::eq_int_type( peek, traits_type::eof() );
+      to_utf8( &from, from + got, &to, to_end, flush );
+      // to_utf8() advanced `to` past the bytes it wrote.
+      streamsize const n = to - to_orig;
+      size -= n, return_size += n;
+      if ( flush )
+        break;
+    } else
+      break;
+  }
+  return return_size;
+}
+
+/**
+ * Bulk write: transcodes \a size UTF-8 bytes and writes the result to the
+ * original streambuf.
+ *
+ * In pass-through mode the request is forwarded unchanged.  Otherwise the
+ * input is converted through a fixed-size stack buffer, which is written out
+ * and refilled until the converter reports that nothing is pending.
+ *
+ * @param from The UTF-8 bytes to write.
+ * @param size The number of bytes at \a from.
+ * @return The number of UTF-8 bytes consumed from \a from.
+ * @throws ZorbaException (ZOSE0006) from to_external() on a conversion error.
+ */
+streamsize icu_streambuf::xsputn( char_type const *from, streamsize size ) {
+#if ZORBA_DEBUG_ICU_STREAMBUF
+  printf( "xsputn()\n" );
+#endif
+  if ( no_conv_ )
+    return original()->sputn( from, size );
+
+  streamsize return_size = 0;
+  if ( size <= 0 )
+    return return_size;
+
+  char_type const *const from_end = from + size;
+  char ebuf[ Large_External_Buf_Size ];
+  char const *const to_end = ebuf + sizeof ebuf;
+
+  while ( true ) {
+    char_type const *const from_orig = from;
+    char *to = ebuf;
+    // false means ebuf filled up: converted data is still pending even if all
+    // of the input has been consumed, so stopping merely because the input ran
+    // out would leave that data unwritten.
+    bool const done = to_external( &from, from_end, &to, to_end );
+    streamsize const produced = to - ebuf;
+    if ( produced && !original()->sputn( ebuf, produced ) )
+      break;
+    streamsize const consumed = from - from_orig;
+    return_size += consumed;
+    if ( done || ( !produced && !consumed ) )   // finished, or no progress
+      break;
+  }
+  return return_size;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace zorba
+/* vim:set et sw=2 ts=2: */

=== added file 'src/util/icu_streambuf.h'
--- src/util/icu_streambuf.h	1970-01-01 00:00:00 +0000
+++ src/util/icu_streambuf.h	2012-02-08 06:02:21 +0000
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ZORBA_ICU_STREAMBUF_H
+#define ZORBA_ICU_STREAMBUF_H
+
+#include <zorba/transcode_stream.h>
+
+#include "util/utf8_util.h"
+
+namespace zorba {
+
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * An %icu_streambuf is-a std::streambuf for transcoding character encodings
+ * from/to UTF-8 on-the-fly.
+ *
+ * To use it, replace a stream's streambuf:
+ * \code
+ *  istream is;
+ *  // ...
+ *  icu_streambuf xbuf( "ISO-8859-1", is.rdbuf() );
+ *  is.ios::rdbuf( &xbuf );
+ * \endcode
+ * Note that the %icu_streambuf must exist for as long as it's being used by
+ * the stream.  If you are replacing the streambuf for a stream you did not
+ * create, you should set it back to the original streambuf:
+ * \code
+ *  void f( ostream &os ) {
+ *    icu_streambuf xbuf( "ISO-8859-1", os.rdbuf() );
+ *    try {
+ *      os.ios::rdbuf( &xbuf );
+ *      // ...
+ *    }
+ *    catch ( ... ) {
+ *      os.ios::rdbuf( xbuf.original() );
+ *      throw;
+ *    }
+ *  }
+ * \endcode
+ *
+ * While %icu_streambuf does support seeking, the positions are relative to the
+ * original byte stream.
+ */
+class icu_streambuf : public proxy_streambuf {
+public:
+  /**
+   * Constructs an %icu_streambuf.
+   *
+   * @param charset The name of the character encoding to convert from/to.
+   * @param orig The original streambuf to read/write from/to.
+   */
+  icu_streambuf( char const *charset, std::streambuf *orig );
+
+  /**
+   * Destructs an %icu_streambuf.
+   */
+  ~icu_streambuf();
+
+  /**
+   * Checks whether it would be necessary to transcode from the given character
+   * encoding to UTF-8.
+   *
+   * @param charset The name of the character encoding to check.
+   * @return \c true only if it would be necessary to transcode from the given
+   * character encoding to UTF-8.
+   */
+  static bool is_necessary( char const *charset );
+
+  /**
+   * Checks whether the given character set is supported for transcoding.
+   *
+   * @param charset The name of the character encoding to check.
+   * @return \c true only if the character encoding is supported.
+   */
+  static bool is_supported( char const *charset );
+
+protected:
+  pos_type seekoff( off_type, std::ios_base::seekdir, std::ios_base::openmode );
+  pos_type seekpos( pos_type, std::ios_base::openmode );
+  std::streambuf* setbuf( char_type*, std::streamsize );
+  int sync();
+  int_type overflow( int_type );
+  int_type underflow();
+
+  /**
+   * Reads and consumes one byte.
+   *
+   * When no conversion is done, underflow() only peeks at the original
+   * streambuf and never fills this object's get area.  The inherited uflow()
+   * would then read through gptr(), which sits at the end of the (empty) get
+   * area, and would never advance the original streambuf.  Hence the byte is
+   * consumed from the original streambuf directly in that mode.
+   */
+  int_type uflow() {
+    return no_conv_ ? original()->sbumpc() : proxy_streambuf::uflow();
+  }
+
+  std::streamsize xsgetn( char_type*, std::streamsize );
+  std::streamsize xsputn( char_type const*, std::streamsize );
+
+private:
+  // ICU pivot buffer plus the two cursors ucnv_convertEx() maintains in it.
+  struct buf_type_base {
+    UChar pivot_buf_[ 4096 ], *pivot_source_, *pivot_target_;
+
+    buf_type_base() { reset(); }
+    void reset();
+  };
+
+  // State for reading: a pivot buffer plus the storage used as the get area.
+  struct gbuf_type : buf_type_base {
+    utf8::encoded_char_type utf8_char_;
+  };
+  gbuf_type g_;
+
+  // State for writing: just a pivot buffer.
+  typedef buf_type_base pbuf_type;
+  pbuf_type p_;
+
+  bool const no_conv_;                  // true = no conversion needed
+  UConverter *const external_conv_, *const utf8_conv_;
+
+  void clear();
+  static UConverter* create_conv( char const *charset );
+  void resetg();
+
+  bool to_external( char_type const **from, char_type const *from_end,
+                    char **to, char const *to_end, bool flush = false );
+
+  bool to_utf8( char const **from, char const *from_end, char_type **to,
+                char_type const *to_end, bool flush = false );
+
+  // forbid
+  icu_streambuf( icu_streambuf const& );
+  icu_streambuf& operator=( icu_streambuf const& );
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace zorba
+#endif  /* ZORBA_ICU_STREAMBUF_H */
+/* vim:set et sw=2 ts=2: */

=== added file 'src/util/passthru_streambuf.cpp'
--- src/util/passthru_streambuf.cpp	1970-01-01 00:00:00 +0000
+++ src/util/passthru_streambuf.cpp	2012-02-08 06:02:21 +0000
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "passthru_streambuf.h"
+
+using namespace std;
+
+namespace zorba {
+
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Constructs a %passthru_streambuf that wraps \a orig.  The character-set
+ * argument is unnamed and unused.
+ *
+ * @throws std::invalid_argument if \a orig is null.
+ */
+passthru_streambuf::passthru_streambuf( char const*, streambuf *orig ) :
+  proxy_streambuf( orig )
+{
+  bool const have_orig = orig != 0;
+  if ( !have_orig )
+    throw invalid_argument( "null streambuf" );
+}
+
+/**
+ * Destructs a %passthru_streambuf; there is nothing to release.
+ */
+passthru_streambuf::~passthru_streambuf() {
+  // out-of-line since it's virtual
+}
+
+/**
+ * Forwards the locale to the original streambuf; the locale that call returns
+ * is discarded.
+ */
+void passthru_streambuf::imbue( std::locale const &loc ) {
+  (void)original()->pubimbue( loc );
+}
+
+/**
+ * Reports whether data in the given character encoding would have to be
+ * transcoded to become UTF-8.  The name is compared after trimming whitespace
+ * and upper-casing it.
+ *
+ * @return \c false only for "ASCII", "US-ASCII" and "UTF-8".
+ */
+bool passthru_streambuf::is_necessary( char const *cc_charset ) {
+  zstring name( cc_charset );
+  ascii::trim_whitespace( name );
+  ascii::to_upper( name );
+  bool const not_ascii = name != "ASCII" && name != "US-ASCII";
+  bool const not_utf8 = name != "UTF-8";
+  return not_ascii && not_utf8;
+}
+
+/**
+ * Checks whether the given character set is supported.  Since a
+ * %passthru_streambuf never transcodes, only encodings for which
+ * is_necessary() returns \c false are supported.
+ *
+ * @param cc_charset The name of the character encoding to check.
+ * @return \c true only if the character encoding is supported.
+ */
+bool passthru_streambuf::is_supported( char const *cc_charset ) {
+  // The parameter is cc_charset; the former `charset` named nothing in scope.
+  return !is_necessary( cc_charset );
+}
+
+/**
+ * Forwards a relative seek to the original streambuf.
+ */
+passthru_streambuf::pos_type
+passthru_streambuf::seekoff( off_type o, ios_base::seekdir d,
+                             ios_base::openmode m ) {
+  pos_type const new_pos = original()->pubseekoff( o, d, m );
+  return new_pos;
+}
+
+/**
+ * Forwards an absolute seek to the original streambuf.
+ */
+passthru_streambuf::pos_type
+passthru_streambuf::seekpos( pos_type p, ios_base::openmode m ) {
+  pos_type const new_pos = original()->pubseekpos( p, m );
+  return new_pos;
+}
+
+/**
+ * Hands the buffer to the original streambuf.  That call's result is
+ * discarded; this object is always returned.
+ */
+streambuf* passthru_streambuf::setbuf( char_type *p, streamsize s ) {
+  (void)original()->pubsetbuf( p, s );
+  return this;
+}
+
+/**
+ * Returns the original streambuf's in_avail() result.
+ */
+streamsize passthru_streambuf::showmanyc() {
+  streamsize const avail = original()->in_avail();
+  return avail;
+}
+
+/**
+ * Synchronizes the original streambuf and returns its result.
+ */
+int passthru_streambuf::sync() {
+  int const rc = original()->pubsync();
+  return rc;
+}
+
+/**
+ * Writes one character to the original streambuf.
+ *
+ * @param c The character to write, or EOF.
+ * @return The original streambuf's sputc() result; EOF if \a c is EOF (the
+ * same convention icu_streambuf::overflow() uses when transcoding).
+ */
+passthru_streambuf::int_type passthru_streambuf::overflow( int_type c ) {
+  // EOF carries no character; passing it to sputc() would truncate it to a
+  // char and write that byte.
+  if ( traits_type::eq_int_type( c, traits_type::eof() ) )
+    return traits_type::eof();
+  return original()->sputc( traits_type::to_char_type( c ) );
+}
+
+/**
+ * Puts a character back into the original streambuf.
+ *
+ * @param c The character to put back, or EOF to merely back up by one
+ * position (which is how sungetc() calls this function).
+ * @return The result of the original streambuf's sungetc() or sputbackc().
+ */
+passthru_streambuf::int_type passthru_streambuf::pbackfail( int_type c ) {
+  // EOF means "back up without changing the character"; converting it to a
+  // char and pushing that back would demand a specific (wrong) character.
+  if ( traits_type::eq_int_type( c, traits_type::eof() ) )
+    return original()->sungetc();
+  return original()->sputbackc( traits_type::to_char_type( c ) );
+}
+
+/**
+ * Reads and consumes one character from the original streambuf.
+ */
+passthru_streambuf::int_type passthru_streambuf::uflow() {
+  int_type const c = original()->sbumpc();
+  return c;
+}
+
+/**
+ * Peeks at the next character of the original streambuf without consuming it.
+ */
+passthru_streambuf::int_type passthru_streambuf::underflow() {
+  int_type const c = original()->sgetc();
+  return c;
+}
+
+/**
+ * Forwards a bulk read to the original streambuf.
+ *
+ * @return The number of characters the original streambuf delivered.
+ */
+streamsize passthru_streambuf::xsgetn( char_type *to, streamsize size ) {
+  streamsize const got = original()->sgetn( to, size );
+  return got;
+}
+
+/**
+ * Forwards a bulk write to the original streambuf.
+ *
+ * @return The number of characters the original streambuf accepted.
+ */
+streamsize passthru_streambuf::xsputn( char_type const *from,
+                                       streamsize size ) {
+  streamsize const put = original()->sputn( from, size );
+  return put;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace zorba
+/* vim:set et sw=2 ts=2: */

=== added file 'src/util/passthru_streambuf.h'
--- src/util/passthru_streambuf.h	1970-01-01 00:00:00 +0000
+++ src/util/passthru_streambuf.h	2012-02-08 06:02:21 +0000
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ZORBA_PASSTHRU_STREAMBUF_H
+#define ZORBA_PASSTHRU_STREAMBUF_H
+
+#include <zorba/transcode_streambuf.h>
+
+namespace zorba {
+
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * A %passthru_streambuf is-a std::streambuf that does no transcoding at all:
+ * every operation is forwarded unchanged to the original streambuf.  It is
+ * the streambuf used in place of a transcoding one when ZORBA_NO_UNICODE is
+ * defined.
+ */
+class passthru_streambuf : public proxy_streambuf {
+public:
+  /**
+   * Constructs a %passthru_streambuf.
+   *
+   * @param charset The name of the character encoding (unused).
+   * @param orig The original streambuf to read/write from/to.
+   */
+  passthru_streambuf( char const *charset, std::streambuf *orig );
+
+  /**
+   * Destructs a %passthru_streambuf.
+   */
+  ~passthru_streambuf();
+
+  /**
+   * Checks whether it would be necessary to transcode from the given character
+   * encoding to UTF-8.
+   *
+   * @param charset The name of the character encoding to check.
+   * @return \c true only if it would be necessary to transcode from the given
+   * character encoding to UTF-8.
+   */
+  static bool is_necessary( char const *charset );
+
+  /**
+   * Checks whether the given character set is supported for transcoding.
+   *
+   * @param charset The name of the character encoding to check.
+   * @return \c true only if the character encoding is supported.
+   */
+  static bool is_supported( char const *charset );
+
+protected:
+  void imbue( std::locale const& );
+  pos_type seekoff( off_type, std::ios_base::seekdir, std::ios_base::openmode );
+  pos_type seekpos( pos_type, std::ios_base::openmode );
+  std::streambuf* setbuf( char_type*, std::streamsize );
+  std::streamsize showmanyc();
+  int sync();
+  int_type overflow( int_type );
+  int_type pbackfail( int_type );
+  int_type uflow();
+  int_type underflow();
+  std::streamsize xsgetn( char_type*, std::streamsize );
+  std::streamsize xsputn( char_type const*, std::streamsize );
+
+private:
+  // forbid
+  passthru_streambuf( passthru_streambuf const& );
+  passthru_streambuf& operator=( passthru_streambuf const& );
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace zorba
+#endif  /* ZORBA_PASSTHRU_STREAMBUF_H */
+/* vim:set et sw=2 ts=2: */

=== added file 'src/util/transcode_streambuf.h'
--- src/util/transcode_streambuf.h	1970-01-01 00:00:00 +0000
+++ src/util/transcode_streambuf.h	2012-02-08 06:02:21 +0000
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ZORBA_TRANSCODE_STREAMBUF_H
+#define ZORBA_TRANSCODE_STREAMBUF_H
+
+#include <zorba/config.h>
+
+///////////////////////////////////////////////////////////////////////////////
+
+#ifdef ZORBA_NO_UNICODE
+# include "passthru_streambuf.h"
+#else
+# include "icu_streambuf.h"
+#endif /* ZORBA_NO_UNICODE */
+
+namespace zorba {
+namespace internal {
+namespace transcode {
+
+// transcode::streambuf names whichever implementation this build provides:
+// the pass-through one when ZORBA_NO_UNICODE is defined, the ICU-based one
+// otherwise.
+#ifdef ZORBA_NO_UNICODE
+typedef passthru_streambuf streambuf;
+#else
+typedef icu_streambuf streambuf;
+#endif /* ZORBA_NO_UNICODE */
+
+} // namespace transcode
+} // namespace internal
+} // namespace zorba
+
+///////////////////////////////////////////////////////////////////////////////
+
+#endif  /* ZORBA_TRANSCODE_STREAMBUF_H */
+/* vim:set et sw=2 ts=2: */

=== added file 'test/rbkt/ExpQueryResults/zorba/file/cp1252.xml.res'
--- test/rbkt/ExpQueryResults/zorba/file/cp1252.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/file/cp1252.xml.res	2012-02-08 06:02:21 +0000
@@ -0,0 +1,1 @@
+üäö

=== added file 'test/rbkt/Queries/zorba/file/cp1252.txt'
--- test/rbkt/Queries/zorba/file/cp1252.txt	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/file/cp1252.txt	2012-02-08 06:02:21 +0000
@@ -0,0 +1,1 @@
+�== added file 'test/rbkt/Queries/zorba/file/cp1252.xq'
--- test/rbkt/Queries/zorba/file/cp1252.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/file/cp1252.xq	2012-02-08 06:02:21 +0000
@@ -0,0 +1,3 @@
+import module namespace f = "http://expath.org/ns/file";
+
+f:read-text(fn:resolve-uri("cp1252.txt"), "CP1252")

=== added file 'test/rbkt/Queries/zorba/file/invalid_encoding.spec'
--- test/rbkt/Queries/zorba/file/invalid_encoding.spec	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/file/invalid_encoding.spec	2012-02-08 06:02:21 +0000
@@ -0,0 +1,1 @@
+Error: http://expath.org/ns/file:FOFL0006

=== added file 'test/rbkt/Queries/zorba/file/invalid_encoding.xq'
--- test/rbkt/Queries/zorba/file/invalid_encoding.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/file/invalid_encoding.xq	2012-02-08 06:02:21 +0000
@@ -0,0 +1,3 @@
+import module namespace f = "http://expath.org/ns/file";
+
+f:read-text(fn:resolve-uri("cp1252.txt"), "FOO")

=== modified file 'test/rbkt/Queries/zorba/http-client/send-request/http2-read-svg.xq'
--- test/rbkt/Queries/zorba/http-client/send-request/http2-read-svg.xq	2011-08-23 07:11:31 +0000
+++ test/rbkt/Queries/zorba/http-client/send-request/http2-read-svg.xq	2012-02-08 06:02:21 +0000
@@ -7,9 +7,9 @@
                auth-method="Basic"
                send-authorization="true"
                username="zorba"
-               password="blub"/>;
+               password="blub"
+               override-media-type="application/xml; charset=utf-8"/>;
 
 variable $http-res := http:send-request($req, (), ());
 
 $http-res[2]
-


Follow ups