← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~paul-lucas/zorba/bug-996593 into lp:zorba

 

Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/bug-996593 into lp:zorba.

Requested reviews:
  Paul J. Lucas (paul-lucas)
  Dennis Knochenwefel (dennis-knochenwefel)
Related bugs:
  Bug #996593 in Zorba: "http client throws error with no content-type header in http response"
  https://bugs.launchpad.net/zorba/+bug/996593

For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/bug-996593/+merge/105293

1. In transcoding streambufs, throwing std::invalid_argument for empty charsets.
2. In the HTTP code, now setting the charset to ISO-8859-1 in the constructor so it's set even when there's no Content-Type header.
-- 
https://code.launchpad.net/~paul-lucas/zorba/bug-996593/+merge/105293
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'modules/com/zorba-xquery/www/modules/http-client.xq.src/error_thrower.h'
--- modules/com/zorba-xquery/www/modules/http-client.xq.src/error_thrower.h	2012-05-03 12:31:51 +0000
+++ modules/com/zorba-xquery/www/modules/http-client.xq.src/error_thrower.h	2012-05-10 04:16:19 +0000
@@ -20,7 +20,6 @@
 #include <curl/curl.h>
 
 namespace zorba {
-
 namespace http_client {
 
 class ErrorThrower 
@@ -36,18 +35,19 @@
     theHeaderList(aHeaderList)
   {
   }
-  
-  void raiseException(String aNamespace, String aLocalName, String aDescription) 
+
+  void raiseException( String const &aNamespace, String const &aLocalName,
+                       String const &aDescription )
   {
     if (*theHeaderList) 
-    {
       curl_slist_free_all(*theHeaderList);
-    }
 
-    UserException ex = USER_EXCEPTION(theFactory->createQName(aNamespace, aLocalName), aDescription);
-    throw ex;
+    throw USER_EXCEPTION(
+      theFactory->createQName(aNamespace, aLocalName), aDescription
+    );
   }
 };
   
-}} //namespace zorba, http_client
-    
+} // namespace http_client
+} // namespace zorba
+/* vim:set et sw=2 ts=2: */

=== modified file 'modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.cpp'
--- modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.cpp	2012-05-03 12:31:51 +0000
+++ modules/com/zorba-xquery/www/modules/http-client.xq.src/http_response_parser.cpp	2012-05-10 04:16:19 +0000
@@ -18,15 +18,17 @@
 #include <assert.h>
 #include <iostream>
 
-#include <zorba/item_factory.h>
-#include <zorba/item.h>
-#include <zorba/xmldatamanager.h>
 #include <zorba/base64.h>
 #include <zorba/config.h>
+#include <zorba/diagnostic_list.h>
 #include <zorba/error.h>
+#include <zorba/item.h>
+#include <zorba/item_factory.h>
+#include <zorba/transcode_stream.h>
+#include <zorba/xmldatamanager.h>
+#include <zorba/xquery_exception.h>
 #include <zorba/xquery_exception.h>
 #include <zorba/xquery_functions.h>
-#include <zorba/transcode_stream.h>
 
 #include "http_response_parser.h"
 #include "http_request_handler.h"
@@ -39,6 +41,9 @@
   std::string::size_type pos = s.find( ';' );
   *mime_type = s.substr( 0, pos );
 
+  // The HTTP/1.1 spec says that the default charset is ISO-8859-1.
+  *charset = "ISO-8859-1";
+
   if ( pos != std::string::npos ) {
     //
     // Parse: charset="?XXXXX"?[ (comment)]
@@ -57,9 +62,6 @@
         *charset = t;
       } 
     }
-  } else {
-    // The HTTP/1.1 spec says that the default charset is ISO-8859-1.
-    *charset = "ISO-8859-1";
   }
 }
 
@@ -67,12 +69,18 @@
   
   HttpResponseParser::HttpResponseParser(RequestHandler& aHandler, CURL* aCurl,
                                          ErrorThrower& aErrorThrower,
-                                         std::string aOverridenContentType, bool aStatusOnly)
-  : 
-  theHandler(aHandler), theCurl(aCurl), theErrorThrower(aErrorThrower),
-  theStatus(-1), theStreamBuffer(0), theInsideRead(false),
+                                         std::string aOverridenContentType,
+                                         bool aStatusOnly) : 
+  theHandler(aHandler),
+  theCurl(aCurl),
+  theErrorThrower(aErrorThrower),
+  theCurrentCharset("ISO-8859-1"),      // HTTP/1.1 says this is the default
+  theStatus(-1),
+  theStreamBuffer(0),
+  theInsideRead(false),
   theOverridenContentType(aOverridenContentType),
-  theStatusOnly(aStatusOnly), theSelfContained(true)
+  theStatusOnly(aStatusOnly),
+  theSelfContained(true)
   {
     registerHandler();
     theStreamBuffer = new zorba::curl::streambuf(theCurl);
@@ -100,14 +108,21 @@
       }
 
       std::auto_ptr<std::istream> lStream;
-      if ( transcode::is_necessary( theCurrentCharset.c_str() ) ) {
-        lStream.reset(
-          new transcode::stream<std::istream>(
-            theCurrentCharset.c_str(), theStreamBuffer
-          )
+      try {
+        if ( transcode::is_necessary( theCurrentCharset.c_str() ) ) {
+          lStream.reset(
+            new transcode::stream<std::istream>(
+              theCurrentCharset.c_str(), theStreamBuffer
+            )
+          );
+        } else
+          lStream.reset(new std::istream(theStreamBuffer));
+      }
+      catch ( std::invalid_argument const &e ) {
+        theErrorThrower.raiseException(
+          "http://www.zorba-xquery.com/errors";, "ZXQP0006", e.what()
         );
-      } else
-        lStream.reset(new std::istream(theStreamBuffer));
+      }
 
       Item lItem;
       if (theCurrentContentType == "text/xml" ||

=== modified file 'src/diagnostics/diagnostic_en.xml'
--- src/diagnostics/diagnostic_en.xml	2012-05-03 12:31:51 +0000
+++ src/diagnostics/diagnostic_en.xml	2012-05-10 04:16:19 +0000
@@ -1176,7 +1176,7 @@
        using the specified encoding, the resulting characters are not 
        permitted XML characters or requested encoding not supported
       </comment>
-      <value>"$1": Cannot decode resource retrieved</value>
+      <value>"$1": can not decode resource retrieved</value>
     </diagnostic>  
 
     <!--////////// XQuery Update Facility //////////////////////////////////-->

=== modified file 'src/diagnostics/pregenerated/dict_en.cpp'
--- src/diagnostics/pregenerated/dict_en.cpp	2012-05-03 12:31:51 +0000
+++ src/diagnostics/pregenerated/dict_en.cpp	2012-05-10 04:16:19 +0000
@@ -75,7 +75,7 @@
   { "FOUP0001", "first operand of fn:put() is not a node of a supported kind" },
   { "FOUP0002", "second operand of fn:put() is not a valid lexical representation of the xs:anyURI type" },
   { "FOUT1170", "\"$1\": error retrieving resource containing text" },
-  { "FOUT1190", "\"$1\": Cannot decode resource retrieved" },
+  { "FOUT1190", "\"$1\": can not decode resource retrieved" },
 #if !defined(ZORBA_NO_FULL_TEXT)
   { "FTDY0016", "\"$1\": invalid weight: absolute value must be in [0,1000]" },
 #endif

=== modified file 'src/util/icu_streambuf.cpp'
--- src/util/icu_streambuf.cpp	2012-05-03 12:31:51 +0000
+++ src/util/icu_streambuf.cpp	2012-05-10 04:16:19 +0000
@@ -23,6 +23,7 @@
 
 #include <algorithm>
 #include <cassert>
+#include <stdexcept>
 
 #include <zorba/diagnostic_list.h>
 
@@ -95,12 +96,14 @@
   if ( !conv || U_FAILURE( err ) ) {
     if ( conv )
       ucnv_close( conv );
-    throw invalid_argument( charset );
+    throw invalid_argument( u_errorName( err ) );
   }
   return conv;
 }
 
 bool icu_streambuf::is_necessary( char const *cc_charset ) {
+  if ( !*cc_charset )
+    throw invalid_argument( "empty charset" );
   //
   // Apparently, ucnv_compareNames() doesn't consider "US-ASCII" an alias for
   // "ASCII", so check for "US-ASCII" ourselves.

=== modified file 'src/util/icu_streambuf.h'
--- src/util/icu_streambuf.h	2012-05-03 14:32:47 +0000
+++ src/util/icu_streambuf.h	2012-05-10 04:16:19 +0000
@@ -65,6 +65,8 @@
    *
    * @param charset The name of the character encoding to convert from/to.
    * @param orig The original streambuf to read/write from/to.
+   * @throws std::invalid_argument if either \a charset is invalid
+   * or \a orig is \c null.
    */
   icu_streambuf( char const *charset, std::streambuf *orig );
 
@@ -80,6 +82,7 @@
    * @param charset The name of the character encoding to check.
    * @return \c true only if t would be necessary to transcode from the given
    * character encoding to UTF-8.
+   * @throws std::invalid_argument if \a charset is invalid.
    */
   static bool is_necessary( char const *charset );
 

=== modified file 'src/util/passthru_streambuf.cpp'
--- src/util/passthru_streambuf.cpp	2012-05-03 12:31:51 +0000
+++ src/util/passthru_streambuf.cpp	2012-05-10 04:16:19 +0000
@@ -14,8 +14,11 @@
  * limitations under the License.
  */
 
+#include <stdexcept>
+
 #include "stdafx.h"
 #include "passthru_streambuf.h"
+
 using namespace std;
 
 namespace zorba {
@@ -38,6 +41,8 @@
 }
 
 bool passthru_streambuf::is_necessary( char const *cc_charset ) {
+  if ( !*cc_charset )
+    throw invalid_argument( "empty charset" );
   zstring charset( cc_charset );
   ascii::trim_whitespace( charset );
   ascii::to_upper( charset );

=== modified file 'src/util/passthru_streambuf.h'
--- src/util/passthru_streambuf.h	2012-05-03 12:31:51 +0000
+++ src/util/passthru_streambuf.h	2012-05-10 04:16:19 +0000
@@ -18,22 +18,36 @@
 #define ZORBA_PASSTHRU_STREAMBUF_H
 
 #include <zorba/transcode_stream.h>
+
+#include "util/ascii_util.h"
 #include "zorbatypes/zstring.h"
-#include "util/ascii_util.h"
+
 namespace zorba {
 
 ///////////////////////////////////////////////////////////////////////////////
 
 /**
- * A %passthru_streambuf is-a std::streambuf TODO
+ * A %passthru_streambuf is-a std::streambuf that simply passes through
+ * characters unchanged.
  */
 class passthru_streambuf : public proxy_streambuf {
 public:
+#ifdef WIN32
+  // These typedefs are needed (but shouldn't be) when using MSVC++.
+  typedef std::streambuf::char_type char_type;
+  typedef std::streambuf::int_type int_type;
+  typedef std::streambuf::off_type off_type;
+  typedef std::streambuf::pos_type pos_type;
+  typedef std::streambuf::traits_type traits_type;
+#endif /* WIN32 */
+
   /**
    * Constructs an %passthru_streambuf.
    *
    * @param charset The name of the character encoding to convert from/to.
    * @param orig The original streambuf to read/write from/to.
+   * @throws std::invalid_argument if either \a charset is invalid
+   * or \a orig is \c null.
    */
   passthru_streambuf( char const *charset, std::streambuf *orig );
 
@@ -43,19 +57,23 @@
   ~passthru_streambuf();
 
   /**
+   * Checks whether it would be necessary to transcode from the given character
+   * encoding to UTF-8.
+   *
+   * @param charset The name of the character encoding to check.
+   * @return \c true only if t would be necessary to transcode from the given
+   * character encoding to UTF-8.
+   * @throws std::invalid_argument if \a charset is invalid.
+   */
+  static bool is_necessary( char const *charset );
+
+  /**
    * Checks whether the given character set is supported for transcoding.
    *
    * @param charset The name of the character encoding to check.
    * @return \c true only if the character encoding is supported.
    */
   static bool is_supported( char const *charset );
-  static bool is_necessary( char const *cc_charset );
-
-  typedef std::streambuf::char_type char_type;
-  typedef std::streambuf::int_type int_type;
-  typedef std::streambuf::off_type off_type;
-  typedef std::streambuf::pos_type pos_type;
-  typedef std::streambuf::traits_type traits_type;
 
 protected:
   void imbue( std::locale const& );


Follow ups