← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~paul-lucas/zorba/bug-1025622 into lp:zorba

 

Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/bug-1025622 into lp:zorba.

Requested reviews:
  Dennis Knochenwefel (dennis-knochenwefel)
  Paul J. Lucas (paul-lucas)
Related bugs:
  Bug #1025622 in Zorba: "incorrect JSON serialization of supplementory plane code points"
  https://bugs.launchpad.net/zorba/+bug/1025622

For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/bug-1025622/+merge/115636

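Now doing proper JSON serialization: the string is decoded as UTF-8 code points instead of being iterated byte by byte, and code points in the supplementary planes are emitted as a \uXXXX\uXXXX surrogate pair.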
-- 
https://code.launchpad.net/~paul-lucas/zorba/bug-1025622/+merge/115636
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/api/serialization/serializer.cpp'
--- src/api/serialization/serializer.cpp	2012-07-13 06:50:40 +0000
+++ src/api/serialization/serializer.cpp	2012-07-18 23:45:24 +0000
@@ -31,8 +31,10 @@
 #include "api/unmarshaller.h"
 
 #include "util/ascii_util.h"
+#include "util/string_util.h"
+#include "util/unicode_util.h"
+#include "util/utf8_string.h"
 #include "util/utf8_util.h"
-#include "util/string_util.h"
 #include "util/xml_util.h"
 
 #include "system/globalenv.h"
@@ -1203,29 +1205,38 @@
 /*******************************************************************************
 
 ********************************************************************************/
-void serializer::json_emitter::emit_json_string(zstring string)
+void serializer::json_emitter::emit_json_string(zstring const &string)
 {
   tr << '"';
-  zstring::const_iterator i = string.begin();
-  zstring::const_iterator end = string.end();
-  for (; i < end; i++) 
-  {
-    if (*i < 0x20) 
-    {
-      // Escape control sequences
-      std::stringstream hex;
-      hex << "\\u" << std::setw(4) << std::setfill('0')
-          << std::hex << static_cast<int>(*i);
-      tr << hex.str();
-      continue;
-    }
-    if (*i == '\\' || *i == '"') 
-    {
-      // Output escape char for \ or "
-      tr << '\\';
-      // Fall through to output original character
-    }
-    tr << *i;
+  utf8_string<zstring const> const u( string );
+  FOR_EACH( utf8_string<zstring const>, i, u ) {
+    unicode::code_point const cp = *i;
+    if ( ascii::is_cntrl( cp ) ) {
+      std::ostringstream oss;
+      oss << std::hex << std::setfill('0') << "\\u" << std::setw(4) << cp;
+      tr << oss.str();
+      continue;
+    }
+    if ( unicode::is_supplementary_plane( cp ) ) {
+      unsigned high, low;
+      unicode::convert_surrogate( cp, &high, &low );
+      std::ostringstream oss;
+      oss << std::hex << std::setfill('0')
+          << "\\u" << std::setw(4) << high
+          << "\\u" << std::setw(4) << low;
+      tr << oss.str();
+      continue;
+    }
+    switch ( cp ) {
+      case '\\':
+      case '"':
+        tr << '\\';
+        // no break;
+      default: {
+        utf8::encoded_char_type ec;
+        tr.write( ec, utf8::encode( cp, ec ) );
+      }
+    }
   }
   tr << '"';
 }

=== modified file 'src/api/serialization/serializer.h'
--- src/api/serialization/serializer.h	2012-07-13 06:50:40 +0000
+++ src/api/serialization/serializer.h	2012-07-18 23:45:24 +0000
@@ -402,7 +402,7 @@
 
     void emit_jsoniq_xdm_node(store::Item *item, int depth);
 
-    void emit_json_string(zstring string);
+    void emit_json_string(zstring const &string);
 
     store::Item_t theJSONiqValueName;
     store::Item_t theTypeName;

=== modified file 'src/util/ascii_util.h'
--- src/util/ascii_util.h	2012-07-12 17:29:55 +0000
+++ src/util/ascii_util.h	2012-07-18 23:45:24 +0000
@@ -141,6 +141,25 @@
 }
 
 /**
+ * Checks whether the given character is a control character.  This function
+ * exists to make a proper function out of the standard iscntrl(3) that may be
+ * implemented as a macro.
+ *
+ * @param CharType The character type.
+ * @param c The character to check.
+ * @return Returns \c true only if the character is a control character.
+ */
+template<typename CharType> inline
+bool is_cntrl( CharType c ) {
+#ifdef WIN32
+  // Windows' iscntrl() implementation crashes for non-ASCII characters.
+  return __isascii( c ) && iscntrl( c );
+#else
+  return iscntrl( c );
+#endif
+}
+
+/**
  * Checks whether the given character is a decimal digit.  This function exists
  * to make a proper function out of the standard isdigit(3) that may be
  * implemented as a macro.
@@ -160,6 +179,25 @@
 }
 
 /**
+ * Checks whether the given character is a printing character.  This function
+ * exists to make a proper function out of the standard isprint(3) that may be
+ * implemented as a macro.
+ *
+ * @param CharType The character type.
+ * @param c The character to check.
+ * @return Returns \c true only if the character is a printing character.
+ */
+template<typename CharType> inline
+bool is_print( CharType c ) {
+#ifdef WIN32
+  // Windows' isprint() implementation crashes for non-ASCII characters.
+  return __isascii( c ) && isprint( c );
+#else
+  return isprint( c );
+#endif
+}
+
+/**
  * Checks whether the given character is a punctuation character.  This function
  * exists to make a proper function out of the standard ispunct(3) that may be
  * implemented as a macro.


Follow ups