← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba

 

Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/pjl-misc into lp:zorba.

Requested reviews:
  Paul J. Lucas (paul-lucas)

For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/pjl-misc/+merge/115403

Added functions to test for and create UTF-16 surrogate pairs.
These will probably be needed by whoever fixes bug #1025622.
-- 
https://code.launchpad.net/~paul-lucas/zorba/pjl-misc/+merge/115403
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/util/unicode_util.h'
--- src/util/unicode_util.h	2012-07-16 23:37:51 +0000
+++ src/util/unicode_util.h	2012-07-17 17:58:21 +0000
@@ -136,40 +136,6 @@
 bool is_ucschar( code_point c );
 
 /**
- * Checks whether the given value is a "high surrogate."
- *
- * @param n The value to check.
- * @return Returns \c true only if \a n is a high surrogate.
- */
-inline bool is_high_surrogate( unsigned long n ) {
-  return n >= 0xD800 && n <= 0xDBFF;
-}
-
-/**
- * Checks whether the given value is a "low surrogate."
- *
- * @param n The value to check.
- * @return Returns \c true only if \a n is a low surrogate.
- */
-inline bool is_low_surrogate( unsigned long n ) {
-  return n >= 0xDC00 && n <= 0xDFFF;
-}
-
-/**
- * Converts the given high and low surrogate values into the code-point they
- * represent.  Note that no checking is done on the parameters.
- *
- * @param high The high surrogate value.
- * @param low The low surrogate value.
- * @return Returns the represented code-point.
- * @see is_high_surrogate()
- * @see is_low_surrogate()
- */
-inline code_point convert_surrogate( unsigned high, unsigned low ) {
-  return 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00);
-}
-
-/**
  * Checks whether the given code-point is valid.
  *
  * @param c The code-point to check.
@@ -338,6 +304,71 @@
   return to_string( in.data(), static_cast<size_type>( in.size() ), out );
 }
 
+////////// UTF-16 surrogate pairs /////////////////////////////////////////////
+
+/**
+ * Combines a UTF-16 surrogate pair into the single code-point it encodes,
+ * computed as 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00).  Neither
+ * parameter is checked, so validate them first with the functions below.
+ * @param high The high surrogate value; expected to be 0xD800-0xDBFF.
+ * @param low The low surrogate value; expected to be 0xDC00-0xDFFF.
+ * @return Returns the represented code-point.
+ * @see is_high_surrogate()
+ * @see is_low_surrogate()
+ */
+inline code_point convert_surrogate( unsigned high, unsigned low ) {
+  return 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
+}
+
+/**
+ * Splits the given code-point into the UTF-16 high and low surrogate values
+ * that represent it.  \a c is not validated and is assumed to be >= 0x10000.
+ * NOTE(review): the name looks like a typo of convert_surrogate -- confirm.
+ * @tparam ResultType The integer type for the results (must be integral).
+ * @param c The code-point to convert.
+ * @param high Where to put the high surrogate; dereferenced unconditionally.
+ * @param low Where to put the low surrogate; dereferenced unconditionally.
+ */
+template<typename ResultType> inline
+typename std::enable_if<ZORBA_TR1_NS::is_integral<ResultType>::value,
+                        void>::type
+covert_surrogate( code_point c, ResultType *high, ResultType *low ) {
+  code_point const n = c - 0x10000;
+  *high = 0xD800 + (static_cast<unsigned>(n) >> 10);
+  *low  = 0xDC00 + (n & 0x3FF);
+}
+
+/**
+ * Checks whether the given value is a UTF-16 high surrogate, i.e., whether
+ * it lies in the inclusive range 0xD800-0xDBFF.
+ * @param n The value to check.
+ * @return Returns \c true only if \a n is a high surrogate.
+ */
+inline bool is_high_surrogate( unsigned long n ) {
+  return n >= 0xD800 && n <= 0xDBFF;
+}
+
+/**
+ * Checks whether the given value is a UTF-16 low surrogate, i.e., whether
+ * it lies in the inclusive range 0xDC00-0xDFFF.
+ * @param n The value to check.
+ * @return Returns \c true only if \a n is a low surrogate.
+ */
+inline bool is_low_surrogate( unsigned long n ) {
+  return n >= 0xDC00 && n <= 0xDFFF;
+}
+
+/**
+ * Checks whether the given code-point lies in the supplementary range
+ * 0x10000-0x10FFFF (inclusive) and therefore would need a surrogate pair to
+ * be encoded in UTF-16.
+ * @param c The code-point to check.
+ * @return Returns \c true only if \a c is within that range.
+ */
+inline bool is_supplementary_plane( code_point c ) {
+  return c >= 0x10000 && c <= 0x10FFFF;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 
 } // namespace unicode


Follow ups