zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #12377
[Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba
Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/pjl-misc into lp:zorba.
Requested reviews:
Paul J. Lucas (paul-lucas)
For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/pjl-misc/+merge/115403
Added functions to test for and create UTF-16 surrogate pairs.
These will probably be needed by whoever fixes bug #1025622.
--
https://code.launchpad.net/~paul-lucas/zorba/pjl-misc/+merge/115403
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/util/unicode_util.h'
--- src/util/unicode_util.h 2012-07-16 23:37:51 +0000
+++ src/util/unicode_util.h 2012-07-17 17:58:21 +0000
@@ -136,40 +136,6 @@
bool is_ucschar( code_point c );
/**
- * Checks whether the given value is a "high surrogate."
- *
- * @param n The value to check.
- * @return Returns \c true only if \a n is a high surrogate.
- */
-inline bool is_high_surrogate( unsigned long n ) {
- return n >= 0xD800 && n <= 0xDBFF;
-}
-
-/**
- * Checks whether the given value is a "low surrogate."
- *
- * @param n The value to check.
- * @return Returns \c true only if \a n is a low surrogate.
- */
-inline bool is_low_surrogate( unsigned long n ) {
- return n >= 0xDC00 && n <= 0xDFFF;
-}
-
-/**
- * Converts the given high and low surrogate values into the code-point they
- * represent. Note that no checking is done on the parameters.
- *
- * @param high The high surrogate value.
- * @param low The low surrogate value.
- * @return Returns the represented code-point.
- * @see is_high_surrogate()
- * @see is_low_surrogate()
- */
-inline code_point convert_surrogate( unsigned high, unsigned low ) {
- return 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00);
-}
-
-/**
* Checks whether the given code-point is valid.
*
* @param c The code-point to check.
@@ -338,6 +304,71 @@
return to_string( in.data(), static_cast<size_type>( in.size() ), out );
}
+////////// UTF-16 surrogate pairs /////////////////////////////////////////////
+
+/**
+ * Combines a UTF-16 surrogate pair into the single code-point that the pair
+ * encodes.  The arguments are used as given: neither one is validated.
+ *
+ * @param high The high surrogate value.
+ * @param low The low surrogate value.
+ * @return Returns the code-point encoded by \a high and \a low.
+ * @see is_high_surrogate()
+ * @see is_low_surrogate()
+ */
+inline code_point convert_surrogate( unsigned high, unsigned low ) {
+  unsigned const offset = ((high - 0xD800) << 10) + (low - 0xDC00);
+  return 0x10000 + offset;
+}
+
+/**
+ * Converts the given code-point into the high and low surrogate values that
+ * represent it.  No checking is done: \a c must be in a supplementary plane.
+ *
+ * @tparam ResultType The integer type for the results.
+ * @param c The code-point to convert; see is_supplementary_plane().
+ * @param high A pointer to where to put the high surrogate.
+ * @param low A pointer to where to put the low surrogate.
+ */
+template<typename ResultType> inline
+typename std::enable_if<ZORBA_TR1_NS::is_integral<ResultType>::value,
+                        void>::type
+convert_surrogate( code_point c, ResultType *high, ResultType *low ) {
+  code_point const n = c - 0x10000;
+  *high = 0xD800 + (static_cast<unsigned>(n) >> 10);
+  *low  = 0xDC00 + (n & 0x3FF);
+}
+
+/**
+ * Tests whether \a n lies in the high-surrogate range, 0xD800 through 0xDBFF.
+ *
+ * @param n The value to test.
+ * @return Returns \c true only if \a n is a high surrogate.
+ */
+inline bool is_high_surrogate( unsigned long n ) {
+  return !(n < 0xD800 || n > 0xDBFF);
+}
+
+/**
+ * Tests whether \a n lies in the low-surrogate range, 0xDC00 through 0xDFFF.
+ *
+ * @param n The value to test.
+ * @return Returns \c true only if \a n is a low surrogate.
+ */
+inline bool is_low_surrogate( unsigned long n ) {
+  return !(n < 0xDC00 || n > 0xDFFF);
+}
+
+/**
+ * Tests whether the given code-point lies in a "supplementary plane," i.e.,
+ * in the range 0x10000 through 0x10FFFF that UTF-16 encodes as a surrogate pair.
+ *
+ * @param c The code-point to test.
+ * @return Returns \c true only if \a c is within a supplementary plane.
+ */
+inline bool is_supplementary_plane( code_point c ) {
+  return !(c < 0x10000 || c > 0x10FFFF);
+}
+
///////////////////////////////////////////////////////////////////////////////
} // namespace unicode
Follow ups