zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #16834
[Merge] lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba
Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba.
Commit message:
Streambuf for validating UTF-8 on-the-fly.
Requested reviews:
Paul J. Lucas (paul-lucas)
For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/feature-utf8_streambuf/+merge/142440
Streambuf for validating UTF-8 on-the-fly.
--
https://code.launchpad.net/~paul-lucas/zorba/feature-utf8_streambuf/+merge/142440
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/unit_tests/CMakeLists.txt'
--- src/unit_tests/CMakeLists.txt 2013-01-05 00:57:49 +0000
+++ src/unit_tests/CMakeLists.txt 2013-01-09 05:11:31 +0000
@@ -23,6 +23,8 @@
test_uri.cpp
test_uuid.cpp
unit_tests.cpp
+ test_uri.cpp
+ test_utf8_streambuf.cpp
)
IF (NOT ZORBA_NO_FULL_TEXT)
=== added file 'src/unit_tests/test_utf8_streambuf.cpp'
--- src/unit_tests/test_utf8_streambuf.cpp 1970-01-01 00:00:00 +0000
+++ src/unit_tests/test_utf8_streambuf.cpp 2013-01-09 05:11:31 +0000
@@ -0,0 +1,166 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stdafx.h"
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+#include <zorba/zorba_exception.h>
+
+#include "util/utf8_streambuf.h"
+
+using namespace std;
+using namespace zorba;
+
+#define SMILEY_FACE "\xF0\x9F\x98\x8A" // four-byte UTF-8 sequence
+#define COPYRIGHT_UTF8 "\xC2\xA9" // two-byte UTF-8 sequence
+#define ONE_THIRD_UTF8 "\xE2\x85\x93" // three-byte UTF-8 sequence
+
+#define BAD_COPYRIGHT_1_UTF8 "\x42\xA9" // COPYRIGHT_UTF8 with its first byte changed to 0x42
+#define BAD_COPYRIGHT_2_UTF8 "\xC2\x79" // COPYRIGHT_UTF8 with its second byte changed to 0x79
+
+static char const *const tests_good[] = { // inputs for which every test_*() helper must return true
+ "Hello, world!",
+ "Copyright " COPYRIGHT_UTF8 " 2012",
+ ONE_THIRD_UTF8 " cup sugar",
+ "Smiley " SMILEY_FACE,
+ "Smiley 2 " SMILEY_FACE SMILEY_FACE,
+ SMILEY_FACE " Smiley",
+ SMILEY_FACE SMILEY_FACE " 2 Smiley",
+ 0 // null sentinel: the driver loop stops here
+};
+
+static char const *const tests_bad[] = { // inputs for which every test_*() helper must throw ZorbaException
+ "Copyright " BAD_COPYRIGHT_1_UTF8 " 2012",
+ "Copyright " BAD_COPYRIGHT_2_UTF8 " 2012",
+ 0 // null sentinel: the driver loop stops here
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+static int failures;
+
+static bool assert_true( int no, char const *expr, int line, bool result ) {
+  if ( result )                         // passing checks are silent
+    return true;
+  cout << '#' << no << " FAILED, line " << line << ": " << expr << endl;
+  ++failures;                           // tallied for the final summary
+  return false;
+}
+
+static void print_exception( int no, char const *expr, int line,
+ std::exception const &e ) {
+ assert_true( no, expr, line, false ); // unconditionally record a failure for this check
+ cout << "+ exception: " << e.what() << endl; // then show the exception's message
+}
+
+#define ASSERT_TRUE( NO, EXPR ) assert_true( NO, #EXPR, __LINE__, !!(EXPR) )
+// As ASSERT_TRUE, but any exception escaping EXPR is also recorded as a failure.
+#define ASSERT_TRUE_AND_NO_EXCEPTION( NO, EXPR ) \
+ try { ASSERT_TRUE( NO, EXPR ); } \
+ catch ( exception const &e ) { print_exception( NO, #EXPR, __LINE__, e ); } \
+ catch ( ... ) { assert_true( NO, #EXPR, __LINE__, false ); }
+// Passes only if EXPR throws a ZorbaException; no throw or any other exception fails.
+#define ASSERT_EXCEPTION( NO, EXPR ) \
+ try { EXPR; assert_true( NO, #EXPR, __LINE__, false ); } \
+ catch ( ZorbaException const &e ) { } \
+ catch ( ... ) { assert_true( NO, #EXPR, __LINE__, false ); }
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Round-trips `test` through getline() on a stream wrapped by utf8::streambuf.
+static bool test_getline( char const *test ) {
+  istringstream in( test );
+  utf8::streambuf validator( in.rdbuf() );
+  in.ios::rdbuf( &validator );
+  in.exceptions( ios::badbit );
+
+  char line[ 1024 ];
+  in.getline( line, sizeof line );
+  streamsize const n_read = in.gcount();
+  if ( !n_read )
+    return false;                       // nothing extracted counts as failure
+  return string( line, n_read ) == test;
+}
+
+// Round-trips `test` through read() on a stream wrapped by utf8::streambuf.
+static bool test_read( char const *test ) {
+  istringstream in( test );
+  utf8::streambuf validator( in.rdbuf() );
+  in.ios::rdbuf( &validator );
+  in.exceptions( ios::badbit );
+
+  char chunk[ 1024 ];
+  in.read( chunk, sizeof chunk );
+  streamsize const n_read = in.gcount();
+  if ( !n_read )
+    return false;                       // nothing extracted counts as failure
+  return string( chunk, n_read ) == test;
+}
+
+// Inserts `test` with operator<< through a put-validating utf8::streambuf.
+static bool test_insertion( char const *test ) {
+  ostringstream out;
+  utf8::streambuf validator( out.rdbuf(), true );
+  out.ios::rdbuf( &validator );
+  out.exceptions( ios::badbit );
+
+  out << test << flush;
+  return out.str() == test;
+}
+
+// Writes `test` one byte at a time with put() through a validating streambuf.
+static bool test_put( char const *test ) {
+  ostringstream out;
+  utf8::streambuf validator( out.rdbuf(), true );
+  out.ios::rdbuf( &validator );
+  out.exceptions( ios::badbit );
+
+  char const *p = test;
+  while ( *p )
+    out.put( *p++ );
+  return out.str() == test;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+namespace zorba {
+namespace UnitTests {
+
+int test_utf8_streambuf( int, char*[] ) {
+ int test_no = 0; // running index printed with each failure; shared by both loops
+ for ( char const *const *s = tests_good; *s; ++s, ++test_no ) {
+ ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_getline( *s ) );
+ ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_read( *s ) );
+ ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_insertion( *s ) );
+ ASSERT_TRUE_AND_NO_EXCEPTION( test_no, test_put( *s ) );
+ }
+ for ( char const *const *s = tests_bad; *s; ++s, ++test_no ) { // each helper must throw here
+ ASSERT_EXCEPTION( test_no, test_getline( *s ) );
+ ASSERT_EXCEPTION( test_no, test_read( *s ) );
+ ASSERT_EXCEPTION( test_no, test_insertion( *s ) );
+ ASSERT_EXCEPTION( test_no, test_put( *s ) );
+ }
+ cout << failures << " test(s) failed\n";
+ return failures ? 1 : 0; // non-zero result when any check failed
+}
+
+} // namespace UnitTests
+} // namespace zorba
+/* vim:set et sw=2 ts=2: */
=== modified file 'src/unit_tests/unit_test_list.h'
--- src/unit_tests/unit_test_list.h 2012-09-19 21:16:15 +0000
+++ src/unit_tests/unit_test_list.h 2013-01-09 05:11:31 +0000
@@ -56,6 +56,7 @@
int test_unique_ptr( int, char*[] );
#endif /* ZORBA_HAVE_UNIQUE_PTR */
+ int test_utf8_streambuf( int, char*[] );
int test_uuid( int, char*[] );
#ifndef ZORBA_HAVE_UNORDERED_MAP
=== modified file 'src/unit_tests/unit_tests.cpp'
--- src/unit_tests/unit_tests.cpp 2012-11-12 21:17:32 +0000
+++ src/unit_tests/unit_tests.cpp 2013-01-09 05:11:31 +0000
@@ -61,6 +61,7 @@
libunittests["unique_ptr"] = test_unique_ptr;
#endif /* ZORBA_HAVE_UNIQUE_PTR */
+ libunittests["utf8_streambuf"] = test_utf8_streambuf;
libunittests["uuid"] = test_uuid;
#ifndef ZORBA_HAVE_UNORDERED_MAP
=== modified file 'src/util/CMakeLists.txt'
--- src/util/CMakeLists.txt 2012-12-12 01:04:54 +0000
+++ src/util/CMakeLists.txt 2013-01-09 05:11:31 +0000
@@ -29,6 +29,7 @@
unicode_categories.cpp
uri_util.cpp
utf8_util.cpp
+ utf8_streambuf.cpp
xml_util.cpp
fx/fxcharheap.cpp
string/empty_rep_base.cpp
=== modified file 'src/util/icu_streambuf.h'
--- src/util/icu_streambuf.h 2012-12-27 18:50:25 +0000
+++ src/util/icu_streambuf.h 2013-01-09 05:11:31 +0000
@@ -46,12 +46,12 @@
* try {
* os.ios::rdbuf( &xbuf );
* // ...
+ * os.ios::rdbuf( xbuf.original() );
* }
* catch ( ... ) {
* os.ios::rdbuf( xbuf.original() );
* throw;
* }
- * os.ios::rdbuf( xbuf.original() );
* }
* \endcode
*
=== added file 'src/util/utf8_streambuf.cpp'
--- src/util/utf8_streambuf.cpp 1970-01-01 00:00:00 +0000
+++ src/util/utf8_streambuf.cpp 2013-01-09 05:11:31 +0000
@@ -0,0 +1,259 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stdafx.h"
+
+//#define ZORBA_DEBUG_UTF8_STREAMBUF
+#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
+# include <stdio.h>
+#endif
+
+#include <iomanip>
+#include <stdexcept>
+
+#include <zorba/config.h>
+#include <zorba/diagnostic_list.h>
+
+#include "diagnostics/diagnostic.h"
+#include "diagnostics/zorba_exception.h"
+#include "util/cxx_util.h"
+#include "util/oseparator.h"
+#include "util/string_util.h"
+#include "util/utf8_util.h"
+
+#include "utf8_streambuf.h"
+
+using namespace std;
+
+namespace zorba {
+namespace utf8 {
+
+///////////////////////////////////////////////////////////////////////////////
+
+inline void streambuf::buf_type::clear() {
+ char_len_ = 0; // zero makes validate() treat the next byte as the first byte of a character
+}
+
+// Formats `len` bytes of `buf` in hex, resets this buffer, and throws ZXQD0006.
+void streambuf::buf_type::throw_invalid_utf8( storage_type *buf,
+                                              size_type len ) {
+  ostringstream oss;
+  oss << hex << setfill('0') << uppercase;      // these settings are sticky
+  oseparator comma( ',' );      // presumably puts ',' between items -- confirm
+
+  for ( size_type i = 0; i < len; ++i )         // setw() lasts for one item
+    oss << comma << "0x" << setw(2)             // only, so re-apply per byte
+        << (static_cast<unsigned>( buf[i] ) & 0xFF);
+  clear();                      // next byte seen starts a new character
+  throw ZORBA_EXCEPTION(
+    zerr::ZXQD0006_INVALID_UTF8_BYTE_SEQUENCE, ERROR_PARAMS( oss.str() )
+  );
+}
+
+void streambuf::buf_type::validate( storage_type c, bool bump ) {
+ size_type char_len_copy = char_len_, cur_len_copy = cur_len_;
+ // Work on copies so that a peek (bump == false) leaves the members untouched.
+ if ( !char_len_copy ) {
+ //
+ // This means we're (hopefully) at the first byte of a UTF-8 byte sequence
+ // comprising a character.
+ //
+ if ( !(char_len_copy = char_length( c )) )
+ throw_invalid_utf8( &c, 1 );
+ cur_len_copy = 0;
+ }
+ // Tentatively store c in the per-character buffer, remembering the old byte.
+ storage_type *const cur_byte_ptr = utf8_char_ + cur_len_copy;
+ storage_type const old_byte = *cur_byte_ptr;
+ *cur_byte_ptr = c;
+ // Every byte after the first of a character must be a continuation byte.
+ if ( cur_len_copy++ && !is_continuation_byte( c ) )
+ throw_invalid_utf8( utf8_char_, cur_len_copy );
+ // Commit the new state only when consuming; otherwise undo the store.
+ if ( bump ) {
+ char_len_ = (cur_len_copy == char_len_copy ? 0 : char_len_copy);
+ cur_len_ = cur_len_copy;
+ } else {
+ *cur_byte_ptr = old_byte;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+inline void streambuf::clear() {
+ gbuf_.clear(); // forget any partially read character
+ pbuf_.clear(); // forget any partially written character
+}
+
+streambuf::streambuf( std::streambuf *orig, bool validate_put ) :
+ internal::proxy_streambuf( orig ),
+ validate_put_( validate_put )
+{
+ if ( !orig ) // there must be a wrapped buffer to forward to
+ throw invalid_argument( "null streambuf" );
+ clear(); // start both validators at a character boundary
+}
+
+void streambuf::imbue( std::locale const &loc ) {
+ original()->pubimbue( loc ); // forwarded unchanged to the wrapped buffer
+}
+
+void streambuf::resync() {
+  // Peek first, then advance-and-peek with snextc(): unlike sbumpc(), which
+  // returns the byte it consumes, this leaves the start byte found unread.
+  int_type c = original()->sgetc();
+  while ( !traits_type::eq_int_type( c, traits_type::eof() ) &&
+          !is_start_byte( traits_type::to_char_type( c ) ) )
+    c = original()->snextc();
+}
+
+streambuf::pos_type streambuf::seekoff( off_type o, ios_base::seekdir d,
+ ios_base::openmode m ) {
+ clear(); // a partially seen character is discarded before seeking
+ return original()->pubseekoff( o, d, m ); // the seek itself is done by the wrapped buffer
+}
+
+streambuf::pos_type streambuf::seekpos( pos_type p, ios_base::openmode m ) {
+ clear(); // a partially seen character is discarded before seeking
+ return original()->pubseekpos( p, m ); // the seek itself is done by the wrapped buffer
+}
+
+std::streambuf* streambuf::setbuf( char_type *p, streamsize s ) {
+ original()->pubsetbuf( p, s ); // forwarded; the wrapped buffer's result is not inspected
+ return this; // always returns this proxy
+}
+
+streamsize streambuf::showmanyc() {
+ return original()->in_avail(); // forwarded unchanged to the wrapped buffer
+}
+
+int streambuf::sync() {
+ return original()->pubsync(); // forwarded unchanged to the wrapped buffer
+}
+
+streambuf::int_type streambuf::overflow( int_type c ) {
+#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
+  printf( "overflow()\n" );
+#endif
+  if ( traits_type::eq_int_type( c, traits_type::eof() ) )
+    return traits_type::eof();
+  char_type const ch = traits_type::to_char_type( c );
+  if ( validate_put_ )
+    pbuf_.validate( ch, true );         // throws on an invalid byte sequence
+  return original()->sputc( ch );       // eof here reports a failed write
+}
+
+streambuf::int_type streambuf::pbackfail( int_type c ) {
+  // sputbackc() signals failure with eof (non-zero), so compare against it.
+  if ( traits_type::eq_int_type( c, traits_type::eof() ) || !gbuf_.cur_len_ ||
+       traits_type::eq_int_type( traits_type::eof(),
+         original()->sputbackc( traits_type::to_char_type( c ) ) ) )
+    return traits_type::eof();
+  --gbuf_.cur_len_;                     // one byte fewer of the character seen
+  return c;
+}
+
+streambuf::int_type streambuf::uflow() {
+#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
+  printf( "uflow()\n" );
+#endif
+  // Consume one byte from the wrapped buffer and advance the validator state.
+  int_type const next = original()->sbumpc();
+  if ( !traits_type::eq_int_type( next, traits_type::eof() ) )
+    gbuf_.validate( traits_type::to_char_type( next ) );
+  return next;
+}
+
+streambuf::int_type streambuf::underflow() {
+#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
+  printf( "underflow()\n" );
+#endif
+  // Peek at the next byte and check it without committing validator state.
+  int_type const next = original()->sgetc();
+  if ( !traits_type::eq_int_type( next, traits_type::eof() ) )
+    gbuf_.validate( traits_type::to_char_type( next ), false );
+  return next;
+}
+
+streamsize streambuf::xsgetn( char_type *to, streamsize size ) {
+#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
+ printf( "xsgetn()\n" );
+#endif
+ streamsize return_size = 0;
+ // A character is only partially read: first fetch just its remaining bytes.
+ if ( gbuf_.char_len_ ) {
+ streamsize const want = gbuf_.char_len_ - gbuf_.cur_len_;
+ streamsize const get = min( want, size );
+ streamsize const got = original()->sgetn( to, get );
+ for ( streamsize i = 0; i < got; ++i )
+ gbuf_.validate( to[i] );
+ to += got;
+ size -= got, return_size += got;
+ }
+ // Then read in bulk, validating each byte, until size is met or sgetn() yields 0.
+ while ( size > 0 ) {
+ if ( streamsize const got = original()->sgetn( to, size ) ) {
+ for ( streamsize i = 0; i < got; ++i )
+ gbuf_.validate( to[i] );
+ to += got;
+ size -= got, return_size += got;
+ } else
+ break;
+ }
+ return return_size; // total number of bytes stored into the caller's buffer
+}
+
+streamsize streambuf::xsputn( char_type const *from, streamsize size ) {
+#ifdef ZORBA_DEBUG_UTF8_STREAMBUF
+  printf( "xsputn()\n" );
+#endif
+  streamsize i = validate_put_ ? 0 : size;      // skip the loop if unvalidated
+  while ( i < size )                            // validate before any write
+    pbuf_.validate( from[ i++ ] );
+  return original()->sputn( from, size );
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Both new & delete are done inside Zorba rather than in the header to
+// guarantee that they're cross-DLL-boundary safe on Windows.
+
+std::streambuf* alloc_streambuf( std::streambuf *orig ) {
+ return new utf8::streambuf( orig ); // validate_put takes its default (false)
+}
+
+int get_streambuf_index() {
+ //
+ // This function is out-of-line because it has a static constant within it.
+ // It has a static constant within it to guarantee (1) initialization before
+ // use and (2) initialization happens exactly once.
+ //
+ // See: "Standard C++ IOStreams and Locales: Advanced Programmer's Guide and
+ // Reference," Angelika Langer and Klaus Kreft, Addison-Wesley, 2000, section
+ // 3.3.1.1: "Initializing and Maintaining the iword/pword Index."
+ //
+ // See: "The C++ Programming Language," Bjarne Stroustrup, Addison-Wesley,
+ // 2000, section 10.4.8: "Local Static Store."
+ //
+ static int const index = ios_base::xalloc(); // one index, shared by every caller
+ return index;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace utf8
+} // namespace zorba
+/* vim:set et sw=2 ts=2: */
=== added file 'src/util/utf8_streambuf.h'
--- src/util/utf8_streambuf.h 1970-01-01 00:00:00 +0000
+++ src/util/utf8_streambuf.h 2013-01-09 05:11:31 +0000
@@ -0,0 +1,322 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ZORBA_UTF8_STREAMBUF_H
+#define ZORBA_UTF8_STREAMBUF_H
+
+#include <zorba/internal/streambuf.h>
+
+#include "util/utf8_util.h"
+
+namespace zorba {
+namespace utf8 {
+
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * A %utf8::streambuf is-a std::streambuf for validating UTF-8 on-the-fly.
+ * To use it, replace a stream's streambuf:
+ * \code
+ * istream is;
+ * // ...
+ * utf8::streambuf xbuf( is.rdbuf() );
+ * is.ios::rdbuf( &xbuf );
+ * \endcode
+ * Note that the %utf8::streambuf must exist for as long as it's being used by
+ * the stream. If you are replacing the streambuf for a stream you did not
+ * create, you should set it back to the original streambuf:
+ * \code
+ * void f( ostream &os ) {
+ * utf8::streambuf xbuf( os.rdbuf() );
+ * try {
+ * os.ios::rdbuf( &xbuf );
+ * // ...
+ * os.ios::rdbuf( xbuf.original() );
+ * }
+ * catch ( ... ) {
+ * os.ios::rdbuf( xbuf.original() );
+ * throw;
+ * }
+ * }
+ * \endcode
+ *
+ * If an invalid UTF-8 byte sequence is read, then the stream's \c badbit is
+ * set. Hence using a %utf8::streambuf requires rigorous error-checking.
+ *
+ * However, if exceptions are enabled for the stream, then
+ * \c ZXQD0006_INVALID_UTF8_BYTE_SEQUENCE is thrown. (When enabling exceptions
+ * for a stream you didn't create, you should set the exception mask back to
+ * the original mask.)
+ * \code
+ * istream is;
+ * std::ios::iostate const orig_exceptions = is.exceptions();
+ * try {
+ * is.exceptions( orig_exceptions | ios::badbit );
+ * // ...
+ * is.exceptions( orig_exceptions );
+ * }
+ * catch ( ... ) {
+ * is.exceptions( orig_exceptions );
+ * throw;
+ * }
+ * \endcode
+ *
+ * While %utf8::streambuf does support seeking, the positions must always be on
+ * the first byte of a UTF-8 character.
+ */
+class streambuf : public internal::proxy_streambuf {
+public:
+ /**
+ * Constructs a %streambuf.
+ *
+ * @param orig The original streambuf to read/write from/to.
+ * @param validate_put If \c true, characters written are validated;
+ * if \c false, characters are written without validation, i.e., it's assumed
+ * that you're writing valid UTF-8.
+ * @throws std::invalid_argument if \a orig is \c null.
+ */
+ streambuf( std::streambuf *orig, bool validate_put = false );
+
+ /**
+ * If an invalid UTF-8 byte sequence was read, resynchronizes by skipping
+ * bytes until a new UTF-8 start byte is encountered.
+ */
+ void resync();
+
+protected:
+ void imbue( std::locale const& ); // forwards to the original streambuf
+ pos_type seekoff( off_type, std::ios_base::seekdir, std::ios_base::openmode );
+ pos_type seekpos( pos_type, std::ios_base::openmode );
+ std::streambuf* setbuf( char_type*, std::streamsize );
+ std::streamsize showmanyc();
+ int sync();
+ int_type overflow( int_type ); // single-byte write path
+ int_type pbackfail( int_type ); // put-back path
+ int_type uflow(); // single-byte read that consumes the byte
+ int_type underflow(); // single-byte read that only peeks
+ std::streamsize xsgetn( char_type*, std::streamsize ); // bulk read path
+ std::streamsize xsputn( char_type const*, std::streamsize ); // bulk write path
+
+private:
+ struct buf_type { // validation state for one direction (get or put)
+ encoded_char_type utf8_char_; // bytes of the character currently being validated
+ size_type char_len_; // expected byte length of that character; 0 = at a boundary
+ size_type cur_len_; // how many of its bytes have been seen so far
+
+ void clear();
+ void throw_invalid_utf8( storage_type *buf, size_type len );
+ void validate( storage_type, bool bump = true );
+ };
+
+ buf_type gbuf_, pbuf_; // gbuf_ tracks bytes read, pbuf_ tracks bytes written
+ bool const validate_put_; // whether written bytes are validated
+
+ void clear(); // resets both gbuf_ and pbuf_
+
+ // forbid
+ streambuf( streambuf const& );
+ streambuf& operator=( streambuf const& );
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+std::streambuf* alloc_streambuf( std::streambuf *orig ); // creates the utf8::streambuf used by attach()
+
+int get_streambuf_index(); // pword() index under which attach() stores that streambuf
+
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Attaches a utf8::streambuf to a stream. Unlike using a
+ * utf8::streambuf directly, this function will create the streambuf,
+ * attach it to the stream, and manage it for the lifetime of the stream
+ * automatically.
+ *
+ * @param ios The stream to attach the utf8::streambuf to. If the stream
+ * already has a utf8::streambuf attached to it, this function does
+ * nothing.
+ */
+template<typename charT,typename Traits> inline
+void attach( std::basic_ios<charT,Traits> &ios ) {
+ int const index = get_streambuf_index();
+ void *&pword = ios.pword( index );
+ if ( !pword ) { // a non-null slot means a streambuf is already attached
+ std::streambuf *const buf = alloc_streambuf( ios.rdbuf() );
+ ios.rdbuf( buf );
+ pword = buf; // remembered so detach() and is_attached() can find it
+ ios.register_callback( internal::stream_callback, index ); // presumably cleans up with the stream -- confirm
+ }
+}
+
+/**
+ * Detaches a previously attached utf8::streambuf from a stream. The streambuf
+ * is destroyed and the stream's original streambuf is restored.
+ *
+ * @param ios The stream to detach the utf8::streambuf from. If the stream
+ * doesn't have a utf8::streambuf attached to it, this function does nothing.
+ */
+template<typename charT,typename Traits> inline
+void detach( std::basic_ios<charT,Traits> &ios ) {
+ int const index = get_streambuf_index();
+ if ( streambuf *const buf = static_cast<streambuf*>( ios.pword( index ) ) ) {
+ ios.pword( index ) = 0; // mark as detached before restoring and freeing
+ ios.rdbuf( buf->original() ); // put the stream's original streambuf back
+ internal::dealloc_streambuf( buf );
+ }
+}
+
+/**
+ * Checks whether the given stream has a utf8::streambuf attached.
+ *
+ * @param ios The stream to check.
+ * @return \c true only if a utf8::streambuf is attached.
+ */
+template<typename charT,typename Traits> inline
+bool is_attached( std::basic_ios<charT,Traits> &ios ) {
+ return !!ios.pword( get_streambuf_index() ); // attach() stores a non-null pointer in this slot
+}
+
+/**
+ * A %utf8::auto_attach is a class that attaches a utf8::streambuf to a stream
+ * and automatically detaches it when the %auto_attach object is destroyed.
+ * \code
+ * void f( ostream &os ) {
+ * utf8::auto_attach<ostream> const raii( os );
+ * // ...
+ * }
+ * \endcode
+ * A %utf8::auto_attach is useful for streams not created by you.
+ *
+ * @see http://en.wikipedia.org/wiki/Resource_Acquisition_Is_Initialization
+ */
+template<class StreamType>
+class auto_attach {
+public:
+ /**
+ * Constructs an %auto_attach object calling attach() on the given stream.
+ *
+ * @param stream The stream to attach the utf8::streambuf to. If the stream
+ * already has a utf8::streambuf attached to it, this constructor does
+ * nothing.
+ */
+ auto_attach( StreamType &stream ) : stream_( stream ) {
+ attach( stream );
+ }
+
+ /**
+ * Destroys this %auto_attach object calling detach() on the previously
+ * attached stream.
+ */
+ ~auto_attach() {
+ detach( stream_ );
+ }
+
+private:
+ StreamType &stream_; // the stream given to the constructor; not owned
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * A %utf8::stream is used to wrap a C++ standard I/O stream with a
+ * utf8::streambuf so that UTF-8 validation and the management of the
+ * streambuf happen automatically.
+ *
+ * A %utf8::stream is useful for streams created by you.
+ *
+ * @tparam StreamType The I/O stream class type to wrap. It must be a concrete
+ * stream class.
+ */
+template<class StreamType>
+class stream : public StreamType {
+public:
+ /**
+ * Constructs a %utf8::stream.
+ */
+ stream() :
+#ifdef WIN32
+# pragma warning( push )
+# pragma warning( disable : 4355 )
+#endif /* WIN32 */
+ utf8_buf_( this->rdbuf() ) // wrap the streambuf StreamType set up
+#ifdef WIN32
+# pragma warning( pop )
+#endif /* WIN32 */
+ {
+ init();
+ }
+
+ /**
+ * Constructs a %utf8::stream.
+ *
+ * @tparam StreamArgType The type of the first argument of \a StreamType's
+ * constructor.
+ * @param stream_arg The argument to pass as the first argument to
+ * \a StreamType's constructor.
+ */
+ template<typename StreamArgType>
+ stream( StreamArgType stream_arg ) :
+ StreamType( stream_arg ),
+#ifdef WIN32
+# pragma warning( push )
+# pragma warning( disable : 4355 )
+#endif /* WIN32 */
+ utf8_buf_( this->rdbuf() ) // wrap the streambuf StreamType set up
+#ifdef WIN32
+# pragma warning( pop )
+#endif /* WIN32 */
+ {
+ init();
+ }
+
+ /**
+ * Constructs a %utf8::stream.
+ *
+ * @tparam StreamArgType The type of the first argument of \a StreamType's
+ * constructor.
+ * @param stream_arg The argument to pass as the first argument to
+ * \a StreamType's constructor.
+ * @param mode The open-mode to pass to \a StreamType's constructor.
+ */
+ template<typename StreamArgType>
+ stream( StreamArgType stream_arg, std::ios_base::openmode mode ) :
+ StreamType( stream_arg, mode ),
+#ifdef WIN32
+# pragma warning( push )
+# pragma warning( disable : 4355 )
+#endif /* WIN32 */
+ utf8_buf_( this->rdbuf() ) // wrap the streambuf StreamType set up
+#ifdef WIN32
+# pragma warning( pop )
+#endif /* WIN32 */
+ {
+ init();
+ }
+
+private:
+ streambuf utf8_buf_; // the validating proxy owned by this stream
+
+ void init() { // called by every constructor to install utf8_buf_
+ this->std::ios::rdbuf( &utf8_buf_ );
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace utf8
+} // namespace zorba
+#endif /* ZORBA_UTF8_STREAMBUF_H */
+/* vim:set et sw=2 ts=2: */
=== modified file 'test/unit/CMakeLists.txt'
--- test/unit/CMakeLists.txt 2012-09-19 21:16:15 +0000
+++ test/unit/CMakeLists.txt 2013-01-09 05:11:31 +0000
@@ -153,5 +153,6 @@
ZORBA_ADD_TEST("test/libunit/thesaurus" LibUnitTest thesaurus)
ZORBA_ADD_TEST("test/libunit/tokenizer" LibUnitTest tokenizer)
ENDIF (NOT ZORBA_NO_FULL_TEXT)
+ZORBA_ADD_TEST("test/libunit/utf8_streambuf" LibUnitTest utf8_streambuf)
# vim:set et sw=2 ts=2:
Follow ups
-
[Merge] lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba
From: noreply, 2013-01-26
-
[Merge] lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba
From: Zorba Build Bot, 2013-01-26
-
[Merge] lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba
From: Zorba Build Bot, 2013-01-26
-
[Merge] lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba
From: Paul J. Lucas, 2013-01-26
-
[Merge] lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba
From: Zorba Build Bot, 2013-01-25
-
Re: [Merge] lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba
From: Zorba Build Bot, 2013-01-25
-
[Merge] lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba
From: Matthias Brantner, 2013-01-25
-
Re: [Merge] lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba
From: Matthias Brantner, 2013-01-25
-
Re: [Merge] lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba
From: Sorin Marian Nasoi, 2013-01-24
-
Re: [Merge] lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba
From: Juan Zacarias, 2013-01-24
-
Re: [Merge] lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba
From: Paul J. Lucas, 2013-01-09
-
Re: [Merge] lp:~paul-lucas/zorba/feature-utf8_streambuf into lp:zorba
From: Paul J. Lucas, 2013-01-09