zorba-coders team mailing list archive

Thread
Date

[Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba

To: mp+209760@xxxxxxxxxxxxxxxxxx
From: "Paul J. Lucas" <paul@xxxxxxxxxxxxx>
Date: Thu, 06 Mar 2014 19:09:21 -0000
Reply-to: mp+209760@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/pjl-misc into lp:zorba.

Requested reviews:
  Paul J. Lucas (paul-lucas)

For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/pjl-misc/+merge/209760
-- 
https://code.launchpad.net/~paul-lucas/zorba/pjl-misc/+merge/209760
Your team Zorba Coders is subscribed to branch lp:zorba.

=== modified file 'src/runtime/csv/csv_impl.cpp'
--- src/runtime/csv/csv_impl.cpp	2014-03-01 01:57:00 +0000
+++ src/runtime/csv/csv_impl.cpp	2014-03-06 19:08:12 +0000
@@ -365,7 +365,7 @@
   store::Item_t item;
   vector<store::Item_t> keys_copy, values;
   set<unsigned> keys_omit;
-  zstring value;
+  zstring *value;
   bool eol, quoted, swap_keys = false;
 
   CsvParseIteratorState *state;
@@ -381,7 +381,8 @@
     set_options( item, state );
   }
 
-  while ( state->csv_.next_value( &value, &eol, &quoted ) ) {
+  while ( state->csv_.next_value( &state->value_, &eol, &quoted ) ) {
+    value = &state->value_;
     if ( state->keys_.size() && values.size() == state->keys_.size() &&
          state->extra_name_.empty() ) {
       //
@@ -390,13 +391,13 @@
       //
       throw XQUERY_EXCEPTION(
         csv::EXTRA_VALUE,
-        ERROR_PARAMS( value, state->line_no_ ),
+        ERROR_PARAMS( *value, state->line_no_ ),
         ERROR_LOC( loc )
       );
     }
 
     item = nullptr;
-    if ( value.empty() ) {
+    if ( value->empty() ) {
       if ( state->keys_.empty() ) {
         //
         // Header field names can never be empty.
@@ -408,7 +409,7 @@
         );
       }
       if ( quoted )
-        GENV_ITEMFACTORY->createString( item, value );
+        GENV_ITEMFACTORY->createString( item, *value );
       else
         switch ( state->missing_ ) {
           case missing::error:
@@ -421,15 +422,15 @@
             break;
         }
     } else if ( state->cast_unquoted_ && !quoted && !state->keys_.empty() ) {
-      if ( value == "T" || value == "Y" )
+      if ( *value == "T" || *value == "Y" )
         GENV_ITEMFACTORY->createBoolean( item, true );
-      else if ( value == "F" || value == "N" )
+      else if ( *value == "F" || *value == "N" )
         GENV_ITEMFACTORY->createBoolean( item, false );
       else {
         json::token t;
-        switch ( parse_json( value, &t ) ) {
+        switch ( parse_json( *value, &t ) ) {
           case json::boolean:
-            GENV_ITEMFACTORY->createBoolean( item, value[0] == 't' );
+            GENV_ITEMFACTORY->createBoolean( item, (*value)[0] == 't' );
             break;
           case json::null:
             GENV_ITEMFACTORY->createJSONNull( item );
@@ -437,24 +438,24 @@
           case json::number:
             switch ( t.get_numeric_type() ) {
               case json::token::integer:
-                GENV_ITEMFACTORY->createInteger( item, xs_integer( value ) );
+                GENV_ITEMFACTORY->createInteger( item, xs_integer( *value ) );
                 break;
               case json::token::decimal:
-                GENV_ITEMFACTORY->createDecimal( item, xs_decimal( value ) );
+                GENV_ITEMFACTORY->createDecimal( item, xs_decimal( *value ) );
                 break;
               case json::token::floating_point:
-                GENV_ITEMFACTORY->createDouble( item, xs_double( value ) );
+                GENV_ITEMFACTORY->createDouble( item, xs_double( *value ) );
                 break;
               default:
                 ZORBA_ASSERT( false );
             }
             break;
           default:
-            GENV_ITEMFACTORY->createString( item, value );
+            GENV_ITEMFACTORY->createString( item, *value );
         } // switch
       } // else
     } else {
-      GENV_ITEMFACTORY->createString( item, value );
+      GENV_ITEMFACTORY->createString( item, *value );
     }
 
     if ( !item.isNull() )

=== modified file 'src/runtime/csv/pregenerated/csv.h'
--- src/runtime/csv/pregenerated/csv.h	2014-01-31 21:47:54 +0000
+++ src/runtime/csv/pregenerated/csv.h	2014-03-06 19:08:12 +0000
@@ -57,6 +57,7 @@
   missing::type missing_; //
   bool skip_called_; //
   zstring string_; //
+  zstring value_; //
 
   CsvParseIteratorState();
 

=== modified file 'src/runtime/spec/csv/csv.xml'
--- src/runtime/spec/csv/csv.xml	2014-02-28 01:46:12 +0000
+++ src/runtime/spec/csv/csv.xml	2014-03-06 19:08:12 +0000
@@ -39,6 +39,7 @@
     <zorba:member type="missing::type" name="missing_" defaultValue="missing::null"/>
     <zorba:member type="bool" name="skip_called_" defaultValue="false"/>
     <zorba:member type="zstring" name="string_"/>
+    <zorba:member type="zstring" name="value_"/>
   </zorba:state>
   <zorba:method name="count" const="true" return="bool">
     <zorba:param name="result" type="store::Item_t&amp;"/>

=== modified file 'src/store/naive/simple_item_factory.cpp'
--- src/store/naive/simple_item_factory.cpp	2013-09-17 21:12:49 +0000
+++ src/store/naive/simple_item_factory.cpp	2014-03-06 19:08:12 +0000
@@ -2366,19 +2366,20 @@
     const std::vector<store::Item_t>& names,
     const std::vector<store::Item_t>& values)
 {
+  assert( names.size() == values.size() );
+
   result = new json::SimpleJSONObject();
-
-  json::JSONObject* obj = static_cast<json::JSONObject*>(result.getp());
-
-  assert(names.size() == values.size());
-
-  csize numPairs = names.size();
-  for (csize i = 0; i < numPairs; ++i)
-  {
-    if (!obj->add(names[i], values[i], false))
-    {
-      RAISE_ERROR_NO_LOC(jerr::JNDY0003, ERROR_PARAMS(names[i]->getStringValue()));
-    }
+  json::JSONObject *const obj = static_cast<json::JSONObject*>( result.getp() );
+
+  std::vector<store::Item_t>::const_iterator n_i( names.begin() );
+  std::vector<store::Item_t>::const_iterator v_i( values.begin() );
+  std::vector<store::Item_t>::const_iterator const n_end( names.end() );
+
+  for ( ; n_i != n_end; ++n_i, ++v_i ) {
+    if ( !obj->add( *n_i, *v_i, false ) )
+      throw XQUERY_EXCEPTION(
+        jerr::JNDY0003, ERROR_PARAMS( (*n_i)->getStringValue() )
+      );
   }
 
   return true;

=== modified file 'src/util/csv_parser.cpp'
--- src/util/csv_parser.cpp	2013-09-23 15:02:13 +0000
+++ src/util/csv_parser.cpp	2014-03-06 19:08:12 +0000
@@ -14,6 +14,10 @@
  * limitations under the License.
  */
 
+// Zorba
+#include "util/string_util.h"
+
+// local
 #include "csv_parser.h"
 
 namespace zorba {
@@ -21,11 +25,13 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 bool csv_parser::next_value( zstring *value, bool *eol, bool *quoted ) const {
-  value->clear();
+  ztd::string_appender<zstring,128> appender( value );
   char c;
   bool in_quote = false;
   bool is_quoted = false;
 
+  value->clear();
+
   while ( is_->get( c ) ) {
     if ( in_quote ) {
       if ( quote_esc_ == quote_ ) {     // ""
@@ -63,9 +69,10 @@
           goto return_true;
       } // switch
     } // else
-    *value += c;
+    appender += c;
   } // while
 
+  appender.flush();
   if ( value->empty() )
     return false;
 

=== modified file 'src/util/json_parser.cpp'
--- src/util/json_parser.cpp	2013-08-29 01:20:43 +0000
+++ src/util/json_parser.cpp	2014-03-06 19:08:12 +0000
@@ -253,8 +253,9 @@
         //
         location::line_type const quote_line = cur_loc_.line();
         location::column_type const quote_col = cur_loc_.column();
-        if ( !parse_string( &t->value_, throw_exceptions ) )
+        if ( !parse_string( throw_exceptions ) )
           return false;
+        t->value_ = value_;
         t->type_ = token::string;
         t->loc_.set(
           cur_loc_.file(), quote_line, quote_col, prev_line_, prev_col_
@@ -273,8 +274,9 @@
       case '8':
       case '9': {
         token::numeric_type nt;
-        if ( !(nt = parse_number( c, &t->value_, throw_exceptions )) )
+        if ( !(nt = parse_number( c, throw_exceptions )) )
           return false;
+        t->value_ = value_;
         t->numeric_type_ = nt;
         t->type_ = token::number;
         set_loc_range( &t->loc_ );
@@ -284,8 +286,9 @@
       case 'n':   // null
       case 't': { // true
         token::type tt;
-        if ( !(tt = parse_literal( c, &t->value_, throw_exceptions )) )
+        if ( !(tt = parse_literal( c, throw_exceptions )) )
           return false;
+        t->value_ = value_;
         t->type_ = tt;
         set_loc_range( &t->loc_ );
         return true;
@@ -372,22 +375,21 @@
   return false;
 }
 
-token::type lexer::parse_literal( char first_c, token::value_type *value,
-                                  bool throw_exceptions ) {
+token::type lexer::parse_literal( char first_c, bool throw_exceptions ) {
   static token::value_type const false_value( "false" );
   static token::value_type const null_value ( "null"  );
   static token::value_type const true_value ( "true"  );
 
   token::type tt = token::none;
   switch ( first_c ) {
-    case 'f': *value = false_value; tt = token::json_false; break;
-    case 'n': *value = null_value ; tt = token::json_null ; break;
-    case 't': *value = true_value ; tt = token::json_true ; break;
+    case 'f': value_ = false_value; tt = token::json_false; break;
+    case 'n': value_ = null_value ; tt = token::json_null ; break;
+    case 't': value_ = true_value ; tt = token::json_true ; break;
     default : assert( false );
   }
 
   char c;
-  for ( char const *s = value->c_str(); *++s; ) {
+  for ( char const *s = value_.c_str(); *++s; ) {
     if ( !get_char( &c ) )
       goto error_set_cur_loc_end_false;
     if ( c != *s )
@@ -409,17 +411,16 @@
   return token::none;
 }
 
-token::numeric_type lexer::parse_number( char first_c,
-                                         token::value_type *value,
-                                         bool throw_exceptions ) {
+token::numeric_type lexer::parse_number( char first_c, bool throw_exceptions ) {
   token::numeric_type numeric_type;
+  ztd::string_appender<token::value_type,64> value( &value_ );
 
-  value->clear();
+  value_.clear();
 
   // <number> ::= [-] <int> [<frac>] [<exp>]
   char c = first_c;
   if ( c == '-' ) {
-    *value += c;
+    value += c;
     if ( !get_char( &c ) )
       goto error_set_cur_loc_end_false;
   }
@@ -427,7 +428,7 @@
   // <int> := '0' | <1-9> <digit>*
   if ( !ascii::is_digit( c ) )
     goto error_set_cur_loc_end;
-  *value += c;
+  value += c;
   numeric_type = token::integer;
   if ( c == '0' ) {
     if ( !peek_char( &c ) )
@@ -443,19 +444,19 @@
       if ( !ascii::is_digit( c ) )
         break;
       get_char( &c );
-      *value += c;
+      value += c;
     }
   }
 
   // <frac> ::= '.' <digit>+
   if ( c == '.' ) {
     get_char( &c );
-    *value += c;
+    value += c;
     if ( !get_char( &c ) )
       goto error_set_cur_loc_end_false;
     if ( !ascii::is_digit( c ) )
       goto error_set_cur_loc_end;
-    *value += c;
+    value += c;
     numeric_type = token::decimal;
     while ( true ) {
       if ( !peek_char( &c ) )
@@ -465,7 +466,7 @@
       if ( !ascii::is_digit( c ) )
         break;
       get_char( &c );
-      *value += c;
+      value += c;
     }
   }
 
@@ -474,17 +475,17 @@
   // <sign> ::= '-' | '+'
   if ( c == 'e' || c == 'E' ) {
     get_char( &c );
-    *value += c;
+    value += c;
     if ( !get_char( &c ) )
       goto error_set_cur_loc_end_false;
     if ( c == '+' || c == '-' ) {
-      *value += c;
+      value += c;
       if ( !get_char( &c ) )
         goto error_set_cur_loc_end_false;
     }
     if ( !ascii::is_digit( c ) )
       goto error_set_cur_loc_end;
-    *value += c;
+    value += c;
     numeric_type = token::floating_point;
     while ( true ) {
       if ( !peek_char( &c ) )
@@ -494,7 +495,7 @@
       if ( !ascii::is_digit( c ) )
         break;
       get_char( &c );
-      *value += c;
+      value += c;
     }
   }
 
@@ -512,10 +513,12 @@
   return token::non_numeric;
 }
 
-bool lexer::parse_string( token::value_type *value, bool throw_exceptions ) {
-  value->clear();
+bool lexer::parse_string( bool throw_exceptions ) {
   bool got_backslash = false;
   location start_loc( cur_loc_ );
+  ztd::string_appender<token::value_type,1024> value( &value_ );
+
+  value_.clear();
 
   while ( true ) {
     //
@@ -537,28 +540,29 @@
         case '"':
         case '/':
         case '\\':
-          *value += c;
+          value += c;
           break;
         case 'b':
-          *value += '\b';
+          value += '\b';
           break;
         case 'f':
-          *value += '\f';
+          value += '\f';
           break;
         case 'n':
-          *value += '\n';
+          value += '\n';
           break;
         case 'r':
-          *value += '\r';
+          value += '\r';
           break;
         case 't':
-          *value += '\t';
+          value += '\t';
           break;
         case 'u': {
           unicode::code_point cp;
           if ( !parse_codepoint( &cp, throw_exceptions ) )
             return false;
-          utf8::encode( cp, value );
+          value.flush();
+          utf8::encode( cp, &value_ );
           break;
         }
         default:
@@ -576,7 +580,7 @@
       case '"':
         return true;
       default:
-        *value += c;
+        value += c;
     }
   } // while
 }

=== modified file 'src/util/json_parser.h'
--- src/util/json_parser.h	2013-08-29 01:20:43 +0000
+++ src/util/json_parser.h	2014-03-06 19:08:12 +0000
@@ -504,10 +504,9 @@
   bool get_char( char* );
   bool peek_char( char* );
   bool parse_codepoint( unicode::code_point *cp, bool throw_exceptions );
-  token::type parse_literal( char, token::value_type*, bool throw_exceptions );
-  token::numeric_type parse_number( char, token::value_type*,
-                                    bool throw_exceptions );
-  bool parse_string( token::value_type*, bool throw_exceptions );
+  token::type parse_literal( char, bool throw_exceptions );
+  token::numeric_type parse_number( char, bool throw_exceptions );
+  bool parse_string( bool throw_exceptions );
   void set_cur_loc();
   location& set_cur_loc_end( bool prev = true );
   void set_loc_range( location* );
@@ -518,6 +517,7 @@
   column_type col_, prev_col_;
   location cur_loc_;
   bool throw_exceptions_;
+  token::value_type value_;
 };
 
 ///////////////////////////////////////////////////////////////////////////////

=== modified file 'src/util/string_util.h'
--- src/util/string_util.h	2013-12-04 14:28:07 +0000
+++ src/util/string_util.h	2014-03-06 19:08:12 +0000
@@ -50,7 +50,81 @@
 
 using internal::ztd::c_str;
 
-////////// String building /////////////////////////////////////////////////////
+////////// String appending ///////////////////////////////////////////////////
+
+/**
+ * A %string_appender is used to optimize repeatedly appending characters to a
+ * string in a loop by buffering up to \c BufCapacity characters and appending
+ * them in chunks so as to call the string's \c append() function less often.
+ */
+template<class StringType,int BufCapacity>
+class string_appender {
+public:
+  typedef StringType value_type;
+  typedef typename value_type::value_type char_type;
+  typedef typename value_type::size_type size_type;
+
+  /**
+   * Constructs an appender.
+   *
+   * @param s A pointer to the string to append to.
+   */
+  string_appender( value_type *s ) : s_( s ), buf_size_( 0 ) { }
+
+  /**
+   * Destroys the appender and appends any unappended characters to the string.
+   */
+  ~string_appender() {
+    flush();
+  }
+
+  /**
+   * Appends a character.
+   *
+   * @param c The character to append.
+   * @return Returns \c *this.
+   */
+  string_appender& append( char_type c ) {
+    buf_[ buf_size_++ ] = c;
+    if ( buf_size_ == BufCapacity )
+      flush();
+    return *this;
+  }
+
+  /**
+   * Appends any unappended characters to the string.
+   */
+  void flush() {
+    s_->append( buf_, buf_size_ );
+    buf_size_ = 0;
+  }
+
+  /**
+   * Gets the string that is being appended to.
+   *
+   * @return Returns said string.
+   */
+  value_type& str() const {
+    return *s_;
+  }
+
+  /**
+   * Appends a character.
+   *
+   * @param c The character to append.
+   * @return Returns \c *this.
+   */
+  string_appender& operator+=( char_type c ) {
+    return append( c );
+  }
+
+private:
+  char_type buf_[ BufCapacity ];
+  size_type buf_size_;
+  value_type *s_;
+};
+
+////////// String building ////////////////////////////////////////////////////
 
 /**
  * A %string_builder is used to build (concatenate) strings on-the-fly and pass

Follow ups

[Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba
From: noreply, 2014-03-06
[Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba
From: Zorba Build Bot, 2014-03-06
[Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba
From: Zorba Build Bot, 2014-03-06
Re: [Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba
From: Matthias Brantner, 2014-03-06
[Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba
From: Zorba Build Bot, 2014-03-06
[Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba
From: Zorba Build Bot, 2014-03-06
[Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba
From: Zorba Build Bot, 2014-03-06
[Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba
From: Paul J. Lucas, 2014-03-06
Re: [Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba
From: Paul J. Lucas, 2014-03-06
[Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba
From: Paul J. Lucas, 2014-03-06
[Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba
From: Paul J. Lucas, 2014-03-06
Re: [Merge] lp:~paul-lucas/zorba/pjl-misc into lp:zorba
From: Paul J. Lucas, 2014-03-06