← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~paul-lucas/zorba/feature-unordered_map into lp:zorba

 

Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/feature-unordered_map into lp:zorba.

Requested reviews:
  Paul J. Lucas (paul-lucas)

For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/feature-unordered_map/+merge/117498

Added pool class and replaced StringPool.
-- 
https://code.launchpad.net/~paul-lucas/zorba/feature-unordered_map/+merge/117498
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/store/naive/loader_dtd.cpp'
--- src/store/naive/loader_dtd.cpp	2012-07-24 08:48:48 +0000
+++ src/store/naive/loader_dtd.cpp	2012-07-31 17:29:20 +0000
@@ -1305,11 +1305,9 @@
       if (prefix == NULL)
         prefix = "";
 
-      zstring pooledNs;
-      store.getNamespacePool().insertc(nsuri, pooledNs);
 
       bindings[i].first = prefix;
-      bindings[i].second = pooledNs;
+      bindings[i].second = *store.getNamespacePool().insert(nsuri).first;
 
       LOADER_TRACE1("namespace decl: [" << prefix  << ":" << nsuri << "]");
       i++;

=== modified file 'src/store/naive/loader_fast.cpp'
--- src/store/naive/loader_fast.cpp	2012-07-24 08:48:48 +0000
+++ src/store/naive/loader_fast.cpp	2012-07-31 17:29:20 +0000
@@ -691,11 +691,8 @@
         if (prefix == NULL)
           prefix = "";
 
-        zstring pooledNs;
-        store.getNamespacePool().insertc(nsuri, pooledNs);
-
         bindings[i].first = prefix;
-        bindings[i].second = pooledNs;
+        bindings[i].second = *store.getNamespacePool().insert(nsuri).first;
 
         LOADER_TRACE1("namespace decl: [" << prefix  << ":" << nsuri << "]");
       }

=== modified file 'src/store/naive/qname_pool.cpp'
--- src/store/naive/qname_pool.cpp	2012-07-24 08:48:48 +0000
+++ src/store/naive/qname_pool.cpp	2012-07-31 17:29:20 +0000
@@ -157,8 +157,7 @@
   if (pre == NULL) pre = "";
   ZORBA_FATAL(ln != NULL && *ln != '\0', "");
 
-  zstring pooledNs;
-  theNamespacePool->insertc(ns, pooledNs);
+  zstring const pooledNs(*theNamespacePool->insert(ns).first);
 
   ulong hval = hashfun::h32(pre, hashfun::h32(ln, hashfun::h32(ns)));
 
@@ -247,8 +246,7 @@
 
   bool normalized = pre.empty();
 
-  zstring pooledNs;
-  theNamespacePool->insert(ns, pooledNs);
+  zstring const pooledNs(*theNamespacePool->insert(ns).first);
 
   ulong hval = hashfun::h32(pre.c_str(),
                             hashfun::h32(ln.c_str(),

=== modified file 'src/store/naive/qname_pool.h'
--- src/store/naive/qname_pool.h	2012-07-24 08:48:48 +0000
+++ src/store/naive/qname_pool.h	2012-07-31 17:29:20 +0000
@@ -25,11 +25,10 @@
 #include "common/common.h"
 
 #include "atomic_items.h"
+#include "string_pool.h"
 
 namespace zorba { namespace simplestore {
 
-class StringPool;
-
 /*******************************************************************************
 
   theCache       : An array of QName slots that is managed as a cache. This

=== modified file 'src/store/naive/simple_item_factory.cpp'
--- src/store/naive/simple_item_factory.cpp	2012-07-24 08:48:48 +0000
+++ src/store/naive/simple_item_factory.cpp	2012-07-31 17:29:20 +0000
@@ -112,9 +112,7 @@
 
 bool BasicItemFactory::createAnyURI(store::Item_t& result, const char* value)
 {
-  zstring str;
-  theUriPool->insertc(value, str);
-  result = new AnyUriItem(str);
+  result = new AnyUriItem(*theUriPool->insert(value).first);
   return true;
 }
 

=== modified file 'src/store/naive/simple_item_factory.h'
--- src/store/naive/simple_item_factory.h	2012-07-24 08:48:48 +0000
+++ src/store/naive/simple_item_factory.h	2012-07-31 17:29:20 +0000
@@ -26,6 +26,7 @@
 #include "zorbatypes/schema_types.h"
 
 #include "tree_id.h"
+#include "string_pool.h"
 
 namespace zorba
 {
@@ -39,7 +40,6 @@
 namespace simplestore
 {
 
-class StringPool;
 typedef StringPool UriPool;
 class QNamePool;
 class OrdPath;

=== modified file 'src/store/naive/store.cpp'
--- src/store/naive/store.cpp	2012-07-24 08:48:48 +0000
+++ src/store/naive/store.cpp	2012-07-31 17:29:20 +0000
@@ -78,7 +78,7 @@
 /*******************************************************************************
   Store static data
 ********************************************************************************/
-const ulong Store::NAMESPACE_POOL_SIZE = 128;
+const ulong Store::NAMESPACE_POOL_SIZE = 127; // really should be prime
 const ulong Store::DEFAULT_DOCUMENT_SET_SIZE = 32;
 const ulong Store::DEFAULT_URI_COLLECTION_SET_SIZE = 32;
 const ulong Store::DEFAULT_INDICES_SET_SIZE = 32;
@@ -147,8 +147,8 @@
 
     theNamespacePool = new StringPool(NAMESPACE_POOL_SIZE);
 
-    theNamespacePool->insertc("", theEmptyNs);
-    theNamespacePool->insertc(XS_URI, theXmlSchemaNs);
+    theEmptyNs = *theNamespacePool->insert("").first;
+    theXmlSchemaNs = *theNamespacePool->insert(XS_URI).first;
 
     theQNamePool = new QNamePool(QNamePool::MAX_CACHE_SIZE, theNamespacePool);
 

=== modified file 'src/store/naive/store.h'
--- src/store/naive/store.h	2012-07-24 08:48:48 +0000
+++ src/store/naive/store.h	2012-07-31 17:29:20 +0000
@@ -35,6 +35,7 @@
 #include "store/api/index.h"
 #include "store/api/ic.h"
 #endif
+#include "string_pool.h"
 
 using namespace zorba;
 
@@ -53,7 +54,6 @@
 namespace simplestore
 {
 
-class StringPool;
 class QNamePool;
 class XmlLoader;
 class FastXmlLoader;

=== modified file 'src/store/naive/string_pool.cpp'
--- src/store/naive/string_pool.cpp	2012-07-24 08:48:48 +0000
+++ src/store/naive/string_pool.cpp	2012-07-31 17:29:20 +0000
@@ -13,6 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#if 0
+
 #include "stdafx.h"
 
 #include "util/string_util.h"
@@ -169,4 +171,5 @@
 
 } // namespace simplestore
 } // namespace zorba
+#endif
 /* vim:set et sw=2 ts=2: */

=== modified file 'src/store/naive/string_pool.h'
--- src/store/naive/string_pool.h	2012-07-24 08:48:48 +0000
+++ src/store/naive/string_pool.h	2012-07-31 17:29:20 +0000
@@ -16,16 +16,22 @@
 #ifndef ZORBA_SIMPLE_STORE_STRING_POOL
 #define ZORBA_SIMPLE_STORE_STRING_POOL
 
-#include "common/common.h"
+#include <functional>
+
 #include "zorbatypes/zstring.h"
-
-#include "zorbautils/hashset.h"
-#include "zorbautils/hashfun.h"
-
+#include "util/pool.h"
 
 namespace zorba { namespace simplestore {
 
-
+struct not_shared : std::unary_function<zstring const&,bool> {
+  bool operator()( zstring const &s ) const {
+    return !s.is_shared();
+  }
+};
+
+typedef pool<zstring,not_shared> StringPool;
+
+#if 0
 class StringPoolCompareFunction
 {
 public:
@@ -60,7 +66,7 @@
 protected:
   void garbageCollect();
 };
-
+#endif
 
 } // namespace store
 } // namespace zorba

=== modified file 'src/unit_tests/CMakeLists.txt'
--- src/unit_tests/CMakeLists.txt	2012-07-24 08:48:48 +0000
+++ src/unit_tests/CMakeLists.txt	2012-07-31 17:29:20 +0000
@@ -18,6 +18,7 @@
   test_base64_streambuf.cpp
   test_fs_iterator.cpp
   test_json_parser.cpp
+  test_pool.cpp
   test_string.cpp
   test_uri.cpp
   unit_tests.cpp

=== added file 'src/unit_tests/test_pool.cpp'
--- src/unit_tests/test_pool.cpp	1970-01-01 00:00:00 +0000
+++ src/unit_tests/test_pool.cpp	2012-07-31 17:29:20 +0000
@@ -0,0 +1,153 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stdafx.h"
+#include <iostream>
+#include <functional>
+#include <utility>
+
+#include "util/pool.h"
+#include "zorbatypes/zstring.h"
+
+using namespace std;
+using namespace zorba;
+
+struct not_shared : unary_function<zstring const&,bool> {
+  bool operator()( zstring const &s ) const {
+    return !s.is_shared();
+  }
+};
+
+//#define USE_TEST_STRING
+
+#ifdef USE_TEST_STRING
+
+struct test_string : zstring {
+  test_string() { }
+  test_string( char const *s ) : zstring( s ) {
+    cout << "test_string(char const* = \"" << s << "\")" << endl;
+  }
+  test_string( test_string const &s ) : zstring( s ) {
+    cout << "test_string(test_string const& = \"" << s.c_str() << "\")" << endl;
+  }
+  ~test_string() {
+    cout << "~test_string(\"" << c_str() << "\")" << endl;
+  }
+  test_string& operator=( test_string const &s ) {
+    zstring::operator=( s );
+    return *this;
+  }
+  test_string& operator=( char const *s ) {
+    zstring::operator=( s );
+    return *this;
+  }
+};
+
+inline bool operator==( test_string const &s1, test_string const &s2 ) {
+  return (zstring const&)s1 == (zstring const&)s2;
+}
+
+namespace std {
+
+template<>
+struct hash<test_string> : unary_function<test_string const&,size_t> {
+  size_t operator()( test_string const &s ) const {
+    return zorba::ztd::hash_bytes( s.data(), s.size() );
+  }
+};
+
+} // namespace std
+
+#else
+
+typedef zstring test_string;
+
+#endif /* USE_TEST_STRING */
+
+typedef pool<test_string,not_shared> test_pool;
+typedef pair<test_pool::iterator,bool> insert_return_type;
+
+///////////////////////////////////////////////////////////////////////////////
+
+static int failures;
+
+static bool assert_true( char const *expr, int line, bool result ) {
+  if ( !result ) {
+    cout << "FAILED, line " << line << ": " << expr << endl;
+    ++failures;
+  }
+  return result;
+}
+
+static void print_exception( char const *expr, int line,
+                             std::exception const &e ) {
+  assert_true( expr, line, false );
+  cout << "+ exception: " << e.what() << endl;
+}
+
+#define ASSERT_TRUE( EXPR ) assert_true( #EXPR, __LINE__, !!(EXPR) )
+
+///////////////////////////////////////////////////////////////////////////////
+
+static void test_basics() {
+  test_pool p;
+  insert_return_type ir;
+
+  test_string shared( "a" );
+  ir = p.insert( shared );
+  ASSERT_TRUE( ir.second );
+  ir = p.insert( shared );
+  ASSERT_TRUE( !ir.second );
+  ASSERT_TRUE( ir.first->data() == shared.data() );
+
+  char buf[3];
+  buf[0] = 'x';
+  buf[2] = '\0';
+  test_string temp;
+#ifdef USE_TEST_STRING
+  cout << "-----" << endl;
+#endif /* USE_TEST_STRING */
+
+  int const test_size = p.set().bucket_count() * 1.5;
+  for ( int i = 0; i < test_size; ++i ) {
+    buf[1] = 'A' + i;
+    temp = buf;
+    p.insert( temp );
+  }
+#ifdef USE_TEST_STRING
+  cout << "-----" << endl;
+#endif /* USE_TEST_STRING */
+  ASSERT_TRUE( p.set().size() < test_size + 1 ); // +1 for the shared string
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+namespace zorba {
+namespace UnitTests {
+
+int test_pool( int, char*[] ) {
+  test_basics();
+
+  cout << failures << " test(s) failed\n";
+  return failures ? 1 : 0;
+}
+
+} // namespace UnitTests
+} // namespace zorba
+
+///////////////////////////////////////////////////////////////////////////////
+
+/* vim:set et sw=2 ts=2: */

=== modified file 'src/unit_tests/unit_test_list.h'
--- src/unit_tests/unit_test_list.h	2012-07-24 08:48:48 +0000
+++ src/unit_tests/unit_test_list.h	2012-07-31 17:29:20 +0000
@@ -33,6 +33,7 @@
   int test_icu_streambuf( int, char*[] );
 #endif /* ZORBA_NO_ICU */
   int test_json_parser( int, char*[] );
+  int test_pool( int, char*[] );
   int test_string( int, char*[] );
 #ifndef ZORBA_NO_FULL_TEXT
   int test_stemmer( int, char*[] );

=== modified file 'src/unit_tests/unit_tests.cpp'
--- src/unit_tests/unit_tests.cpp	2012-07-24 08:48:48 +0000
+++ src/unit_tests/unit_tests.cpp	2012-07-31 17:29:20 +0000
@@ -44,6 +44,7 @@
   libunittests["icu_streambuf"] = test_icu_streambuf;
 #endif /* ZORBA_NO_ICU */
   libunittests["json_parser"] = test_json_parser;
+  libunittests["pool"] = test_pool;
   libunittests["string"] = test_string;
 #ifndef ZORBA_NO_FULL_TEXT
   libunittests["stemmer"] = test_stemmer;

=== added file 'src/util/pool.h'
--- src/util/pool.h	1970-01-01 00:00:00 +0000
+++ src/util/pool.h	2012-07-31 17:29:20 +0000
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#ifndef ZORBA_POOL_H
+#define ZORBA_POOL_H
+
+// local
+#include "stl_util.h"
+#include "unordered_set.h"
+
+namespace zorba {
+
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * A %pool uses-an unordered_set to purge ("garbage collect") elements during
+ * insertion.
+ *
+ * @tparam KeyType They pool's key type.
+ * @tparam PredicateType The predicate to use to determine which keys to erase.
+ * @tparam KeyHash The unary_function to use for generating hash codes.
+ * @tparam KeyEqual The binary_function to use to test for value equality.
+ * @tparam Allocator The allocator to use.
+ */
+template<
+  typename KeyType,
+  class PredicateType,
+  class KeyHash = std::hash<KeyType>,
+  class KeyEqual = std::equal_to<KeyType>,
+  class Allocator = std::allocator<KeyType>
+>
+class pool {
+public:
+  typedef std::unordered_set<KeyType,KeyHash,KeyEqual,Allocator> set_type;
+
+  typedef typename set_type::key_type key_type;
+  typedef PredicateType predicate_type;
+  typedef typename set_type::hasher hasher;
+  typedef typename set_type::key_equal key_equal;
+  typedef typename set_type::allocator_type allocator_type;
+
+  typedef typename set_type::pointer pointer;
+  typedef typename set_type::const_pointer const_pointer;
+  typedef typename set_type::reference reference;
+  typedef typename set_type::const_reference const_reference;
+
+  typedef typename set_type::local_iterator local_iterator;
+  typedef typename set_type::const_local_iterator const_local_iterator;
+  typedef typename set_type::iterator iterator;
+  typedef typename set_type::const_iterator const_iterator;
+
+  typedef typename set_type::size_type size_type;
+  typedef typename set_type::difference_type difference_type;
+
+  /**
+   * Constructs a %pool.  All the arguments are passed as-is to the underlying
+   * set.
+   *
+   * @param bucket_count The initial number of buckets.
+   * @param hash The unary_function to use for generating hash codes.
+   * @param equal The binary_function to use to test for key equality.
+   * @param alloc The allocator to use.
+   */
+  explicit pool(
+    size_type bucket_count = 11,
+    hasher const &hash = hasher(),
+    key_equal const &equal = key_equal(),
+    allocator_type const &alloc = allocator_type()
+  ) :
+    set_( bucket_count, hash, equal, alloc )
+  {
+  }
+
+  /**
+   * Attempts to insert a new value and may purge elements.
+   *
+   * @param value The value to insert.
+   * @return If a value with the given key is already in the pool, returns
+   * [i,false] where \a i is positioned at the existing element; otherwise
+   * returns [i,true] where \a i is positioned at the new element.
+   */
+  std::pair<iterator,bool> insert( key_type const &key ) {
+    // if the load factor is 90% of the max load factor, purge
+    if ( set_.load_factor() >= set_.max_load_factor() * 0.9F )
+      purge();
+    return set_.insert( key );
+  }
+
+  /**
+   * Immediately erases elements for which the predicate is \c true.
+   */
+  void purge() {
+    ztd::erase_if( set_, pred_ );
+  }
+
+  /**
+   * Gets the underlying set.
+   *
+   * @return Returns said set.
+   */
+  set_type const& set() const {
+    return set_;
+  }
+
+private:
+  set_type set_;
+  predicate_type pred_;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+} // namespace zorba
+
+#endif /* ZORBA_POOL_H */
+/* vim:set et ts=2 sw=2: */

=== modified file 'src/util/stl_util.h'
--- src/util/stl_util.h	2012-07-24 08:48:48 +0000
+++ src/util/stl_util.h	2012-07-31 17:29:20 +0000
@@ -200,8 +200,28 @@
 }
 
 /**
- * Erases the first element in the given sequence for which the given predicate
- * is \c true.
+ * Erases all elements in the given container for which the given predicate is
+ * \c true.
+ *
+ * @tparam SequenceType The sequence type.
+ * @tparam PredicateType The predicate type.
+ * @param seq The sequence to modify.
+ * @param pred The predicate to use.
+ * @return Returns \c true only if an element was erased; \c false otherwise.
+ */
+template<class SequenceType,class PredicateType> inline
+void erase_if( SequenceType &seq, PredicateType pred ) {
+  for ( typename SequenceType::iterator i = seq.begin(); i != seq.end(); ) {
+    if ( pred( *i ) )
+      i = seq.erase( i );
+    else
+      ++i;
+  }
+}
+
+/**
+ * Erases only the first element in the given sequence for which the given
+ * predicate is \c true.
  *
  * @tparam SequenceType The sequence type.
  * @tparam PredicateType The predicate type.


Follow ups