zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #14456
[Merge] lp:~zorba-coders/zorba/hashmap into lp:zorba
Markos Zaharioudakis has proposed merging lp:~zorba-coders/zorba/hashmap into lp:zorba.
Requested reviews:
Markos Zaharioudakis (markos-za)
For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/hashmap/+merge/125187
HashMap optimizations
--
https://code.launchpad.net/~zorba-coders/zorba/hashmap/+merge/125187
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/context/static_context.h'
--- src/context/static_context.h 2012-09-17 00:36:37 +0000
+++ src/context/static_context.h 2012-09-19 12:44:24 +0000
@@ -42,6 +42,7 @@
#include "zorbautils/hashmap_zstring.h"
#include "zorbautils/hashmap_itemp.h"
+#include "zorbautils/checked_vector.h"
#include "common/shared_types.h"
=== modified file 'src/store/naive/qname_pool.cpp'
--- src/store/naive/qname_pool.cpp 2012-09-17 00:36:37 +0000
+++ src/store/naive/qname_pool.cpp 2012-09-19 12:44:24 +0000
@@ -63,12 +63,12 @@
********************************************************************************/
QNamePool::~QNamePool()
{
- csize n = theHashSet.theHashTab.size();
+ csize n = theHashSet.capacity();
for (csize i = 0; i < n; ++i)
{
if (!theHashSet.theHashTab[i].isFree() &&
- theHashSet.theHashTab[i].theItem->isOverflow())
- delete theHashSet.theHashTab[i].theItem;
+ theHashSet.theHashTab[i].key()->isOverflow())
+ delete theHashSet.theHashTab[i].key();
}
if (theCache != NULL)
@@ -297,12 +297,12 @@
bool found;
entry = theHashSet.hashInsert(qn, hval, found);
- entry->theItem = qn;
+ entry->key() = qn;
ZORBA_FATAL(!found, "");
}
else
{
- qn = entry->theItem;
+ qn = entry->key();
cachePin(qn);
}
@@ -389,12 +389,12 @@
bool found;
entry = theHashSet.hashInsert(qn, hval, found);
- entry->theItem = qn;
+ entry->key() = qn;
ZORBA_FATAL(!found, "");
}
else
{
- qn = entry->theItem;
+ qn = entry->key();
cachePin(qn);
}
@@ -478,7 +478,7 @@
while (entry != NULL)
{
- QNameItem* qn = entry->theItem;
+ QNameItem* qn = entry->key();
if (ztd::equals(qn->getLocalName(), ln, lnlen) &&
ztd::equals(qn->getNamespace(), ns, nslen) &&
=== modified file 'src/store/naive/string_pool.cpp'
--- src/store/naive/string_pool.cpp 2012-09-17 00:36:37 +0000
+++ src/store/naive/string_pool.cpp 2012-09-19 12:44:24 +0000
@@ -28,13 +28,14 @@
StringPool::~StringPool()
{
csize count = 0;
- csize n = theHashTab.size();
+ csize n = capacity();
for (csize i = 0; i < n; ++i)
{
- if (theHashTab[i].theItem.is_shared())
+ if (!theHashTab[i].isFree() && theHashTab[i].key().is_shared())
{
- std::cerr << "ID: " << i << " Referenced URI: " << theHashTab[i].theItem << std::endl;
+ std::cerr << "ID: " << i << " Referenced URI: "
+ << theHashTab[i].key() << std::endl;
//delete theHashTab[i].theString.getp();
count++;
}
@@ -57,7 +58,8 @@
bool found = false;
zstring::size_type len = strlen(str);
- ulong hval = hashfun::h32(str, len, FNV_32_INIT) % theHashTabSize;
+
+ ulong hval = hashfun::h32(str, len, FNV_32_INIT) % bucket_count();
{
SYNC_CODE(AutoMutex lock(&theMutex);)
@@ -68,7 +70,7 @@
{
while (entry != NULL)
{
- if (ztd::equals(entry->theItem, str, len))
+ if (ztd::equals(entry->key(), str, len))
{
found = true;
break;
@@ -79,7 +81,7 @@
if (found)
{
- outStr = entry->theItem;
+ outStr = entry->key();
return false;
}
}
@@ -97,76 +99,78 @@
********************************************************************************/
void StringPool::garbageCollect()
{
- HashEntry<zstring, DummyHashValue>* entry;
+ HashEntry<zstring, DummyHashValue>* currEntry;
HashEntry<zstring, DummyHashValue>* freeList = NULL;
- zstring::size_type size = theHashTabSize;
+ csize size = bucket_count();
- for (ulong i = 0; i < size; ++i)
+ for (csize i = 0; i < size; ++i)
{
- entry = &theHashTab[i];
+ currEntry = &theHashTab[i];
// If the current hash bucket is empty, move to the next one
- if (entry->isFree())
+ if (currEntry->isFree())
{
- ZORBA_FATAL(entry->theNext == 0, "");
+ ZORBA_FATAL(currEntry->theNext == 0, "");
continue;
}
// Handle the 1st hash entry of the current hash bucket
- while (!entry->theItem.is_shared())
+ while (!currEntry->key().is_shared())
{
- if (entry->theNext == 0)
+ if (currEntry->theNext == 0)
{
- entry->setFree();
- theNumEntries--;
+ currEntry->setFree();
+ --theNumEntries;
break;
}
else
{
- HashEntry<zstring, DummyHashValue>* nextEntry = entry->getNext();
- *entry = *nextEntry;
- entry->setNext(nextEntry->getNext());
+ HashEntry<zstring, DummyHashValue>* nextEntry = currEntry->getNext();
+ assert(!nextEntry->isFree());
+ *currEntry = *nextEntry;
+ currEntry->setNext(nextEntry->getNext());
nextEntry->setFree();
nextEntry->setNext(freeList);
freeList = nextEntry;
- theNumEntries--;
+ --theNumEntries;
}
}
// Handle the overflow entries of the current hash bucket.
- HashEntry<zstring, DummyHashValue>* prevEntry = entry;
- entry = entry->getNext();
+ HashEntry<zstring, DummyHashValue>* prevEntry = currEntry;
+ currEntry = currEntry->getNext();
- while (entry != NULL)
+ while (currEntry != NULL)
{
- if (!entry->theItem.is_shared())
+ if (!currEntry->key().is_shared())
{
- prevEntry->setNext(entry->getNext());
- entry->setFree();
- entry->setNext(freeList);
- freeList = entry;
- theNumEntries--;
+ prevEntry->setNext(currEntry->getNext());
+ currEntry->setFree();
+ currEntry->setNext(freeList);
+ freeList = currEntry;
+ --theNumEntries;
- entry = prevEntry->getNext();
+ currEntry = prevEntry->getNext();
}
else
{
- prevEntry = entry;
- entry = entry->getNext();
+ prevEntry = currEntry;
+ currEntry = currEntry->getNext();
}
}
}
if (freeList != NULL)
{
- entry = freeList;
- while (entry->theNext != 0)
- entry = entry->getNext();
-
- entry->setNext(theHashTab[theHashTabSize].getNext());
- theHashTab[theHashTabSize].setNext(freeList);
+ currEntry = freeList;
+ while (currEntry->theNext != 0)
+ currEntry = currEntry->getNext();
+
+ currEntry->setNext(theHashTab[bucket_count()].getNext());
+
+ theHashTab[bucket_count()].setNext(freeList);
}
}
=== modified file 'src/unit_tests/CMakeLists.txt'
--- src/unit_tests/CMakeLists.txt 2012-09-17 00:36:37 +0000
+++ src/unit_tests/CMakeLists.txt 2012-09-19 12:44:24 +0000
@@ -25,6 +25,7 @@
test_uri.cpp
test_json_parser.cpp
test_fs_iterator.cpp
+ test_hashmaps.cpp
# memory_manager.cpp
)
=== added file 'src/unit_tests/test_hashmaps.cpp'
--- src/unit_tests/test_hashmaps.cpp 1970-01-01 00:00:00 +0000
+++ src/unit_tests/test_hashmaps.cpp 2012-09-19 12:44:24 +0000
@@ -0,0 +1,257 @@
+
+#include <cstdlib>
+#include <stdio.h>
+#include <iostream>
+
+#include <zorba/util/time.h>
+
+#include "zorbautils/hashmap.h"
+#include "util/hashmap32.h"
+#include "util/hashmap.h"
+#include "util/unordered_map.h"
+
+
+namespace zorba {
+
+namespace UnitTests {
+
+
+class IntCompFunc
+{
+public:
+ static bool equal(uint64_t k1, uint64_t k2)
+ {
+ return k1 == k2;
+ }
+
+ static uint32_t hash(uint64_t key)
+ {
+#if 1
+ return key;
+#else
+ char buf[9];
+ buf[0] = (unsigned char)(key>>56);
+ buf[1] = (unsigned char)(key>>48);
+ buf[2] = (unsigned char)(key>>40);
+ buf[3] = (unsigned char)(key>>32);
+ buf[4] = (unsigned char)(key>>24);
+ buf[5] = (unsigned char)(key>>16);
+ buf[6] = (unsigned char)(key>>8 );
+ buf[7] = (unsigned char)(key );
+ buf[8] = 0;
+ return (uint32_t)hashfun::h64((void*)buf,8,FNV_64_INIT);
+#endif
+ }
+};
+
+
+class StrCompFunc
+{
+public:
+ static bool equal(const zstring& k1, const zstring& k2)
+ {
+ return k1 == k2;
+ }
+
+ static uint32_t hash(const zstring& key)
+ {
+ return hashfun::h32(key.c_str(), FNV_32_INIT);
+ }
+};
+
+
+int test_hashmaps(int argc, char* argv[])
+{
+ if (argc < 4)
+ return 1;
+
+ int test_id = atoi(argv[1]);
+
+ double load_factor = atof(argv[2]);
+
+ int num = atoi(argv[3]);
+
+ uint64_t* int_buf = new uint64_t[num];
+
+ for (int i = 0; i < num; ++i)
+ {
+ int_buf[i] = rand() % num;
+ }
+
+ zstring* str_buf = new zstring[num];
+
+ for (int i = 0; i < num; ++i)
+ {
+ char tmp[20];
+ sprintf(tmp, "%d", rand() % num);
+ str_buf[i] = tmp;
+
+ //std::cout << str_buf[i] << std::endl;
+ }
+
+
+ HashMap<uint64_t, int, IntCompFunc> map1(1024, false);
+ HashMap<zstring, int, StrCompFunc> map2(1024, false);
+
+ std::unordered_map<uint64_t, int> map3(1024);
+ std::unordered_map<zstring, int> map4(1024);
+
+ hash64map<int> map5(1024, load_factor);
+ hashmap<zstring, int> map6(1024, load_factor);
+
+ map1.set_load_factor(load_factor);
+ map2.set_load_factor(load_factor);
+
+ if (test_id == 1)
+ {
+ zorba::time::walltime startTime;
+ zorba::time::get_current_walltime(startTime);
+
+ for (int i = 0; i < num; ++i)
+ {
+ uint64_t key = int_buf[i];
+ int value = 1;
+ (void)map1.insert(key, value);
+ }
+
+ zorba::time::walltime stopTime;
+ time::get_current_walltime(stopTime);
+
+ double time = time::get_walltime_elapsed(startTime, stopTime);
+
+ std::cout << "Load factor = " << load_factor << std::endl
+ << "Num numeric insertions = " << num << std::endl
+ << "HashMap entries = " << map1.size() << std::endl
+ << "HashMap capacity = " << map1.capacity() << std::endl
+ << "HashMap colisions = " << map1.collisions() << std::endl
+ << "Time = " << time << std::endl;
+ }
+ else if (test_id == 2)
+ {
+ zorba::time::walltime startTime;
+ zorba::time::get_current_walltime(startTime);
+
+ for (int i = 0; i < num; ++i)
+ {
+ zstring key = str_buf[i];
+ int value = 1;
+ (void)map2.insert(key, value);
+ }
+
+ zorba::time::walltime stopTime;
+ time::get_current_walltime(stopTime);
+
+ double time = time::get_walltime_elapsed(startTime, stopTime);
+
+ std::cout << "Load factor = " << load_factor << std::endl
+ << "Num zstring insertions = " << num << std::endl
+ << "HashMap entries = " << map2.size() << std::endl
+ << "HashMap buckets = " << map2.bucket_count() << std::endl
+ << "HashMap capacity = " << map2.capacity() << std::endl
+ << "Time = " << time << std::endl;
+ }
+ else if (test_id == 3)
+ {
+ zorba::time::walltime startTime;
+ zorba::time::get_current_walltime(startTime);
+
+ for (int i = 0; i < num; ++i)
+ {
+ uint64_t key = int_buf[i];
+ int value = 1;
+ (void)map3.insert(std::pair<uint64_t, int>(key, value));
+ }
+
+ zorba::time::walltime stopTime;
+ time::get_current_walltime(stopTime);
+
+ double time = time::get_walltime_elapsed(startTime, stopTime);
+
+ std::cout << "Load factor = " << load_factor << std::endl
+ << "Num numeric insertions = " << num << std::endl
+ << "UnorderedMap entries = " << map3.size() << std::endl
+ << "UnorderedMap buckets = " << map3.bucket_count() << std::endl
+ << "Time = " << time << std::endl;
+ }
+ else if (test_id == 4)
+ {
+ zorba::time::walltime startTime;
+ zorba::time::get_current_walltime(startTime);
+
+ for (int i = 0; i < num; ++i)
+ {
+ zstring key = str_buf[i];
+ int value = 1;
+ (void)map4.insert(std::pair<zstring, int>(key, value));
+ }
+
+ zorba::time::walltime stopTime;
+ time::get_current_walltime(stopTime);
+
+ double time = time::get_walltime_elapsed(startTime, stopTime);
+
+ std::cout << "Load factor = " << load_factor << std::endl
+ << "Num zstring insertions = " << num << std::endl
+ << "UnorderedMap entries = " << map4.size() << std::endl
+ << "UnorderedMap buckets = " << map4.bucket_count() << std::endl
+ << "Time = " << time << std::endl;
+ }
+ else if (test_id == 5)
+ {
+ zorba::time::walltime startTime;
+ zorba::time::get_current_walltime(startTime);
+
+ for (int i = 0; i < num; ++i)
+ {
+ uint64_t key = int_buf[i];
+ int value = 1;
+ (void)map5.put_unsync(key, value);
+ }
+
+ zorba::time::walltime stopTime;
+ time::get_current_walltime(stopTime);
+
+ double time = time::get_walltime_elapsed(startTime, stopTime);
+
+ std::cout << "Load factor = " << load_factor << std::endl
+ << "Num numeric insertions = " << num << std::endl
+ << "hashmap entries = " << map5.size() << std::endl
+ << "Time = " << time << std::endl;
+ }
+ else if (test_id == 6)
+ {
+ zorba::time::walltime startTime;
+ zorba::time::get_current_walltime(startTime);
+
+ for (int i = 0; i < num; ++i)
+ {
+ zstring key = str_buf[i];
+ int value = 1;
+ (void)map6.put(key, value, false);
+ }
+
+ zorba::time::walltime stopTime;
+ time::get_current_walltime(stopTime);
+
+ double time = time::get_walltime_elapsed(startTime, stopTime);
+
+ std::cout << "Load factor = " << load_factor << std::endl
+ << "Num zstring insertions = " << num << std::endl
+ << "hashmap entries = " << map6.size() << std::endl
+ << "Time = " << time << std::endl;
+ }
+ else
+ {
+ std::cout << "Invalid test id" << std::endl;
+ return 2;
+ }
+
+ delete [] int_buf;
+ delete [] str_buf;
+
+ return 0;
+}
+
+
+}
+}
=== modified file 'src/unit_tests/unit_test_list.h'
--- src/unit_tests/unit_test_list.h 2012-09-17 00:36:37 +0000
+++ src/unit_tests/unit_test_list.h 2012-09-19 12:44:24 +0000
@@ -25,34 +25,51 @@
namespace UnitTests {
int runUriTest(int argc, char* argv[]);
+
int runDebuggerProtocolTest(int argc, char* argv[]);
+
int test_base64( int, char*[] );
+
int test_base64_streambuf( int, char*[] );
+
int test_fs_iterator( int, char*[] );
+
#ifndef ZORBA_NO_ICU
int test_icu_streambuf( int, char*[] );
#endif /* ZORBA_NO_ICU */
+
int test_json_parser( int, char*[] );
+
int test_string( int, char*[] );
+
int test_unique_ptr( int, char*[] );
+
int test_fs_iterator( int, char*[] );
- //int test_mem_manager( int, char*[] );
+
#ifndef ZORBA_NO_FULL_TEXT
int test_stemmer( int, char*[] );
int test_thesaurus( int, char*[] );
int test_tokenizer( int, char*[] );
#endif /* ZORBA_NO_FULL_TEXT */
+
#ifndef ZORBA_HAVE_UNIQUE_PTR
int test_unique_ptr( int, char*[] );
#endif /* ZORBA_HAVE_UNIQUE_PTR */
+<<<<<<< TREE
int test_uuid( int, char*[] );
+=======
+
+>>>>>>> MERGE-SOURCE
#ifndef ZORBA_HAVE_UNORDERED_MAP
int test_unordered_map( int, char*[] );
#endif /* ZORBA_HAVE_UNORDERED_MAP */
+
#ifndef ZORBA_HAVE_UNORDERED_SET
int test_unordered_set( int, char*[] );
#endif /* ZORBA_HAVE_UNORDERED_SET */
+ int test_hashmaps(int argc, char* argv[]);
+
void initializeTestList();
} // namespace UnitTests
=== modified file 'src/unit_tests/unit_tests.cpp'
--- src/unit_tests/unit_tests.cpp 2012-09-17 00:36:37 +0000
+++ src/unit_tests/unit_tests.cpp 2012-09-19 12:44:24 +0000
@@ -38,29 +38,43 @@
void initializeTestList()
{
libunittests["base64"] = test_base64;
+
libunittests["base64_streambuf"] = test_base64_streambuf;
+
libunittests["fs_iterator"] = test_fs_iterator;
- //libunittests["memory_manager"] = test_mem_manager;
+
#ifndef ZORBA_NO_ICU
libunittests["icu_streambuf"] = test_icu_streambuf;
#endif /* ZORBA_NO_ICU */
+
libunittests["json_parser"] = test_json_parser;
+
libunittests["string"] = test_string;
+
#ifndef ZORBA_NO_FULL_TEXT
libunittests["stemmer"] = test_stemmer;
libunittests["thesaurus"] = test_thesaurus;
libunittests["tokenizer"] = test_tokenizer;
#endif /* ZORBA_NO_FULL_TEXT */
+
#ifndef ZORBA_HAVE_UNIQUE_PTR
libunittests["unique_ptr"] = test_unique_ptr;
#endif /* ZORBA_HAVE_UNIQUE_PTR */
+<<<<<<< TREE
libunittests["uuid"] = test_uuid;
+=======
+
+>>>>>>> MERGE-SOURCE
#ifndef ZORBA_HAVE_UNORDERED_MAP
libunittests["unordered_map"] = test_unordered_map;
#endif /* ZORBA_HAVE_UNORDERED_MAP */
+
#ifndef ZORBA_HAVE_UNORDERED_SET
libunittests["unordered_set"] = test_unordered_set;
#endif /* ZORBA_HAVE_UNORDERED_SET */
+
+ libunittests["hashmaps"] = test_hashmaps;
+
libunittests["uri"] = runUriTest;
#ifdef ZORBA_WITH_DEBUGGER
=== modified file 'src/zorbaserialization/bin_archiver.cpp'
--- src/zorbaserialization/bin_archiver.cpp 2012-09-17 00:36:37 +0000
+++ src/zorbaserialization/bin_archiver.cpp 2012-09-19 12:44:24 +0000
@@ -137,7 +137,7 @@
BinArchiver::BinArchiver(std::ostream* os)
:
Archiver(true),
- theStringPool(1024, false, false),
+ theStringPool(1024, false),
theFirstBinaryString(0)
{
this->is = NULL;
=== modified file 'src/zorbautils/hashmap.h'
--- src/zorbautils/hashmap.h 2012-09-17 00:36:37 +0000
+++ src/zorbautils/hashmap.h 2012-09-19 12:44:24 +0000
@@ -16,6 +16,7 @@
#ifndef ZORBA_UTILS_HASHMAP_H
#define ZORBA_UTILS_HASHMAP_H
+#include <vector>
#include <cstddef>
#include <zorba/config.h>
@@ -23,9 +24,10 @@
#include "common/common.h"
#include "zorbautils/fatal.h"
-#include "zorbautils/checked_vector.h"
#include "zorbautils/mutex.h"
+#include "store/api/shared_types.h"
+
namespace zorba
{
@@ -42,18 +44,81 @@
template <class T, class V>
class HashEntry
{
+ struct KeyHolder
+ {
+ char theKey[sizeof(T)];
+ };
+
+ struct ValueHolder
+ {
+ char theValue[sizeof(V)];
+ };
+
public:
bool theIsFree;
- T theItem;
- V theValue;
+ KeyHolder theKey;
+ ValueHolder theValue;
ptrdiff_t theNext; // offset from "this" to the next entry.
- HashEntry() : theIsFree(true), theNext(0) { }
+ HashEntry()
+ :
+ theIsFree(true),
+ theNext(0)
+ {
+ }
+
+ HashEntry(const HashEntry<T, V>& other)
+ {
+ theIsFree = other.theIsFree;
+ theNext = other.theNext;
+ if (!theIsFree)
+ {
+ new (&theKey) T(other.key());
+ new (&theValue) V(other.value());
+ }
+ }
~HashEntry()
{
- theIsFree = true;
- theNext = 0;
+ if (!theIsFree)
+ {
+ key().~T();
+ value().~V();
+ }
+ }
+
+ HashEntry<T, V>& operator = (const HashEntry<T, V>& other)
+ {
+ if (theIsFree)
+ {
+ assert(false);
+
+ if (!other.theIsFree)
+ {
+ new (&theKey) T(other.key());
+ new (&theValue) V(other.value());
+ }
+ }
+ else
+ {
+ if (!other.theIsFree)
+ {
+ key() = other.key();
+ value() = other.value();
+ }
+ else
+ {
+ assert(false);
+
+ key().~T();
+ value().~V();
+ }
+ }
+
+ theIsFree = other.theIsFree;
+ theNext = other.theNext;
+
+ return *this;
}
bool isFree() const
@@ -63,15 +128,39 @@
void setFree()
{
- theItem.~T();
+ key().~T();
+ value().~V();
theIsFree = true;
+ theNext = 0;
}
void unsetFree()
{
+ new (&theKey) T;
+ new (&theValue) V;
theIsFree = false;
}
+ T& key()
+ {
+ return *reinterpret_cast<T*>(&theKey);
+ }
+
+ const T& key() const
+ {
+ return *reinterpret_cast<const T*>(&theKey);
+ }
+
+ const V& value() const
+ {
+ return *reinterpret_cast<const V*>(&theValue);
+ }
+
+ V& value()
+ {
+ return *reinterpret_cast<V*>(&theValue);
+ }
+
void setNext(HashEntry* nextEntry)
{
theNext = (nextEntry == NULL ? 0 : nextEntry - this);
@@ -105,17 +194,17 @@
theHashTab : The hash table. The table is implemented as a vector of hash
entries and is devided in 2 areas: Each entry between 0 and
- theHashTabSize - 1 is the head of a hash bucket. Each entry
- between theHashTabSize+1 and theHashTab.size()-1 is either
+ theNumBuckets - 1 is the head of a hash bucket. Each entry
+ between theNumBuckets+1 and theHashTab.size()-1 is either
a "collision" entry (i.e., it belongs to a hash bucket with
more than one entries) or a "free" entry (i.e. it does not
currently belong to any bucket, but is available for
allocation as a collision entry when needed). Free entries
in the collision area are linked in a free list. Entry
- theHashTab[theHashTabSize] is reserved as the head of this
+ theHashTab[theNumBuckets] is reserved as the head of this
free list.
- theHashTabSize : The current number of hash buckets in theHashTab.
- theInitialSize : The initial number of hash buckets.
+ theNumBuckets : The current number of hash buckets in theHashTab.
+
theLoadFactor : The max fraction of non-empty hash buckets after which the
hash table is doubled in size.
@@ -130,11 +219,11 @@
friend class HashMap;
protected:
- checked_vector<HashEntry<T, V> >* theHashTab;
- size_t thePos;
+ std::vector<HashEntry<T, V> >* theHashTab;
+ csize thePos;
protected:
- iterator(checked_vector<HashEntry<T, V> >* ht, size_t pos)
+ iterator(std::vector<HashEntry<T, V> >* ht, csize pos)
:
theHashTab(ht),
thePos(pos)
@@ -150,7 +239,7 @@
HashEntry<T, V>& entry = (*theHashTab)[thePos];
- return entry.theItem;
+ return entry.key();
}
public:
@@ -194,7 +283,7 @@
const HashEntry<T, V>& entry = (*theHashTab)[thePos];
- return std::pair<T, V>(entry.theItem, entry.theValue);
+ return std::pair<T, V>(entry.key(), entry.value());
}
const T& getKey() const
@@ -203,16 +292,25 @@
const HashEntry<T, V>& entry = (*theHashTab)[thePos];
- return entry.theItem;
- }
-
- V& getValue() const
- {
- ZORBA_FATAL(thePos < theHashTab->size(), "");
-
- HashEntry<T, V>& entry = (*theHashTab)[thePos];
-
- return entry.theValue;
+ return entry.key();
+ }
+
+ const V& getValue() const
+ {
+ ZORBA_FATAL(thePos < theHashTab->size(), "");
+
+ HashEntry<T, V>& entry = (*theHashTab)[thePos];
+
+ return entry.value();
+ }
+
+ V& getValue()
+ {
+ ZORBA_FATAL(thePos < theHashTab->size(), "");
+
+ HashEntry<T, V>& entry = (*theHashTab)[thePos];
+
+ return entry.value();
}
void setValue(const V& val)
@@ -221,7 +319,7 @@
HashEntry<T, V>& entry = (*theHashTab)[thePos];
- entry.theValue = val;
+ entry.value() = val;
}
};
@@ -230,20 +328,22 @@
static const double DEFAULT_LOAD_FACTOR;
protected:
- ulong theNumEntries;
-
- size_t theHashTabSize;
- size_t theInitialSize;
- checked_vector<HashEntry<T, V> > theHashTab;
- double theLoadFactor;
- C theCompareFunction;
-
- bool theUseTransfer;
-
- SYNC_CODE(mutable Mutex theMutex;)
- SYNC_CODE(Mutex * theMutexp;)
-
- int numCollisions;
+ std::vector<HashEntry<T, V> > theHashTab;
+
+ csize theNumBuckets;
+
+ csize theNumEntries;
+
+ double theLoadFactor;
+
+ double theMaxLoad;
+
+ C theCompareFunction;
+
+ SYNC_CODE(mutable Mutex theMutex;)
+ SYNC_CODE(Mutex * theMutexp;)
+
+ csize theNumCollisions;
public:
@@ -257,19 +357,20 @@
depends on some parametrs (e.g. the collation or timezone). These parameters
are provided as data members of the given comparison-function obj.
********************************************************************************/
-HashMap(const C& compFunction, size_t size, bool sync, bool useTransfer = false)
+HashMap(const C& compFunction, csize size, bool sync)
:
+ theNumBuckets(size),
theNumEntries(0),
- theHashTabSize(size),
- theInitialSize(size),
- theHashTab(computeTabSize(size)),
theLoadFactor(DEFAULT_LOAD_FACTOR),
theCompareFunction(compFunction),
- theUseTransfer(useTransfer),
- numCollisions(0)
+ theNumCollisions(0)
{
+ theHashTab.resize(computeCapacity(size));
+
formatCollisionArea();
+ theMaxLoad = theNumBuckets * theLoadFactor;
+
SYNC_CODE(theMutexp = (sync ? &theMutex : NULL);)
}
@@ -284,18 +385,19 @@
theCompareFunction data member is initialized with the default constructor
of the C class.
********************************************************************************/
-HashMap(size_t size, bool sync, bool useTransfer = false)
+HashMap(csize size, bool sync)
:
+ theNumBuckets(size),
theNumEntries(0),
- theHashTabSize(size),
- theInitialSize(size),
- theHashTab(computeTabSize(size)),
theLoadFactor(DEFAULT_LOAD_FACTOR),
- theUseTransfer(useTransfer),
- numCollisions(0)
+ theNumCollisions(0)
{
+ theHashTab.resize(computeCapacity(size));
+
formatCollisionArea();
+ theMaxLoad = theNumBuckets * theLoadFactor;
+
SYNC_CODE(theMutexp = (sync ? &theMutex : NULL);)
}
@@ -340,7 +442,7 @@
/*******************************************************************************
********************************************************************************/
-ulong size() const
+csize size() const
{
return theNumEntries;
}
@@ -349,7 +451,7 @@
/*******************************************************************************
********************************************************************************/
-size_t capacity() const
+csize capacity() const
{
return theHashTab.size();
}
@@ -358,6 +460,24 @@
/*******************************************************************************
********************************************************************************/
+csize bucket_count() const
+{
+ return theNumBuckets;
+}
+
+
+/*******************************************************************************
+
+********************************************************************************/
+csize collisions() const
+{
+ return theNumCollisions;
+}
+
+
+/*******************************************************************************
+
+********************************************************************************/
C get_compare_function()
{
return theCompareFunction;
@@ -388,13 +508,17 @@
void clearNoSync()
{
theNumEntries = 0;
- numCollisions = 0;
-
- size_t n = theHashTab.size();
-
- for (size_t i = 0; i < n; ++i)
+ theNumCollisions = 0;
+
+ csize n = theHashTab.size();
+
+ HashEntry<T, V>* entry = &theHashTab[0];
+ HashEntry<T, V>* lastentry = &theHashTab[n];
+
+ for (; entry < lastentry; ++entry)
{
- theHashTab[i].~HashEntry<T, V>();
+ if (!entry->isFree())
+ entry->setFree();
}
formatCollisionArea();
@@ -406,13 +530,13 @@
********************************************************************************/
iterator begin() const
{
- return iterator(const_cast<checked_vector<HashEntry<T, V> >*>(&theHashTab), 0);
+ return iterator(const_cast<std::vector<HashEntry<T, V> >*>(&theHashTab), 0);
}
iterator end() const
{
- return iterator(const_cast<checked_vector<HashEntry<T, V> >*>(&theHashTab),
+ return iterator(const_cast<std::vector<HashEntry<T, V> >*>(&theHashTab),
theHashTab.size());
}
@@ -437,7 +561,7 @@
while (entry != NULL)
{
- if (equal(entry->theItem, item))
+ if (equal(entry->key(), item))
return true;
entry = entry->getNext();
@@ -468,7 +592,7 @@
while (entry != NULL)
{
- if (equal(entry->theItem, item))
+ if (equal(entry->key(), item))
return iterator(&theHashTab, entry - &theHashTab[0]);
entry = entry->getNext();
@@ -498,9 +622,9 @@
while (entry != NULL)
{
- if (equal(entry->theItem, item))
+ if (equal(entry->key(), item))
{
- value = entry->theValue;
+ value = entry->value();
return true;
}
@@ -527,8 +651,8 @@
if (!found)
{
- entry->theItem = pair.first;
- entry->theValue = pair.second;
+ entry->key() = pair.first;
+ entry->value() = pair.second;
}
return !found;
@@ -552,12 +676,12 @@
if (!found)
{
- entry->theItem = item;
- entry->theValue = value;
+ entry->key() = item;
+ entry->value() = value;
}
else
{
- value = entry->theValue;
+ value = entry->value();
}
return !found;
@@ -586,7 +710,7 @@
while (entry != NULL)
{
- if (equal(entry->theItem, item))
+ if (equal(entry->key(), item))
{
found = true;
break;
@@ -602,7 +726,7 @@
}
else
{
- entry->theValue = value;
+ entry->value() = value;
return true;
}
}
@@ -616,13 +740,13 @@
{
SYNC_CODE(AutoMutex lock(theMutexp);)
- if (ite.thePos < theHashTabSize)
+ if (ite.thePos < theNumBuckets)
{
eraseEntry(&theHashTab[ite.thePos], NULL);
}
else
{
- const T& item = theHashTab[ite.thePos].theItem;
+ const T& item = theHashTab[ite.thePos].key();
ulong hval = hash(item);
@@ -665,7 +789,7 @@
// If the item to remove is in the 1st entry of a bucket, then if the
// bucket has no other entries, just call the destructor on that entry,
// else copy the 2nd entry to the 1st entry and freeup the 2nd entry.
- if (equal(entry->theItem, item))
+ if (equal(entry->key(), item))
{
eraseEntry(entry, NULL);
return true;
@@ -678,7 +802,7 @@
while (entry != NULL)
{
- if (equal(entry->theItem, item))
+ if (equal(entry->key(), item))
{
eraseEntry(entry, preventry);
return true;
@@ -698,9 +822,9 @@
/*******************************************************************************
********************************************************************************/
-size_t computeTabSize(size_t size) const
+csize computeCapacity(csize size) const
{
- return size + 32 + size/5;
+ return size + 32 + size / (5 - 10 * (theLoadFactor - 0.7)) ;
}
@@ -724,7 +848,7 @@
********************************************************************************/
HashEntry<T, V>* bucket(ulong hvalue)
{
- return &theHashTab[hvalue % theHashTabSize];
+ return &theHashTab[hvalue % theNumBuckets];
}
@@ -733,7 +857,7 @@
********************************************************************************/
const HashEntry<T, V>* bucket(ulong hvalue) const
{
- return &theHashTab[hvalue % theHashTabSize];
+ return &theHashTab[hvalue % theNumBuckets];
}
@@ -742,7 +866,7 @@
********************************************************************************/
HashEntry<T, V>* freelist()
{
- return &theHashTab[theHashTabSize];
+ return &theHashTab[theNumBuckets];
}
@@ -755,41 +879,38 @@
{
if (entry->theNext == 0)
{
- entry->~HashEntry<T, V>();
+ entry->setFree();
}
else
{
HashEntry<T, V>* nextEntry = entry->getNext();
*entry = *nextEntry;
entry->setNext(nextEntry->getNext());
- nextEntry->~HashEntry<T, V>();
+ nextEntry->setFree();
nextEntry->setNext(freelist()->getNext());
freelist()->setNext(nextEntry);
}
- theNumEntries--;
+ --theNumEntries;
- if (theHashTabSize > theInitialSize &&
- theNumEntries < (theHashTabSize / 2) * theLoadFactor)
+ if (theNumEntries < theMaxLoad / 2)
{
- resizeHashTab(theHashTabSize / 2);
+ resizeHashTab(theNumBuckets / 2);
}
}
else
{
preventry->setNext(entry->getNext());
- entry->~HashEntry<T, V>();
+ entry->setFree();
entry->setNext(freelist()->getNext());
freelist()->setNext(entry);
- theNumEntries--;
- numCollisions--;
+ --theNumEntries;
- if (theHashTabSize > theInitialSize &&
- theNumEntries < (theHashTabSize / 2) * theLoadFactor)
+ if (theNumEntries < theMaxLoad / 2)
{
- resizeHashTab(theHashTabSize / 2);
+ resizeHashTab(theNumBuckets / 2);
}
}
}
@@ -812,7 +933,7 @@
// If the hash bucket is empty, its 1st entry is used to store the new string.
if (headEntry->isFree())
{
- theNumEntries++;
+ ++theNumEntries;
headEntry->unsetFree();
return headEntry;
}
@@ -822,7 +943,7 @@
while (currEntry != NULL)
{
- if (equal(currEntry->theItem, item))
+ if (equal(currEntry->key(), item))
{
found = true;
return currEntry;
@@ -836,22 +957,22 @@
// Do garbage collection if the hash table is more than 60% full. Note that
// gc does NOT resize theHashTab, so after gc, the item still belongs to the
// same bucket as before gc.
- if (theNumEntries > theHashTabSize * theLoadFactor)
+ if (theNumEntries > theMaxLoad)
{
garbageCollect();
if (headEntry->isFree())
{
- theNumEntries++;
+ ++theNumEntries;
headEntry->unsetFree();
return headEntry;
}
}
// Double the size of the hash table if it is more than 60% full.
- if (theNumEntries > theHashTabSize * theLoadFactor)
+ if (theNumEntries > theMaxLoad)
{
- resizeHashTab(theHashTabSize * 2);
+ resizeHashTab(theNumBuckets * 2);
goto retry;
}
@@ -859,8 +980,8 @@
// collision list for its bucket. We place the new item right after the
// headEntry for the bucket.
- theNumEntries++;
- numCollisions++;
+ ++theNumEntries;
+ ++theNumCollisions;
// If no free entry exists, we extend the collision area of the hash table.
if (freelist()->getNext() == 0)
@@ -886,9 +1007,13 @@
********************************************************************************/
void extendCollisionArea()
{
- size_t oldSize = theHashTab.size();
- size_t numCollisionEntries = oldSize - theHashTabSize;
- size_t newSize = theHashTabSize + 2 * numCollisionEntries;
+ csize oldSize = theHashTab.size();
+ csize numCollisionEntries = oldSize - theNumBuckets;
+ csize newSize = theNumBuckets + 2 * numCollisionEntries;
+
+ std::cout << "Extending collision area" << std::endl
+ << "numBuckets = " << theNumBuckets << " numCollisionEntries = "
+ << numCollisionEntries << std::endl << std::endl;
//foo(); for setting a breakpoint
@@ -909,7 +1034,8 @@
firstentry = freelist();
HashEntry<T, V>* lastentry = &theHashTab[theHashTab.size() - 1];
- for (HashEntry<T, V>* entry = firstentry; entry < lastentry; entry++)
+
+ for (HashEntry<T, V>* entry = firstentry; entry < lastentry; ++entry)
entry->theNext = 1;
lastentry->theNext = 0;
@@ -919,31 +1045,34 @@
/*******************************************************************************
********************************************************************************/
-void resizeHashTab(size_t newSize)
+void resizeHashTab(csize newSize)
{
- HashEntry<T, V>* entry;
- HashEntry<T, V>* oldentry;
+ if (newSize == 0)
+ newSize = 3;
+
+ csize oldcap = theHashTab.size();
+ csize newcap = computeCapacity(newSize);
// Create a new vector of new size and swap theHashTab with this new vector
- checked_vector<HashEntry<T, V> > oldTab(computeTabSize(newSize));
+ std::vector<HashEntry<T, V> > oldTab(newcap);
theHashTab.swap(oldTab);
- size_t oldsize = oldTab.size();
- theHashTabSize = newSize;
+ theNumBuckets = newSize;
+ theMaxLoad = theNumBuckets * theLoadFactor;
formatCollisionArea();
- numCollisions = 0;
+ HashEntry<T, V>* entry;
+ HashEntry<T, V>* oldentry = &oldTab[0];
+ HashEntry<T, V>* lastentry = &oldTab[oldcap];
// Now rehash every entry
- for (size_t i = 0; i < oldsize; i++)
+ for (; oldentry < lastentry; ++oldentry)
{
- oldentry = &oldTab[i];
-
if (oldentry->isFree())
continue;
- entry = bucket (hash(oldentry->theItem));
+ entry = bucket(hash(oldentry->key()));
if (!entry->isFree())
{
@@ -962,12 +1091,11 @@
freelist()->setNext(entry->getNext());
entry->setNext(headEntry->getNext());
headEntry->setNext(entry);
- numCollisions++;
}
- entry->theItem = oldentry->theItem;
- entry->theValue = oldentry->theValue;
entry->unsetFree();
+ entry->key() = oldentry->key();
+ entry->value() = oldentry->value();
}
}
=== modified file 'src/zorbautils/hashset.h'
--- src/zorbautils/hashset.h 2012-09-17 00:36:37 +0000
+++ src/zorbautils/hashset.h 2012-09-19 12:44:24 +0000
@@ -45,16 +45,16 @@
typedef typename HashMap<T, DummyHashValue, C>::iterator iterator;
-HashSet(const C& compFunction, ulong size, bool sync, bool useTransfer = false)
+HashSet(const C& compFunction, ulong size, bool sync)
:
- HashMap<T, DummyHashValue, C>(compFunction, size, sync, useTransfer)
+ HashMap<T, DummyHashValue, C>(compFunction, size, sync)
{
}
HashSet(ulong size, bool sync)
:
- HashMap<T, DummyHashValue, C>(size, sync, false)
+ HashMap<T, DummyHashValue, C>(size, sync)
{
}
@@ -100,12 +100,7 @@
if (!found)
{
- /*
- if (this->theUseTransfer)
- entry->theItem.transfer(item);
- else
- */
- entry->theItem = item;
+ entry->key() = item;
}
return !found;
@@ -129,19 +124,13 @@
entry = this->hashInsert(item, this->hash(item), found);
if (!found)
{
- /*
- if (this->theUseTransfer)
- entry->theItem.transfer(item);
- else
- */
- entry->theItem = item;
-
- outItem = entry->theItem;
+ entry->key() = item;
+ outItem = entry->key();
return true;
}
else
{
- outItem = entry->theItem;
+ outItem = entry->key();
return false;
}
}
@@ -164,13 +153,13 @@
entry = this->hashInsert(item, this->hash(item), found);
if (!found)
{
- entry->theItem = item;
- outItem = entry->theItem;
+ entry->key() = item;
+ outItem = entry->key();
return true;
}
else
{
- outItem = entry->theItem;
+ outItem = entry->key();
return false;
}
}
Follow ups