zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #11643
[Merge] lp:~zorba-coders/zorba/markos-scratch into lp:zorba
Markos Zaharioudakis has proposed merging lp:~zorba-coders/zorba/markos-scratch into lp:zorba.
Requested reviews:
Markos Zaharioudakis (markos-za)
For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/markos-scratch/+merge/112827
Optimized hash function used for nodes (fixes bug #1010051) + some hashmap/hashset cleanup
--
https://code.launchpad.net/~zorba-coders/zorba/markos-scratch/+merge/112827
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'ChangeLog'
--- ChangeLog 2012-06-29 13:25:20 +0000
+++ ChangeLog 2012-06-29 18:25:24 +0000
@@ -17,6 +17,7 @@
* Improved hoist rule: tighter hoisting of expressions (also fixes bug #967428,
which is only a performance bug)
* Optimized hash sets used by fn:distinct-values and nodes-distinct
+ * Optimized hash function used for nodes (also fixes bug #1010051)
* No more serialization of compiler expressions.
* Big rewrite of plan serializer internals, resulting in big performance improvement.
=== modified file 'src/runtime/sequences/SequencesImpl.cpp'
--- src/runtime/sequences/SequencesImpl.cpp 2012-06-28 04:14:03 +0000
+++ src/runtime/sequences/SequencesImpl.cpp 2012-06-29 18:25:24 +0000
@@ -48,7 +48,6 @@
#include "store/api/iterator.h"
#include "store/api/item_factory.h"
#include "store/api/pul.h"
-#include "store/util/hashset_node_handle.h"
#include "context/static_context.h"
#include "context/dynamic_context.h"
=== modified file 'src/runtime/sequences/SequencesImpl.h'
--- src/runtime/sequences/SequencesImpl.h 2012-06-28 04:14:03 +0000
+++ src/runtime/sequences/SequencesImpl.h 2012-06-29 18:25:24 +0000
@@ -34,13 +34,6 @@
namespace zorba
{
-namespace store
-{
- class NodeHashSet;
-}
-
-class ValueCollCompareParam;
-
/////////////////////////////////////////////////////////////////////////////////
// //
=== modified file 'src/runtime/sequences/pregenerated/sequences.h'
--- src/runtime/sequences/pregenerated/sequences.h 2012-06-28 04:14:03 +0000
+++ src/runtime/sequences/pregenerated/sequences.h 2012-06-29 18:25:24 +0000
@@ -34,9 +34,7 @@
namespace zorba {
-namespace store{
- class NodeHashSet;
-}
+class NodeHandleHashSet;
class AtomicItemHandleHashSet;
/**
*
@@ -708,7 +706,7 @@
class HashSemiJoinIteratorState : public PlanIteratorState
{
public:
- store::NodeHashSet* theRightInput; //
+ NodeHandleHashSet* theRightInput; //
HashSemiJoinIteratorState();
=== modified file 'src/runtime/sequences/sequences_impl.cpp'
--- src/runtime/sequences/sequences_impl.cpp 2012-06-28 04:14:03 +0000
+++ src/runtime/sequences/sequences_impl.cpp 2012-06-29 18:25:24 +0000
@@ -53,10 +53,10 @@
#include <store/api/item_factory.h>
#include "store/api/temp_seq.h"
#include <store/api/pul.h>
-#include <store/util/hashset_node_handle.h>
#include <context/static_context.h>
+#include "zorbautils/hashset_node_itemh.h"
#include "zorbautils/hashset_atomic_itemh.h"
namespace zorbatm = zorba::time;
@@ -1106,7 +1106,7 @@
********************************************************************************/
HashSemiJoinIteratorState::HashSemiJoinIteratorState()
{
- theRightInput = new store::NodeHashSet();
+ theRightInput = new NodeHandleHashSet(1024, false);
}
=== modified file 'src/runtime/spec/sequences/sequences.xml'
--- src/runtime/spec/sequences/sequences.xml 2012-06-28 04:14:03 +0000
+++ src/runtime/spec/sequences/sequences.xml 2012-06-29 18:25:24 +0000
@@ -15,7 +15,7 @@
<zorba:header>
<zorba:include form="Quoted">runtime/base/narybase.h</zorba:include>
<zorba:include form="Quoted">runtime/core/path_iterators.h</zorba:include>
- <zorba:fwd-decl ns="store">NodeHashSet</zorba:fwd-decl>
+ <zorba:fwd-decl ns="zorba">NodeHandleHashSet</zorba:fwd-decl>
<zorba:fwd-decl ns="zorba">AtomicItemHandleHashSet</zorba:fwd-decl>
</zorba:header>
@@ -680,7 +680,7 @@
<zorba:state generateInit="false" generateReset="false"
generateConstructor="false" generateDestructor="false">
- <zorba:member type="store::NodeHashSet*" name="theRightInput" brief=""/>
+ <zorba:member type="NodeHandleHashSet*" name="theRightInput" brief=""/>
</zorba:state>
<zorba:constructor>
=== modified file 'src/store/naive/node_items.cpp'
--- src/store/naive/node_items.cpp 2012-06-28 04:14:03 +0000
+++ src/store/naive/node_items.cpp 2012-06-29 18:25:24 +0000
@@ -471,9 +471,29 @@
/*******************************************************************************
********************************************************************************/
-void XmlNode::setTreeInternal(const XmlTree* aNewTree)
-{
- theUnion.treeRCPtr = (long*)aNewTree;
+bool XmlNode::equals(const store::Item* other, long, const XQPCollator*) const
+{
+ assert(!isConnectorNode());
+ return this == other;
+}
+
+
+/*******************************************************************************
+
+********************************************************************************/
+uint32_t XmlNode::hash(long, const XQPCollator*) const
+{
+ assert(!isConnectorNode());
+ return reinterpret_cast<uintptr_t>(this);
+}
+
+
+/*******************************************************************************
+
+********************************************************************************/
+void XmlNode::setTreeInternal(const XmlTree* newTree)
+{
+ theUnion.treeRCPtr = (long*)newTree;
}
=== modified file 'src/store/naive/node_items.h'
--- src/store/naive/node_items.h 2012-06-28 04:14:03 +0000
+++ src/store/naive/node_items.h 2012-06-29 18:25:24 +0000
@@ -387,6 +387,7 @@
private:
void setTreeInternal(const XmlTree* t);
+
void setTree(const XmlTree* t);
void destroyInternal(bool removeType);
@@ -435,20 +436,9 @@
bool equals(
const store::Item* other,
long timezone = 0,
- const XQPCollator* aCollation = 0) const
- {
- assert(!isConnectorNode());
- return this == other;
- }
-
- uint32_t hash(long timezone = 0, const XQPCollator* aCollation = 0) const
- {
- assert(!isConnectorNode());
- XmlNode* node = const_cast<XmlNode*>(this);
- return hashfun::h32((void*)(&node), sizeof(node), FNV_32_INIT);
- }
-
- inline long compare2(const XmlNode* other) const;
+ const XQPCollator* aCollation = 0) const;
+
+ uint32_t hash(long timezone = 0, const XQPCollator* aCollation = 0) const;
void getBaseURI(zstring& uri) const
{
@@ -524,6 +514,8 @@
GuideNode* getDataGuide() const { return getTree()->getDataGuide(); }
#endif
+ inline long compare2(const XmlNode* other) const;
+
virtual XmlNode* copyInternal(
InternalNode* rootParent,
InternalNode* parent,
=== removed file 'src/store/util/hashset_node_handle.h'
--- src/store/util/hashset_node_handle.h 2012-06-28 04:14:03 +0000
+++ src/store/util/hashset_node_handle.h 1970-01-01 00:00:00 +0000
@@ -1,70 +0,0 @@
-/*
- * Copyright 2006-2008 The FLWOR Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ZORBA_STORE_UTIL_NODE_HANDLE_HASHSET
-#define ZORBA_STORE_UTIL_NODE_HANDLE_HASHSET
-
-#include "common/common.h"
-#include "zorbautils/hashset.h"
-
-namespace zorba { namespace store {
-
-
-/*******************************************************************************
- A hash-based set container of node rchandles, where equality is based on
- node identity
-********************************************************************************/
-class NodeHashSet
-{
-public:
-
- class CompareFunction
- {
- public:
- bool equal(const Item_t& t1, const Item_t& t2) const
- {
- return t1->equals(t2, 0);
- }
-
- uint32_t hash(const Item_t& t) const
- {
- return t->hash(0);
- }
- };
-
-private:
- HashSet<Item_t, CompareFunction> theSet;
-
-public:
- NodeHashSet(ulong size = 1024) : theSet(size, false)
- {
- }
-
- void clear() { theSet.clear(); }
-
- bool empty() const { return theSet.empty(); }
-
- bool exists(const Item_t& key) { return theSet.exists(key); }
-
- bool insert(Item_t& key) { return theSet.insert(key); }
-
- bool erase(const Item_t& key) { return theSet.erase(key); }
-};
-
-} // namespace store
-} // namespace zorba
-
-#endif
-/* vim:set et sw=2 ts=2: */
=== modified file 'src/zorbautils/hashmap.h'
--- src/zorbautils/hashmap.h 2012-06-28 04:14:03 +0000
+++ src/zorbautils/hashmap.h 2012-06-29 18:25:24 +0000
@@ -311,6 +311,15 @@
/*******************************************************************************
********************************************************************************/
+Mutex* get_mutex() const
+{
+ return theMutexp;
+}
+
+
+/*******************************************************************************
+
+********************************************************************************/
void set_load_factor(double v)
{
theLoadFactor = v;
@@ -410,13 +419,13 @@
Return true if the set already contains an item that is "equal" to the given
item; otherwise return false.
********************************************************************************/
-bool exists(const T& item)
+bool exists(const T& item) const
{
ulong hval = hash(item);
SYNC_CODE(AutoMutex lock(theMutexp);)
- HashEntry<T, V>* entry = bucket(hval);
+ const HashEntry<T, V>* entry = bucket(hval);
if (entry->isFree())
return false;
=== modified file 'src/zorbautils/hashmap_itemh.h'
--- src/zorbautils/hashmap_itemh.h 2012-06-28 04:14:03 +0000
+++ src/zorbautils/hashmap_itemh.h 2012-06-29 18:25:24 +0000
@@ -62,14 +62,24 @@
};
+/*******************************************************************************
+ A hash-based map mapping item handles to data items of type V. Equality is
+ based on the store::Item::equals() method.
+
+ It is used to map annotation names to annotation ids.
+
+ NOTE: Although the map uses raw item pointers instead of rchandles, reference
+ counting is still done, but done manually (see insert and clear methods)
+********************************************************************************/
template <class V>
-class ItemHandleHashMap : public HashMap<store::Item*,
- V,
- ItemHandleHashMapCmp>
+class ItemHandleHashMap
{
public:
typedef typename HashMap<store::Item*, V, ItemHandleHashMapCmp>::iterator iterator;
+private:
+ HashMap<store::Item*, V, ItemHandleHashMapCmp> theMap;
+
public:
ItemHandleHashMap(
long timezone,
@@ -77,27 +87,18 @@
ulong size,
bool sync)
:
- HashMap<store::Item*, V, ItemHandleHashMapCmp>(
- ItemHandleHashMapCmp(timezone, collation),
- size,
- sync)
+ theMap(ItemHandleHashMapCmp(timezone, collation), size, sync)
{
}
~ItemHandleHashMap()
{
- iterator ite = this->begin();
- iterator end = this->end();
-
- for (; ite != end; ++ite)
- {
- (*ite).first->removeReference();
- }
+ clear();
}
void clear()
{
- SYNC_CODE(AutoMutex lock(this->theMutexp);)
+ SYNC_CODE(AutoMutex lock(theMap.get_mutex());)
iterator ite = this->begin();
iterator end = this->end();
@@ -107,33 +108,38 @@
(*ite).first->removeReference();
}
- HashMap<store::Item*, V, ItemHandleHashMapCmp>::clearNoSync();
+ theMap.clearNoSync();
}
- void eraseEntry(
- HashEntry<store::Item*, V>* entry,
- HashEntry<store::Item*, V>* preventry)
- {
- entry->theItem->removeReference();
+ iterator begin() const { return theMap.begin(); }
- HashMap<store::Item*, V, ItemHandleHashMapCmp>::eraseEntry(entry, preventry);
- }
+ iterator end() const { return theMap.end(); }
iterator find(const store::Item_t& item)
{
- return HashMap<store::Item*, V, ItemHandleHashMapCmp>::find(item.getp());
+ return theMap.find(item.getp());
}
bool insert(const store::Item_t& item, V& value)
{
- bool inserted =
- HashMap<store::Item*, V, ItemHandleHashMapCmp>::insert(item.getp(), value);
+ bool inserted = theMap.insert(item.getp(), value);
if (inserted)
item->addReference();
return inserted;
}
+
+ bool erase(const store::Item_t& key)
+ {
+ bool found = theMap.erase(key.getp());
+
+ if (found)
+ key->removeReference();
+
+ return found;
+ }
+
};
}
=== removed file 'src/zorbautils/hashmap_str_obj.h'
--- src/zorbautils/hashmap_str_obj.h 2012-06-28 04:14:03 +0000
+++ src/zorbautils/hashmap_str_obj.h 1970-01-01 00:00:00 +0000
@@ -1,81 +0,0 @@
-/*
- * Copyright 2006-2008 The FLWOR Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-#ifndef ZORBA_HASHMAP_CHAR_PTR_TO_CLASS_POINTER
-#define ZORBA_HASHMAP_CHAR_PTR_TO_CLASS_POINTER
-
-#include "zorbautils/hashmap.h"
-#include "zorbautils/hashfun.h"
-
-namespace zorba {
-
-class CompareCharPtr
-{
-public:
- static uint32_t hash(const char* str)
- {
- return hashfun::h32(str, FNV_32_INIT);
- }
-
- static bool equal(const char* s1, const char* s2)
- {
- return !strcmp(s1, s2);
- }
-};
-
-
-template<typename T>
-class HashCharPtrObj : public HashMap<const char *, T, CompareCharPtr>
-{
-protected:
- bool theIsOwner;
-
-public:
- HashCharPtrObj(bool sync, bool owner)
- :
- HashMap<const char *, T, CompareCharPtr>(1024, sync),
- theIsOwner(owner)
- {
- }
-
- virtual ~HashCharPtrObj()
- {
- if (theIsOwner)
- freeAll();
- }
-
- void freeAll()
- {
- typename HashMap<const char *, T, CompareCharPtr>::iterator it;
- for(it = HashMap<const char *, T, CompareCharPtr>::begin();
- it != HashMap<const char *, T, CompareCharPtr>::end();
- ++it)
- {
- free((void*)(*it).first);
- }
- }
-};
-
-
-} // namespace zorba
-
-#endif
-/*
- * Local variables:
- * mode: c++
- * End:
- */
-/* vim:set et sw=2 ts=2: */
=== modified file 'src/zorbautils/hashset.h'
--- src/zorbautils/hashset.h 2012-06-28 04:14:03 +0000
+++ src/zorbautils/hashset.h 2012-06-29 18:25:24 +0000
@@ -77,7 +77,7 @@
Return true if the set already contains an item that is "equal" to the given
item; otherwise return false.
********************************************************************************/
-bool exists(const T& item)
+bool exists(const T& item) const
{
return HashMap<T, DummyHashValue, C>::exists(item);
}
=== modified file 'src/zorbautils/hashset_node_itemh.h'
--- src/zorbautils/hashset_node_itemh.h 2012-06-28 13:52:31 +0000
+++ src/zorbautils/hashset_node_itemh.h 2012-06-29 18:25:24 +0000
@@ -31,8 +31,10 @@
A hash-based set container of item handles, where equality is based on
object identity (i.e. pointer equality) rather than object value.
- It is used by the NodeDistinctIterator and for the population of a general
- index.
+ It is used
+ 1. by the NodeDistinctIterator
+ 2. for the population of a general index.
+ 3. by the HashSemiJoinIterator, which implements op:intersect and op:except.
NOTE: Although the set uses raw item pointers instead of rchandles, reference
counting is still done, but done manually (see insert and clear methods)
@@ -65,16 +67,20 @@
void clear();
- bool exists(store::Item* const key)
+ bool empty() const { return theSet.empty(); }
+
+ bool exists(const store::Item_t& key) const
{
- return theSet.exists(key);
+ return theSet.exists(key.getp());
}
- bool insert(store::Item* key)
+ bool insert(const store::Item_t& key)
{
assert(key->isNode());
- bool inserted = theSet.insert(key);
+ store::Item* key2 = key.getp();
+
+ bool inserted = theSet.insert(key2);
if (inserted)
{
@@ -82,6 +88,16 @@
}
return inserted;
}
+
+ bool erase(const store::Item_t& key)
+ {
+ bool found = theSet.erase(key.getp());
+
+ if (found)
+ key->removeReference();
+
+ return found;
+ }
};
Follow ups