← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~zorba-coders/zorba/opt-count-index-probe into lp:zorba

 

Matthias Brantner has proposed merging lp:~zorba-coders/zorba/opt-count-index-probe into lp:zorba.

Requested reviews:
  Matthias Brantner (matthias-brantner)

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/opt-count-index-probe/+merge/105528

push-down of count(probe-index()) into the store. The optimization is done in codegen. In the simplestore, the operation is still implemented by iterating over all the items because it cannot be pushed-down further. However, other stores might provide more efficient implementations.
-- 
https://code.launchpad.net/~zorba-coders/zorba/opt-count-index-probe/+merge/105528
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/functions/func_sequences_impl.cpp'
--- src/functions/func_sequences_impl.cpp	2012-05-03 12:31:51 +0000
+++ src/functions/func_sequences_impl.cpp	2012-05-11 20:41:18 +0000
@@ -26,6 +26,7 @@
 
 #include "runtime/collections/collections_impl.h"
 #include "runtime/collections/collections.h"
+#include "runtime/indexing/index_ddl.h"
 
 #include "system/globalenv.h"
 
@@ -566,6 +567,38 @@
                                        collection.getChildren(),
                                        CountCollectionIterator::W3C);
   }
+  else if (typeid(ProbeIndexPointValueIterator) == counted_type)
+  {
+    ProbeIndexPointValueIterator& lIter
+      = static_cast<ProbeIndexPointValueIterator&>(*argv[0]);
+
+    return new ProbeIndexPointValueIterator(
+        sctx, loc, lIter.getChildren(), true);
+  }
+  else if (typeid(ProbeIndexRangeValueIterator) == counted_type)
+  {
+    ProbeIndexRangeValueIterator& lIter
+      = static_cast<ProbeIndexRangeValueIterator&>(*argv[0]);
+
+    return new ProbeIndexRangeValueIterator(
+        sctx, loc, lIter.getChildren(), true);
+  }
+  else if (typeid(ProbeIndexPointGeneralIterator) == counted_type)
+  {
+    ProbeIndexPointGeneralIterator& lIter
+      = static_cast<ProbeIndexPointGeneralIterator&>(*argv[0]);
+
+    return new ProbeIndexPointGeneralIterator(
+        sctx, loc, lIter.getChildren(), true);
+  }
+  else if (typeid(ProbeIndexRangeGeneralIterator) == counted_type)
+  {
+    ProbeIndexRangeGeneralIterator& lIter
+      = static_cast<ProbeIndexRangeGeneralIterator&>(*argv[0]);
+
+    return new ProbeIndexRangeGeneralIterator(
+        sctx, loc, lIter.getChildren(), true);
+  }
   else
   {
     return new FnCountIterator(sctx, loc, argv);

=== modified file 'src/runtime/indexing/index_ddl.cpp'
--- src/runtime/indexing/index_ddl.cpp	2012-05-03 12:31:51 +0000
+++ src/runtime/indexing/index_ddl.cpp	2012-05-11 20:41:18 +0000
@@ -579,11 +579,13 @@
 ProbeIndexPointValueIterator::ProbeIndexPointValueIterator(
     static_context* sctx,
     const QueryLoc& loc,
-    std::vector<PlanIter_t>& children)
+    std::vector<PlanIter_t>& children,
+    bool aCountOnly)
   : 
   NaryBaseIterator<ProbeIndexPointValueIterator,
                    ProbeIndexPointValueIteratorState>(sctx, loc, children),
-  theCheckKeyType(true)
+  theCheckKeyType(true),
+  theCountOnly(aCountOnly)
 {
 }
 
@@ -682,10 +684,19 @@
     if (i == numChildren)
     {
       state->theIterator->init(cond);
-      state->theIterator->open();
-      
-      while(state->theIterator->next(result)) 
-      {
+
+      if (!theCountOnly)
+      {
+        state->theIterator->open();
+        
+        while(state->theIterator->next(result)) 
+        {
+          STACK_PUSH(true, state);
+        }
+      }
+      else
+      {
+        state->theIterator->count(result);
         STACK_PUSH(true, state);
       }
     }
@@ -738,11 +749,13 @@
 ProbeIndexPointGeneralIterator::ProbeIndexPointGeneralIterator(
     static_context* sctx,
     const QueryLoc& loc,
-    std::vector<PlanIter_t>& children)
+    std::vector<PlanIter_t>& children,
+    bool aCountOnly)
   : 
   NaryBaseIterator<ProbeIndexPointGeneralIterator,
                    ProbeIndexPointGeneralIteratorState>(sctx, loc, children),
-  theCheckKeyType(true)
+  theCheckKeyType(true),
+  theCountOnly(aCountOnly)
 {
 }
 
@@ -758,6 +771,7 @@
   (NaryBaseIterator<ProbeIndexPointGeneralIterator,
                     ProbeIndexPointGeneralIteratorState>*)this);
 	ar & theCheckKeyType;
+  ar & theCountOnly;
 }
 
 
@@ -933,11 +947,13 @@
 ProbeIndexRangeValueIterator::ProbeIndexRangeValueIterator(
     static_context* sctx,
     const QueryLoc& loc,
-    std::vector<PlanIter_t>& children)
+    std::vector<PlanIter_t>& children,
+    bool aCountOnly)
   : 
   NaryBaseIterator<ProbeIndexRangeValueIterator,
                    ProbeIndexRangeValueIteratorState>(sctx, loc, children),
-  theCheckKeyType(true)
+  theCheckKeyType(true),
+  theCountOnly(aCountOnly)
 {
 }
 
@@ -954,6 +970,7 @@
                     ProbeIndexRangeValueIteratorState>*)this);
 
   ar & theCheckKeyType;
+  ar & theCountOnly;
 }
 
 
@@ -1114,10 +1131,18 @@
   }
 
   state->theIterator->init(cond);
-  state->theIterator->open();
+  if (!theCountOnly)
+  {
+    state->theIterator->open();
 
-  while(state->theIterator->next(result)) 
+    while(state->theIterator->next(result)) 
+    {
+      STACK_PUSH(true, state);
+    }
+  }
+  else
   {
+    state->theIterator->count(result);
     STACK_PUSH(true, state);
   }
 
@@ -1188,11 +1213,13 @@
 ProbeIndexRangeGeneralIterator::ProbeIndexRangeGeneralIterator(
     static_context* sctx,
     const QueryLoc& loc,
-    std::vector<PlanIter_t>& children)
+    std::vector<PlanIter_t>& children,
+    bool aCountOnly)
   : 
   NaryBaseIterator<ProbeIndexRangeGeneralIterator,
                    ProbeIndexRangeGeneralIteratorState>(sctx, loc, children),
-  theCheckKeyType(true)
+  theCheckKeyType(true),
+  theCountOnly(aCountOnly)
 {
 }
 
@@ -1209,6 +1236,7 @@
                     ProbeIndexRangeGeneralIteratorState>*)this);
 
   ar & theCheckKeyType;
+  ar & theCountOnly;
 }
 
 
@@ -1374,15 +1402,24 @@
       cond->pushBound(*state->theSearchItemsIte, false, inclUpper);
 
       state->theIterator->init(cond);
-      state->theIterator->open();
-
-      while(state->theIterator->next(result)) 
-      {
-        if (state->theNodeHashSet->exists(result))
-          STACK_PUSH(true, state);
-      }
-      
-      state->theIterator->close();
+
+      if (!theCountOnly)
+      {
+        state->theIterator->open();
+
+        while(state->theIterator->next(result)) 
+        {
+          if (state->theNodeHashSet->exists(result))
+            STACK_PUSH(true, state);
+        }
+        
+        state->theIterator->close();
+      }
+      else
+      {
+        state->theIterator->count(result);
+        STACK_PUSH(true, state);
+      }
     }
   }
 
@@ -1407,14 +1444,22 @@
       cond->pushBound(*state->theSearchItemsIte, haveLower, inclBound);
       
       state->theIterator->init(cond);
-      state->theIterator->open();
-
-      while(state->theIterator->next(result)) 
-      {
+      if (!theCountOnly)
+      {
+        state->theIterator->open();
+
+        while(state->theIterator->next(result)) 
+        {
+          STACK_PUSH(true, state);
+        }
+
+        state->theIterator->close();
+      }
+      else
+      {
+        state->theIterator->count(result);
         STACK_PUSH(true, state);
       }
-
-      state->theIterator->close();
     }
   }
 
@@ -1423,14 +1468,22 @@
     getSearchItems(planState, state, false, false, false, false);
 
     state->theIterator->init(cond);
-    state->theIterator->open();
-
-    while(state->theIterator->next(result)) 
-    {
+    if (!theCountOnly)
+    {
+      state->theIterator->open();
+
+      while(state->theIterator->next(result)) 
+      {
+        STACK_PUSH(true, state);
+      }
+
+      state->theIterator->close();
+    }
+    else
+    {
+      state->theIterator->count(result);
       STACK_PUSH(true, state);
     }
-
-    state->theIterator->close();
   }
 
  done:

=== modified file 'src/runtime/indexing/index_ddl.h'
--- src/runtime/indexing/index_ddl.h	2012-05-03 12:31:51 +0000
+++ src/runtime/indexing/index_ddl.h	2012-05-11 20:41:18 +0000
@@ -370,6 +370,7 @@
 {
 protected:
   bool theCheckKeyType;
+  bool theCountOnly;
 
 public:
   SERIALIZABLE_CLASS(ProbeIndexPointValueIterator);
@@ -388,7 +389,8 @@
   ProbeIndexPointValueIterator(
         static_context* sctx,
         const QueryLoc& loc,
-        std::vector<PlanIter_t>& children);
+        std::vector<PlanIter_t>& children,
+        bool aCountOnly = false);
 
   ~ProbeIndexPointValueIterator();
 
@@ -425,6 +427,7 @@
 {
 protected:
   bool theCheckKeyType;
+  bool theCountOnly;
 
 public:
   SERIALIZABLE_CLASS(ProbeIndexPointGeneralIterator);
@@ -438,7 +441,8 @@
   ProbeIndexPointGeneralIterator(
       static_context* sctx,
       const QueryLoc& loc,
-      std::vector<PlanIter_t>& children);
+      std::vector<PlanIter_t>& children,
+      bool aCountOnly = false);
   
   ~ProbeIndexPointGeneralIterator();
 
@@ -495,6 +499,7 @@
 {
 protected:
   bool theCheckKeyType;
+  bool theCountOnly;
 
 public:
   SERIALIZABLE_CLASS(ProbeIndexRangeValueIterator);
@@ -508,7 +513,8 @@
   ProbeIndexRangeValueIterator(
       static_context* sctx,
       const QueryLoc& loc,
-      std::vector<PlanIter_t>& children);
+      std::vector<PlanIter_t>& children,
+      bool aCountOnly = false);
 
   ~ProbeIndexRangeValueIterator();
 
@@ -566,6 +572,7 @@
 {
 protected:
   bool theCheckKeyType;
+  bool theCountOnly;
 
 public:
   SERIALIZABLE_CLASS(ProbeIndexRangeGeneralIterator);
@@ -579,7 +586,8 @@
   ProbeIndexRangeGeneralIterator(
       static_context* sctx,
       const QueryLoc& loc,
-      std::vector<PlanIter_t>& children);
+      std::vector<PlanIter_t>& children,
+      bool aCountOnly = false);
   
   ~ProbeIndexRangeGeneralIterator();
 

=== modified file 'src/store/api/iterator.h'
--- src/store/api/iterator.h	2012-05-03 12:31:51 +0000
+++ src/store/api/iterator.h	2012-05-11 20:41:18 +0000
@@ -187,6 +187,8 @@
   virtual void reset() = 0;
   
   virtual void close() = 0;
+
+  virtual void count(Item_t& result) = 0;
 };
 
 

=== modified file 'src/store/naive/simple_index_general.cpp'
--- src/store/naive/simple_index_general.cpp	2012-05-03 12:31:51 +0000
+++ src/store/naive/simple_index_general.cpp	2012-05-11 20:41:18 +0000
@@ -2080,6 +2080,24 @@
 }
 
 
+/******************************************************************************
+ The implementation here doesn't really give anything in terms of
+ performance but other implementations might be able to provide more
+ efficient ones.
+********************************************************************************/
+void ProbeGeneralIndexIterator::count(store::Item_t& result)
+{
+  xs_integer lRes = xs_integer(0);
+
+  open();
+  store::Item_t lTmp;
+  while (next(lTmp)) ++lRes;
+  close();
+
+  GET_FACTORY().createInteger(result, lRes);
+}
+
+
 /////////////////////////////////////////////////////////////////////////////////
 //                                                                             //
 //  ProbeHashGeneralIndexIterator                                              //

=== modified file 'src/store/naive/simple_index_general.h'
--- src/store/naive/simple_index_general.h	2012-05-03 12:31:51 +0000
+++ src/store/naive/simple_index_general.h	2012-05-11 20:41:18 +0000
@@ -437,6 +437,8 @@
   void close();
 
   bool next(store::Item_t& result);
+
+  void count(store::Item_t& result);
 };
 
 

=== modified file 'src/store/naive/simple_index_value.cpp'
--- src/store/naive/simple_index_value.cpp	2012-05-03 12:31:51 +0000
+++ src/store/naive/simple_index_value.cpp	2012-05-11 20:41:18 +0000
@@ -18,6 +18,9 @@
 #include <algorithm>
 
 #include "simple_index_value.h"
+#include "store_defs.h"
+#include "simple_store.h"
+#include "simple_item_factory.h"
 
 #include "diagnostics/xquery_diagnostics.h"
 #include "diagnostics/util_macros.h"
@@ -531,6 +534,24 @@
 }
 
 
+/******************************************************************************
+ The implementation here doesn't really give anything in terms of
+ performance but other implementations might be able to provide more
+ efficient ones.
+********************************************************************************/
+void ProbeValueHashIndexIterator::count(store::Item_t& result)
+{
+  xs_integer lRes = xs_integer(0);
+
+  open();
+  store::Item_t lTmp;
+  while (next(lTmp)) ++lRes;
+  close();
+
+  GET_FACTORY().createInteger(result, lRes);
+}
+
+
 /////////////////////////////////////////////////////////////////////////////////
 //                                                                             //
 //  Value Tree Index                                                           //
@@ -999,6 +1020,24 @@
 }
 
 
+/******************************************************************************
+ The implementation here doesn't really give anything in terms of
+ performance but other implementations might be able to provide more
+ efficient ones.
+********************************************************************************/
+void ProbeValueTreeIndexIterator::count(store::Item_t& result)
+{
+  xs_integer lRes = xs_integer(0);
+
+  open();
+  store::Item_t lTmp;
+  while (next(lTmp)) ++lRes;
+  close();
+
+  GET_FACTORY().createInteger(result, lRes);
+}
+
+
 } // namespace simplestore
 } // namespace zorba
 /* vim:set et sw=2 ts=2: */

=== modified file 'src/store/naive/simple_index_value.h'
--- src/store/naive/simple_index_value.h	2012-05-03 12:31:51 +0000
+++ src/store/naive/simple_index_value.h	2012-05-11 20:41:18 +0000
@@ -187,6 +187,8 @@
   void reset();
 
   void close();
+
+  void count(store::Item_t& result);
 };
 
 
@@ -289,6 +291,8 @@
   void reset();
 
   void close();
+
+  void count(store::Item_t& result);
 };
 
 

=== added file 'test/rbkt/ExpQueryResults/zorba/index/count.xml.res'
--- test/rbkt/ExpQueryResults/zorba/index/count.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/index/count.xml.res	2012-05-11 20:41:18 +0000
@@ -0,0 +1,1 @@
+1 4 4

=== added file 'test/rbkt/Queries/zorba/index/count.xq'
--- test/rbkt/Queries/zorba/index/count.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/index/count.xq	2012-05-11 20:41:18 +0000
@@ -0,0 +1,5 @@
+import module namespace def = "http://www.example.com/"; at "count.xqlib";
+
+def:init();
+
+def:count-user(), def:count-user-range(), def:count-user-general-range()

=== added file 'test/rbkt/Queries/zorba/index/count.xqlib'
--- test/rbkt/Queries/zorba/index/count.xqlib	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/index/count.xqlib	2012-05-11 20:41:18 +0000
@@ -0,0 +1,76 @@
+xquery version "3.0";
+
+module namespace def = "http://www.example.com/";;
+
+import module namespace ddl = "http://www.zorba-xquery.com/modules/store/static/collections/ddl";;
+import module namespace dml = "http://www.zorba-xquery.com/modules/store/static/collections/dml";;
+import module namespace index_ddl = "http://www.zorba-xquery.com/modules/store/static/indexes/ddl";;
+import module namespace index_dml = "http://www.zorba-xquery.com/modules/store/static/indexes/dml";;
+
+declare namespace ann = "http://www.zorba-xquery.com/annotations";;
+
+declare collection def:user as node()*;
+
+declare variable $def:user := xs:QName("def:user");
+ 
+declare %ann:automatic %ann:unique %ann:value-equality index def:user-by-uid
+  on nodes dml:collection(xs:QName("def:user"))
+  by xs:string(@uid) as xs:string;
+
+declare variable $def:user-by-uid := xs:QName("def:user-by-uid");
+ 
+declare %ann:automatic %ann:value-range index def:user-by-uid-range
+  on nodes dml:collection(xs:QName("def:user"))
+  by xs:string(@uid) as xs:string;
+
+declare variable $def:user-by-uid-range := xs:QName("def:user-by-uid-range");
+
+declare %ann:manual %ann:general-range index def:user-by-uid-general-range
+  on nodes dml:collection(xs:QName("def:user"))
+  by xs:string(@uid) as xs:string;
+
+declare variable $def:user-by-uid-general-range := xs:QName("def:user-by-uid-general-range");
+
+declare %ann:sequential function def:init()
+{
+  ddl:create($def:user);
+
+  index_ddl:create($def:user-by-uid);
+  index_ddl:create($def:user-by-uid-range);
+  index_ddl:create($def:user-by-uid-general-range);
+
+  dml:insert-nodes($def:user,
+    (
+      <user uid='1'>Matthias</user>,
+      <user uid='2'>David</user>,
+      <user uid='3'>Gabriel</user>,
+      <user uid='4'>William</user>
+    )
+  );
+
+  index_dml:refresh-index($def:user-by-uid-general-range);
+};
+
+
+declare function def:count-user()
+{
+  count(index_dml:probe-index-point-value(xs:QName("def:user-by-uid"), "1"))
+};
+
+declare function def:count-user-range()
+{
+  count(
+   index_dml:probe-index-range-value(
+    xs:QName("def:user-by-uid-range"),
+    "1", "4", fn:true(), fn:true(), fn:true(), fn:true())
+  )
+};
+
+declare function def:count-user-general-range()
+{
+  count(
+   index_dml:probe-index-range-general(
+    xs:QName("def:user-by-uid-general-range"),
+    "1", "4", fn:true(), fn:true(), fn:true(), fn:true())
+  )
+};


Follow ups