← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~zorba-coders/zorba/markos1 into lp:zorba

 

Markos Zaharioudakis has proposed merging lp:~zorba-coders/zorba/markos1 into lp:zorba.

Requested reviews:
  Matthias Brantner (matthias-brantner)
  Markos Zaharioudakis (markos-za)

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/markos1/+merge/79723

Optimization: store the udf's plan state size in the udf object so that it doesn't have to be recomputed every time the udf is invoked.
-- 
https://code.launchpad.net/~zorba-coders/zorba/markos1/+merge/79723
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/functions/udf.cpp'
--- src/functions/udf.cpp	2011-08-02 20:09:05 +0000
+++ src/functions/udf.cpp	2011-10-18 17:52:26 +0000
@@ -53,7 +53,8 @@
   theScriptingKind(scriptingKind),
   theIsExiting(false),
   theIsLeaf(true),
-  theIsOptimized(false)
+  theIsOptimized(false),
+  thePlanStateSize(0)
 {
   setFlag(FunctionConsts::isUDF);
   resetFlag(FunctionConsts::isBuiltin);
@@ -115,7 +116,8 @@
         getPlan(ar.compiler_cb);
       }
 #else
-      getPlan(ar.compiler_cb);
+      uint32_t planStateSize;
+      getPlan(ar.compiler_cb, planStateSize);
 #endif
     }
     catch(...)
@@ -144,6 +146,7 @@
   //ar.set_is_temp_field(false);
   //if(save_plan)
   ar & thePlan;
+  ar & thePlanStateSize;
   ar & theArgVarsRefs;
 }
 
@@ -315,7 +318,7 @@
 /*******************************************************************************
 
 ********************************************************************************/
-PlanIter_t user_function::getPlan(CompilerCB* ccb)
+  PlanIter_t user_function::getPlan(CompilerCB* ccb, uint32_t& planStateSize)
 {
   if (thePlan == NULL)
   {
@@ -325,23 +328,25 @@
       theIsOptimized = true;
 
       expr_t body = getBody();
+
       RewriterContext rctx(ccb,
                            body,
                            this,
                            zstring(),
                            body->get_sctx()->is_in_ordered_mode());
+
       GENV_COMPILERSUBSYS.getDefaultOptimizingRewriter()->rewrite(rctx);
       body = rctx.getRoot();
       setBody(body);
     }
 
-    ulong numArgs = theArgVars.size();
+    csize numArgs = theArgVars.size();
 
     hash64map<std::vector<LetVarIter_t> *> argVarToRefsMap;
 
     theArgVarsRefs.resize(numArgs);
 
-    for (ulong i = 0; i < numArgs; ++i)
+    for (csize i = 0; i < numArgs; ++i)
     {
       argVarToRefsMap.put((uint64_t)&*theArgVars[i], &theArgVarsRefs[i]);
     }
@@ -357,6 +362,9 @@
                              nextVarId,
                              &argVarToRefsMap);
   }
+
+  planStateSize = thePlanStateSize;
+
   return thePlan;
 }
 

=== modified file 'src/functions/udf.h'
--- src/functions/udf.h	2011-06-14 17:26:33 +0000
+++ src/functions/udf.h	2011-10-18 17:52:26 +0000
@@ -40,29 +40,44 @@
   i.e., an fo_expr is created that points to the udf obj and also has a vector
   of pointers to the arg exprs appearing in the function call.
 
-  theLoc           : The query location where this udf is declared at.
-  theBodyExpr      : The expr tree representing what this function is doing.
-                     It is the result of translating the udf declaration (so
-                     for a udf with one or more params, it is the flwor expr
-                     described above). Note: translation of udf declarations
-                     includes normalization and optimization of the expr tree.
-  theArgVars       : The internally generated arg vars (the $xi_ vars described
-                     above)
-
-  theScriptingKind : The declared scripting kind of this udf. Notice that the
-                     getScriptingKind method will return the declared kind if 
-                     the body is NULL, but after the body has been translated,
-                     it will return the kind of the body expr.
-
-  theIsLeaf        : True if this udf does not invoke any other udfs
-
-  thePlan          :
-  theArgVarsRefs   : For each arg var, this vector stores the LetVarIterators 
-                     that represent the references to that var within the udf
-                     body. If there are more than one references of an arg var,
-                     these references are "mutually exclusive", ie, at most one
-                     of the references will actually be reached during each 
-                     particular execution of the body.
+  theLoc: 
+  -------
+  The query location where this udf is declared.
+  
+  theBodyExpr:
+  ------------
+  The expr tree representing what this function is doing. It is the result of 
+  translating the udf declaration (so for a udf with one or more params, it is
+  the flwor expr described above). Note: translation of udf declarations
+  includes normalization and optimization of the expr tree.
+
+  theArgVars:
+  -----------
+  The internally generated arg vars (the $xi_ vars described above)
+
+  theScriptingKind:
+  -----------------
+  The declared scripting kind of this udf. Notice that the getScriptingKind 
+  method will return the declared kind if the body is NULL, but after the body
+  has been translated, it will return the kind of the body expr.
+
+  theIsLeaf:
+  ----------
+  True if this udf does not invoke any other udfs
+
+  thePlan:
+  --------
+
+  thePlanStateSize:
+  -----------------
+
+  theArgVarsRefs:
+  --------------- 
+  For each arg var, this vector stores the LetVarIterators that represent the 
+  references to that var within the udf body. If there is more than one
+  reference to an arg var, these references are "mutually exclusive", i.e.,
+  at most one of the references will actually be reached during each particular
+  execution of the body.
 ********************************************************************************/
 class user_function : public function 
 {
@@ -84,6 +99,7 @@
   bool                        theIsOptimized;
 
   PlanIter_t                  thePlan;
+  uint32_t                    thePlanStateSize;
   std::vector<ArgVarRefs>     theArgVarsRefs;
 
 public:
@@ -93,11 +109,10 @@
 
 public:
   user_function(
-        const QueryLoc& loc,
-        const signature& sig,
-        expr_t expr_body,
-        short kind
-    );
+      const QueryLoc& loc,
+      const signature& sig,
+      expr_t expr_body,
+      short kind);
 
   virtual ~user_function();
 
@@ -139,8 +154,10 @@
 
   BoolAnnotationValue ignoresDuplicateNodes(expr* fo, ulong input) const;
 
-  PlanIter_t getPlan(CompilerCB *);
+  PlanIter_t getPlan(CompilerCB* cb, uint32_t& planStateSize);
   
+  void setPlaneStateSize(uint32_t size) { thePlanStateSize = size; }
+
   const std::vector<ArgVarRefs>& getArgVarsRefs() const;
 
   PlanIter_t codegen(

=== modified file 'src/runtime/core/fncall_iterator.cpp'
--- src/runtime/core/fncall_iterator.cpp	2011-10-10 09:04:35 +0000
+++ src/runtime/core/fncall_iterator.cpp	2011-10-18 17:52:26 +0000
@@ -28,6 +28,7 @@
 #include "diagnostics/user_exception.h"
 #include "diagnostics/xquery_exception.h"
 #include "diagnostics/xquery_stack_trace.h"
+#include "diagnostics/util_macros.h"
 
 #include "context/dynamic_context.h"
 
@@ -91,6 +92,7 @@
   thePlan(NULL),
   thePlanState(NULL),
   thePlanStateSize(0),
+  theLocalDCtx(NULL),
   thePlanOpen(false)
 {
 }
@@ -102,19 +104,13 @@
 UDFunctionCallIteratorState::~UDFunctionCallIteratorState()
 {
   if (thePlanOpen)
-  {
     thePlan->close(*thePlanState);
-    thePlanOpen = false;
-  }
 
   if (thePlanState != NULL)
-  {
-    if (thePlanState->theLocalDynCtx)
-      delete thePlanState->theLocalDynCtx;
-
     delete thePlanState;
-    thePlanState = NULL;
-  }
+
+  if (theLocalDCtx != NULL)
+    delete theLocalDCtx;
 }
 
 
@@ -123,17 +119,17 @@
 ********************************************************************************/
 void UDFunctionCallIteratorState::open(PlanState& planState, user_function* udf)
 {
-  thePlan = udf->getPlan(planState.theCompilerCB).getp();
+  thePlan = udf->getPlan(planState.theCompilerCB, thePlanStateSize).getp();
 
   thePlanStateSize = thePlan->getStateSizeOfSubtree();
 
   // Must allocate new dctx, as child of the "current" dctx, because the udf
   // may be a recursive udf with local block vars, all of which have the same
   // dynamic-context id, but they are distinct vars.
-  dynamic_context* localDCtx = new dynamic_context(planState.theGlobalDynCtx);
+  theLocalDCtx = new dynamic_context(planState.theGlobalDynCtx);
 
   thePlanState = new PlanState(planState.theGlobalDynCtx,
-                               localDCtx,
+                               theLocalDCtx,
                                thePlanStateSize,
                                planState.theStackDepth + 1,
                                planState.theMaxStackDepth);
@@ -224,9 +220,10 @@
   }
 
   if (planState.theStackDepth + 1 > planState.theMaxStackDepth)
-    throw XQUERY_EXCEPTION(zerr::ZXQP0003_INTERNAL_ERROR,
-                           ERROR_PARAMS(ZED(StackOverflow)),
-                           ERROR_LOC(loc));
+  {
+    RAISE_ERROR(zerr::ZXQP0003_INTERNAL_ERROR, loc,
+    ERROR_PARAMS(ZED(StackOverflow)));
+  }
 
   // Create the plan for the udf body (if not done already) and allocate
   // the plan state (but not the state block) and dynamic context.
@@ -234,7 +231,7 @@
 
   // Create a wrapper over each subplan that computes an argument expr, if the
   // associated param is actually used anywhere in the function body.
-  ulong numArgs = (ulong)theChildren.size();
+  csize numArgs = theChildren.size();
 
   state->theArgWrappers.resize(numArgs);
 
@@ -310,7 +307,8 @@
     // Open the plan, if not done already. This cannot be done in the openImpl
     // method because in the case of recursive functions, we will get into an
     // infinite loop.
-    if (!state->thePlanOpen) {
+    if (!state->thePlanOpen) 
+    {
       uint32_t planOffset = 0;
       state->thePlan->open(*state->thePlanState, planOffset);
       state->thePlanOpen = true;

=== modified file 'src/runtime/core/fncall_iterator.h'
--- src/runtime/core/fncall_iterator.h	2011-06-14 17:26:33 +0000
+++ src/runtime/core/fncall_iterator.h	2011-10-18 17:52:26 +0000
@@ -35,31 +35,40 @@
 
 
 /*******************************************************************************
-  thePlan          : The runtime plan for the function body. This is created
-                     during UDFunctionCallIterator::openImpl(), if it has not
-                     not been created already (during the openImpl() method of
-                     another UDFunctionCallIterator on the same udf). A pointer
-                     to this plan is also stored in the udf obj itself, and
-                     that's how we know if it has been created already or not.
-  thePlanState     : The plan state to run thePlan with. The PlanState obj is
-                     created during UDFunctionCallIterator::openImpl(), but the
-                     actual state block is created an initialized the 1st time
-                     that UDFunctionCallIterator::nextImpl() is called (at that
-                     time open() is invoked on thePlan).
-  thePlanStateSize : The size of the plan state block.
-  thePlanOpen      : Whether thePlan has been opened already or not.
-  theArgWrappers   : For each argument of this function call, theArgWrappers 
-                     stores a plan iterator wrapper over the sub plan that 
-                     computes the arg expr. This wrapping is needed because
-                     the body plan and the arg sub plans operate in different
-                     plan states. Note: Withinh the function body, there may 
-                     exist more than one references to an arg var V, but these 
-                     references are "mutually exclusive", ie, at most one of 
-                     the references will actually be reached during each 
-                     particular execution of the body. So, it is never the case 
-                     that the arg expr will have more than one consumers, and as
-                     a result we can bind all those V references to the same
-                     arg wrapper.
+  thePlan:
+  --------
+  The runtime plan for the function body. This is created during 
+  UDFunctionCallIterator::openImpl(), if it has not been created already
+  (during the openImpl() method of another UDFunctionCallIterator on the same
+  udf). A pointer to this plan is also stored in the udf obj itself, and that's
+  how we know if it has been created already or not.
+
+  thePlanState:
+  -------------
+  The plan state to run thePlan with. The PlanState obj is created during 
+  UDFunctionCallIterator::openImpl(), but the actual state block is created and
+  initialized the 1st time that UDFunctionCallIterator::nextImpl() is called 
+  (at that time open() is invoked on thePlan).
+
+  thePlanStateSize:
+  -----------------
+  The size of the plan state block.
+
+  thePlanOpen:
+  ------------
+  Whether thePlan has been opened already or not.
+
+  theArgWrappers:
+  ---------------
+  For each argument of this function call, theArgWrappers stores a plan iterator
+  wrapper over the sub plan that computes the arg expr. This wrapping is needed 
+  because the body plan and the arg sub plans operate in different plan states. 
+  Note: Within the function body, there may exist more than one reference to
+  an arg var V, but these references are "mutually exclusive", i.e., at most one
+  of the references will actually be reached during each particular execution of
+  the body. So, it is never the case that the arg expr will have more than one
+  consumer, and as a result we can bind all those V references to the same arg
+  wrapper.
 ********************************************************************************/
 class UDFunctionCallIteratorState : public PlanIteratorState 
 {
@@ -67,6 +76,7 @@
   PlanIterator                 * thePlan;
   PlanState                    * thePlanState;
   uint32_t                       thePlanStateSize;
+  dynamic_context              * theLocalDCtx;
   bool                           thePlanOpen;
   std::vector<store::Iterator_t> theArgWrappers;
 
@@ -81,8 +91,13 @@
 
 
 /*******************************************************************************
-  theUDF       : Pointer to the udf object.
-  theIsDynamic :
+  theUDF: 
+  -------
+  Pointer to the udf object.
+
+  theIsDynamic:
+  -------------
+
 ********************************************************************************/
 class UDFunctionCallIterator : public NaryBaseIterator<UDFunctionCallIterator, 
                                                        UDFunctionCallIteratorState> 


Follow ups