← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~zorba-coders/zorba/markos-scratch into lp:zorba

 

Markos Zaharioudakis has proposed merging lp:~zorba-coders/zorba/markos-scratch into lp:zorba.

Requested reviews:
  Markos Zaharioudakis (markos-za)

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/markos-scratch/+merge/99664

Fixed performance problem with the findNodeSources function of the no-copy rule
-- 
https://code.launchpad.net/~zorba-coders/zorba/markos-scratch/+merge/99664
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'ChangeLog'
--- ChangeLog	2012-03-27 00:56:11 +0000
+++ ChangeLog	2012-03-28 04:46:19 +0000
@@ -7,6 +7,7 @@
 
 Bug Fixes/Other Changes:
   * Fixed bug in window iterator
+  * Fixed performance problem with the findNodeSources function of the no-copy rule
   * Fixed bug #872234 (prevent a rewritting to take place in case of sequential expr)
 
 

=== modified file 'src/compiler/rewriter/rules/nodeid_rules.cpp'
--- src/compiler/rewriter/rules/nodeid_rules.cpp	2012-03-27 00:56:11 +0000
+++ src/compiler/rewriter/rules/nodeid_rules.cpp	2012-03-28 04:46:19 +0000
@@ -685,20 +685,6 @@
     {
       user_function* udf = static_cast<user_function*>(f);
 
-#if 0
-      UdfCalls::iterator ite = std::find(theUdfCallPath.begin(), theUdfCallPath.end(), e);
-
-      if (ite == theUdfCallPath.end())
-      {
-        theUdfCallPath.push_back(e);
-
-        UDFCallChain nextUdfCall(e, &udfCaller);
-
-        applyInternal(rCtx, udf->getBody(), nextUdfCall);
-
-        theUdfCallPath.pop_back();
-      }
-#else
       UdfCalls::iterator ite = theProcessedUDFCalls.find(e);
 
       if (ite == theProcessedUDFCalls.end())
@@ -716,6 +702,9 @@
         {
           var_expr* argVar = udf->getArgVar(i);
 
+          // if an arg var of this udf has been marked as a source before, it 
+          // means that that var is consumed in some "nodeid-sesitive" operation,
+          // so we now have to find the sources of the arg expr and mark them.
           if (theSourceFinder->theVarSourcesMap.find(argVar) !=
               theSourceFinder->theVarSourcesMap.end())
           {
@@ -725,7 +714,6 @@
           }
         }
       }
-#endif
     } // f->isUdf()
     else
     {

=== modified file 'src/compiler/rewriter/tools/dataflow_annotations.cpp'
--- src/compiler/rewriter/tools/dataflow_annotations.cpp	2012-03-27 00:56:11 +0000
+++ src/compiler/rewriter/tools/dataflow_annotations.cpp	2012-03-28 04:46:19 +0000
@@ -818,7 +818,7 @@
   if (udfCaller->theFo)
     theStartingUdf = static_cast<user_function*>(udfCaller->theFo->get_func());
 
-  findNodeSourcesRec(node, sources, NULL);
+  findNodeSourcesRec(node, sources, theStartingUdf);
 
   for (csize i = 0; i < sources.size(); ++i)
   {
@@ -1031,8 +1031,7 @@
     {
       sources.push_back(node);
 
-      theSourceUdfMap.
-      insert(SourceUdfMapPair(node, (currentUdf ? currentUdf : theStartingUdf)));
+      theSourceUdfMap.insert(SourceUdfMapPair(node, currentUdf));
     }
 
     std::vector<expr*> enclosedExprs;
@@ -1101,62 +1100,70 @@
  
       bool recursive = (currentUdf ? currentUdf->isMutuallyRecursiveWith(udf) : false);
 
-      UdfSourcesMap::iterator ite = theUdfSourcesMap.find(udf);
-
-      std::vector<expr*>* udfSources;
-
-      if (ite == theUdfSourcesMap.end() ||
-          (recursive &&
-           std::find(theUdfCallPath.begin(), theUdfCallPath.end(), e) ==
-           theUdfCallPath.end()))
-      {
-        if (recursive)
-          theUdfCallPath.push_back(e);
-
-        // must do this before calling findNodeSourcesRec in order to break
-        // recursion cycle
+      if (recursive)
+      {
+        currentUdf->addRecursiveCall(node);
+      }
+      else
+      {
+        UdfSourcesMap::iterator ite = theUdfSourcesMap.find(udf);
+
+        std::vector<expr*>* udfSources;
+
         if (ite == theUdfSourcesMap.end())
         {
-          udfSources = new std::vector<expr*>;
-          theUdfSourcesMap.insert(UdfSourcesPair(udf, udfSources));
+          // must do this before calling findNodeSourcesRec in order to break
+          // recursion cycle
+          if (ite == theUdfSourcesMap.end())
+          {
+            udfSources = new std::vector<expr*>;
+            theUdfSourcesMap.insert(UdfSourcesPair(udf, udfSources));
+          }
+          else
+          {
+            udfSources = (*ite).second;
+          }
+
+          findNodeSourcesRec(udf->getBody(), *udfSources, udf);
+
+          if (udf->isRecursive())
+          {
+            std::vector<expr*>::const_iterator ite = udf->getRecursiveCalls().begin();
+            std::vector<expr*>::const_iterator end = udf->getRecursiveCalls().end();
+            for (; ite != end; ++ite)
+            {
+              findNodeSourcesRec((*ite), *udfSources, NULL);
+            }
+          }
         }
         else
         {
           udfSources = (*ite).second;
         }
 
-        findNodeSourcesRec(udf->getBody(), *udfSources, udf);
-
-        if (recursive)
-          theUdfCallPath.pop_back();
-      }
-      else
-      {
-        udfSources = (*ite).second;
-      }
-
-      csize numUdfSources = udfSources->size();
-
-      for (csize i = 0; i < numUdfSources; ++i)
-      {
-        expr* source = (*udfSources)[i];
-
-        if (source->get_expr_kind() == var_expr_kind)
-        {
-          var_expr* argVar = static_cast<var_expr*>(source);
-
-          ZORBA_ASSERT(argVar->get_kind() == var_expr::arg_var);
-
-          expr* argExpr = e->get_arg(argVar->get_param_pos());
-
-          findNodeSourcesRec(argExpr, sources, currentUdf);
-        }
-        else
-        {
-          if (std::find(sources.begin(), sources.end(), source) == sources.end())
-            sources.push_back(source);
-        }
-      }
+        csize numUdfSources = udfSources->size();
+
+        for (csize i = 0; i < numUdfSources; ++i)
+        {
+          expr* source = (*udfSources)[i];
+
+          if (source->get_expr_kind() == var_expr_kind)
+          {
+            var_expr* argVar = static_cast<var_expr*>(source);
+            
+            ZORBA_ASSERT(argVar->get_kind() == var_expr::arg_var);
+            
+            expr* argExpr = e->get_arg(argVar->get_param_pos());
+            
+            findNodeSourcesRec(argExpr, sources, currentUdf);
+          }
+          else
+          {
+            if (std::find(sources.begin(), sources.end(), source) == sources.end())
+              sources.push_back(source);
+          }
+        }
+      } // not recursive call
     } // f->isUdf()
     else
     {

=== modified file 'src/compiler/rewriter/tools/dataflow_annotations.h'
--- src/compiler/rewriter/tools/dataflow_annotations.h	2012-03-27 00:56:11 +0000
+++ src/compiler/rewriter/tools/dataflow_annotations.h	2012-03-28 04:46:19 +0000
@@ -93,7 +93,6 @@
   SourceUdfMap                 theSourceUdfMap;
 
   user_function              * theStartingUdf;
-  std::vector<fo_expr*>        theUdfCallPath;
 
 protected:
   void findNodeSourcesRec(

=== modified file 'src/functions/udf.cpp'
--- src/functions/udf.cpp	2012-03-27 00:56:11 +0000
+++ src/functions/udf.cpp	2012-03-28 04:46:19 +0000
@@ -248,6 +248,19 @@
 /*******************************************************************************
 
 ********************************************************************************/
+void user_function::addRecursiveCall(expr* call)
+{
+  if (std::find(theRecursiveCalls.begin(), theRecursiveCalls.end(), call) ==
+      theRecursiveCalls.end())
+  {
+    theRecursiveCalls.push_back(call);
+  }
+}
+
+
+/*******************************************************************************
+
+********************************************************************************/
 bool user_function::isRecursive() const
 {
   assert(isOptimized());

=== modified file 'src/functions/udf.h'
--- src/functions/udf.h	2012-03-27 00:56:11 +0000
+++ src/functions/udf.h	2012-03-28 04:46:19 +0000
@@ -120,7 +120,9 @@
 
   bool                        theIsExiting;
   bool                        theIsLeaf;
+
   std::vector<user_function*> theMutuallyRecursiveUDFs;
+  std::vector<expr*>          theRecursiveCalls;
 
   bool                        theIsOptimized;
 
@@ -132,7 +134,11 @@
   bool                        theCacheResults;
   bool                        theCacheComputed;
 
+<<<<<<< TREE
   rchandle<rclist<user_function*> > theLocalUdfs;//for plan serializer
+=======
+  rchandle<rclist<user_function*> >  theLocalUdfs;//for plan serializer
+>>>>>>> MERGE-SOURCE
 
 public:
   SERIALIZABLE_CLASS(user_function)
@@ -182,6 +188,10 @@
       const std::vector<user_function*>& udfs,
       const std::vector<user_function*>::const_iterator& cycle);
 
+  void addRecursiveCall(expr* call);
+
+  const std::vector<expr*>& getRecursiveCalls() const { return theRecursiveCalls; }
+
   bool isMutuallyRecursiveWith(const user_function* udf);
 
   bool isRecursive() const;

=== modified file 'src/store/api/item_factory.h'
--- src/store/api/item_factory.h	2012-03-27 00:56:11 +0000
+++ src/store/api/item_factory.h	2012-03-28 04:46:19 +0000
@@ -623,8 +623,9 @@
         zstring& content) = 0;
 
   /**
-   * Create a new text node N to store the typed value of an element node P.
-   * In this case, N can be the only child of P. 
+   * Create a new text node N to store the typed value of an element node P, and
+   * place N as the last child of P. Notice that in this case, P cannot have any
+   * subelements. 
    *
    * @param result  The new node N created by this method
    * @param parent  The parent P of the new node; may NOT be NULL.

=== added file 'test/rbkt/ExpQueryResults/zorba/no-copy/udfs3.xml.res'
--- test/rbkt/ExpQueryResults/zorba/no-copy/udfs3.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/no-copy/udfs3.xml.res	2012-03-28 04:46:19 +0000
@@ -0,0 +1,1 @@
+xml foo

=== added file 'test/rbkt/ExpQueryResults/zorba/no-copy/udfs4.xml.res'
--- test/rbkt/ExpQueryResults/zorba/no-copy/udfs4.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/no-copy/udfs4.xml.res	2012-03-28 04:46:19 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+element(NCName)

=== added file 'test/rbkt/Queries/zorba/no-copy/udfs3.xq'
--- test/rbkt/Queries/zorba/no-copy/udfs3.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/no-copy/udfs3.xq	2012-03-28 04:46:19 +0000
@@ -0,0 +1,14 @@
+
+
+declare function local:foo($n as node(), $level as xs:integer) as node()
+{
+  if ($level > 0)
+  then
+    local:foo(<a xmlns:foo="foo.com">{$n}</a>, $level - 1)
+  else
+    $n
+};
+
+
+
+in-scope-prefixes(local:foo(<b/>, 4)//b)

=== added file 'test/rbkt/Queries/zorba/no-copy/udfs4.xq'
--- test/rbkt/Queries/zorba/no-copy/udfs4.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/no-copy/udfs4.xq	2012-03-28 04:46:19 +0000
@@ -0,0 +1,96 @@
+declare namespace a="http://railroad.my28msec.com/lib/cst-to-ast";;
+declare namespace g="http://www.w3.org/2001/03/XPath/grammar";;
+
+declare function a:ast($nodes)
+{
+  for $node in $nodes
+  return
+    typeswitch ($node)
+    case element(NCName) return
+      "element(NCName)"
+    case element(StringLiteral) return
+      let $string := $node
+      return
+        element g:string {substring($string, 2, string-length($string) - 2)}
+    case element(NameOrString) return
+      let $a := a:ast($node/*[1])
+      return
+        if (count($node/*) = 2) then
+          element {node-name($a)}
+          {
+            $a/@*,
+            attribute context {substring($node/*[2], 2)},
+            $a/node()
+          }
+        else
+          $a
+    case element(ProcessingInstruction) return
+      processing-instruction {$node/NCName} {data($node/ProcessingInstructionContents)}
+    case element(Link) return
+      attribute xhref {$node/URL}
+    case element(Primary) return
+      if ($node/TOKEN = ".") then
+        element g:ref {attribute name {"."}}
+      else if ($node/TOKEN = "$") then
+        element g:endOfFile {}
+      else
+        a:ast($node/*[not(self::TOKEN)])
+    case element(Item) return
+      if ($node/TOKEN = "?") then
+        element g:optional {a:ast($node/Primary)}
+      else if ($node/TOKEN = "+") then
+        element g:oneOrMore {a:ast($node/Primary)}
+      else if ($node/TOKEN = "*") then
+        element g:zeroOrMore {a:ast($node/Primary)}
+      else
+        a:ast($node/Primary)
+    case element(SequenceOrDifference) return
+      if ($node/TOKEN = "-") then
+        element g:subtract
+        {
+          for $i in $node/Item
+          let $ast := a:ast($i)
+          return if (count($ast) = 1) then $ast else element g:sequence {$ast}
+        }
+      else
+        a:ast($node/Item)
+    case element(Choice) return
+      
+      if (count($node/SequenceOrDifference) = 1) then
+        a:ast($node/SequenceOrDifference)
+      else if ($node/TOKEN = "|") then
+          element g:choice
+          {
+            for $s in $node/SequenceOrDifference
+            let $ast := a:ast($s)
+            return if (count($ast) = 1) then $ast else element g:sequence {$ast}
+          }
+      else
+          element g:orderedChoice
+          {
+            for $s in $node/SequenceOrDifference
+            let $ast := a:ast($s)
+            return if (count($ast) = 1) then $ast else element g:sequence {$ast}
+          }
+    case element(Alternative) return
+      "element(Alternative)"
+    case element(Alternatives) return
+      "element(Alternatives)"
+    case element(Preference) return
+      let $lhs := $node/NameOrString[1]
+      let $lhs-ast := a:ast($lhs)
+      for $rhs in $node/NameOrString[. >> $lhs]
+      return
+        if ($node/TOKEN = "<<") then          
+          element g:preference {$lhs-ast, a:ast($rhs)}
+        else
+          element g:preference {a:ast($rhs), $lhs-ast}
+    case element(Delimiter) return
+      let $lhs-ast := a:ast($node/NCName)
+      for $rhs in $node/NameOrString
+      return element g:delimiter {$lhs-ast, a:ast($rhs)}
+    default return
+      error(xs:QName("a:ast"), concat("invalid parse tree node type: ", local-name($node)))
+};
+
+a:ast(<NCName>B</NCName>)


Follow ups