← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~zorba-coders/zorba/substring-intopt into lp:zorba

 

Markos Zaharioudakis has proposed merging lp:~zorba-coders/zorba/substring-intopt into lp:zorba.

Requested reviews:
  Markos Zaharioudakis (markos-za)

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/substring-intopt/+merge/81949

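Optimizes the fn:substring function for the case where its $start and (if present) $length arguments are statically known to be integers: such calls are rewritten to an internal op:substring_int function, which is evaluated by a new SubstringIntOptIterator that works on integer values instead of doubles.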
-- 
https://code.launchpad.net/~zorba-coders/zorba/substring-intopt/+merge/81949
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/compiler/rewriter/rules/type_rules.cpp'
--- src/compiler/rewriter/rules/type_rules.cpp	2011-07-01 05:22:12 +0000
+++ src/compiler/rewriter/rules/type_rules.cpp	2011-11-11 08:41:26 +0000
@@ -1,12 +1,12 @@
 /*
  * Copyright 2006-2008 The FLWOR Foundation.
- * 
+ *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
- * 
+ *
  * http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -40,7 +40,7 @@
 #include "diagnostics/assert.h"
 
 
-namespace zorba 
+namespace zorba
 {
 
 static expr_t wrap_in_num_promotion(expr* arg, xqtref_t oldt, xqtref_t t);
@@ -51,14 +51,14 @@
 
 
 #if 0
-RULE_REWRITE_POST(InferUDFTypes) 
+RULE_REWRITE_POST(InferUDFTypes)
 {
   if (node->get_expr_kind() != fo_expr_kind)
     return NULL;
 
   fo_expr* fo = static_cast<fo_expr*>(node);
   user_function* udf = dynamic_cast<user_function*>(fo->get_func());
-  
+
   if (udf == NULL)
     return NULL;
 
@@ -93,11 +93,11 @@
   TypeManager* tm = sctx->get_typemanager();
   RootTypeManager& rtm = GENV_TYPESYSTEM;
 
-  if (node->get_expr_kind() == fo_expr_kind) 
+  if (node->get_expr_kind() == fo_expr_kind)
   {
     fo_expr* fo = static_cast<fo_expr *>(node);
 
-    if (fo->get_func()->getKind() == FunctionConsts::FN_BOOLEAN_1) 
+    if (fo->get_func()->getKind() == FunctionConsts::FN_BOOLEAN_1)
     {
       expr_t arg = fo->get_arg(0);
       xqtref_t arg_type = arg->get_return_type();
@@ -107,7 +107,7 @@
         return NULL;
     }
 
-    if (fo->get_func()->getKind() == FunctionConsts::FN_DATA_1) 
+    if (fo->get_func()->getKind() == FunctionConsts::FN_DATA_1)
     {
       expr_t arg = fo->get_arg(0);
       xqtref_t arg_type = arg->get_return_type();
@@ -121,7 +121,7 @@
   cast_base_expr* pe;
 
   // Note: the if cond is true for promote_expr, treat_expr, and cast_expr
-  if ((pe = dynamic_cast<cast_base_expr *>(node)) != NULL) 
+  if ((pe = dynamic_cast<cast_base_expr *>(node)) != NULL)
   {
     expr_t arg = pe->get_input();
     xqtref_t arg_type = arg->get_return_type();
@@ -143,7 +143,7 @@
         (node->get_expr_kind() != cast_expr_kind &&
          TypeOps::is_subtype(tm, *arg_type, *target_type, arg->get_loc())))
       return arg;
-    
+
     xqtref_t arg_ptype = TypeOps::prime_type(tm, *arg_type);
     xqtref_t target_ptype = TypeOps::prime_type(tm, *target_type);
 
@@ -166,7 +166,7 @@
     {
       treat_expr* te = static_cast<treat_expr *> (pe);
 
-      if (te->get_check_prime() && 
+      if (te->get_check_prime() &&
           TypeOps::is_subtype(tm, *arg_ptype, *target_ptype, arg->get_loc()))
       {
         te->set_check_prime(false);
@@ -205,7 +205,7 @@
 
   static_context* sctx = node->get_sctx();
 
-  if (node->get_expr_kind() == fo_expr_kind) 
+  if (node->get_expr_kind() == fo_expr_kind)
   {
     fo_expr* fo = static_cast<fo_expr *>(node);
     const function* fn = fo->get_func();
@@ -219,7 +219,7 @@
     {
       expr_t argExpr = fo->get_arg(0);
       xqtref_t argType = argExpr->get_return_type();
-      std::vector<xqtref_t> argTypes;  
+      std::vector<xqtref_t> argTypes;
       argTypes.push_back(argType);
 
       function* replacement = fn->specialize(sctx, argTypes);
@@ -236,7 +236,7 @@
                                                 argExpr->get_loc(),
                                                 argExpr,
                                                 rtm.DOUBLE_TYPE_STAR);
-          
+
           fo->set_arg(0, promoteExpr);
         }
 
@@ -248,7 +248,7 @@
     {
       expr_t argExpr = fo->get_arg(0);
       xqtref_t argType = argExpr->get_return_type();
-      std::vector<xqtref_t> argTypes;  
+      std::vector<xqtref_t> argTypes;
       argTypes.push_back(argType);
 
       function* replacement = fn->specialize(sctx, argTypes);
@@ -259,7 +259,9 @@
       }
     }
     else if (fnKind == FunctionConsts::FN_SUBSEQUENCE_2 ||
-             fnKind == FunctionConsts::FN_SUBSEQUENCE_3)
+             fnKind == FunctionConsts::FN_SUBSEQUENCE_3 ||
+             fnKind == FunctionConsts::FN_SUBSTRING_2 ||
+             fnKind == FunctionConsts::FN_SUBSTRING_3)
     {
       expr_t posExpr = fo->get_arg(1);
       if (posExpr->get_expr_kind() == promote_expr_kind)
@@ -286,18 +288,24 @@
         if (TypeOps::is_subtype(tm, *posType, *rtm.INTEGER_TYPE_ONE, posLoc) &&
             TypeOps::is_subtype(tm, *lenType, *rtm.INTEGER_TYPE_ONE, lenLoc))
         {
-          fo->set_func(GET_BUILTIN_FUNCTION(OP_ZORBA_SUBSEQUENCE_INT_3));
+          if(fnKind == FunctionConsts::FN_SUBSTRING_3)
+            fo->set_func(GET_BUILTIN_FUNCTION(OP_SUBSTRING_INT_3));
+          else
+            fo->set_func(GET_BUILTIN_FUNCTION(OP_ZORBA_SUBSEQUENCE_INT_3));
           fo->set_arg(1, posExpr);
           fo->set_arg(1, lenExpr);
         }
       }
       else if (TypeOps::is_subtype(tm, *posType, *rtm.INTEGER_TYPE_ONE, posLoc))
       {
-        fo->set_func(GET_BUILTIN_FUNCTION(OP_ZORBA_SUBSEQUENCE_INT_2));
+        if(fnKind == FunctionConsts::FN_SUBSTRING_2)
+          fo->set_func(GET_BUILTIN_FUNCTION(OP_SUBSTRING_INT_2));
+        else
+          fo->set_func(GET_BUILTIN_FUNCTION(OP_ZORBA_SUBSEQUENCE_INT_2));
         fo->set_arg(1, posExpr);
       }
     }
-    else if (fo->num_args() == 2) 
+    else if (fo->num_args() == 2)
     {
       expr* arg0 = fo->get_arg(0);
       expr* arg1 = fo->get_arg(1);
@@ -317,7 +325,7 @@
         if (specialize_numeric(fo, sctx) != NULL)
           return node;
       }
-      else if (props.specializeCmp() && fn->isComparisonFunction()) 
+      else if (props.specializeCmp() && fn->isComparisonFunction())
       {
         if (fn->isGeneralComparisonFunction())
         {
@@ -325,39 +333,39 @@
           argTypes.push_back(t0);
           argTypes.push_back(t1);
           function* replacement = fn->specialize(sctx, argTypes);
-          if (replacement != NULL) 
+          if (replacement != NULL)
           {
             fo->set_func(replacement);
             return node;
           }
         }
-        else if (fn->isValueComparisonFunction()) 
+        else if (fn->isValueComparisonFunction())
         {
           xqtref_t string_type = rtm.STRING_TYPE_QUESTION;
           bool string_cmp = true;
           expr_t nargs[2];
 
-          for (int i = 0; i < 2; ++i) 
+          for (int i = 0; i < 2; ++i)
           {
             expr* arg = (i == 0 ? arg0 : arg1);
             xqtref_t type = (i == 0 ? t0 : t1);
             const QueryLoc& loc = arg->get_loc();
 
-            if (TypeOps::is_subtype(tm, *type, *rtm.UNTYPED_ATOMIC_TYPE_QUESTION, loc)) 
+            if (TypeOps::is_subtype(tm, *type, *rtm.UNTYPED_ATOMIC_TYPE_QUESTION, loc))
             {
               nargs[i] = new cast_expr(arg->get_sctx(),
                                        arg->get_loc(),
                                        arg,
                                        string_type);
             }
-            else if (! TypeOps::is_subtype(tm, *type, *string_type, loc)) 
+            else if (! TypeOps::is_subtype(tm, *type, *string_type, loc))
             {
               string_cmp = false;
               break;
             }
           }
 
-          if (string_cmp) 
+          if (string_cmp)
           {
             for (int i = 0; i < 2; i++)
             {
@@ -369,13 +377,13 @@
             argTypes.push_back(string_type);
             argTypes.push_back(string_type);
             function* replacement = fn->specialize(sctx, argTypes);
-            if (replacement != NULL) 
+            if (replacement != NULL)
             {
               fo->set_func(replacement);
               return node;
-            } 
+            }
           }
-          else if (TypeOps::is_numeric(tm, *t0) && TypeOps::is_numeric(tm, *t1)) 
+          else if (TypeOps::is_numeric(tm, *t0) && TypeOps::is_numeric(tm, *t1))
           {
             xqtref_t aType = specialize_numeric(fo, sctx);
             if (aType != NULL)
@@ -454,23 +462,23 @@
   xqtref_t t0 = arg0->get_return_type();
   xqtref_t t1 = arg1->get_return_type();
 
-  xqtref_t aType = 
+  xqtref_t aType =
   TypeOps::arithmetic_type(tm,
                            *t0,
                            *t1,
                            fn->arithmeticKind() == ArithmeticConsts::DIVISION);
-  
+
   if (!TypeOps::is_numeric(tm, *aType))
   {
     return NULL;
   }
 
-  std::vector<xqtref_t> argTypes;  
-  argTypes.push_back(aType);
-  argTypes.push_back(aType);
-  
+  std::vector<xqtref_t> argTypes;
+  argTypes.push_back(aType);
+  argTypes.push_back(aType);
+
   function* replacement = fn->specialize(sctx, argTypes);
-  if (replacement != NULL) 
+  if (replacement != NULL)
   {
     fo->set_func(replacement);
 
@@ -482,7 +490,7 @@
 
     if (newArg1 != NULL)
       fo->set_arg(1, newArg1);
- 
+
     return aType;
   }
 
@@ -490,7 +498,7 @@
 }
 
 
-static expr_t wrap_in_num_promotion(expr* arg, xqtref_t oldt, xqtref_t t) 
+static expr_t wrap_in_num_promotion(expr* arg, xqtref_t oldt, xqtref_t t)
 {
   TypeManager* tm = arg->get_type_manager();
 
@@ -512,7 +520,7 @@
 }
 
 
-static function* flip_value_cmp(FunctionConsts::FunctionKind kind) 
+static function* flip_value_cmp(FunctionConsts::FunctionKind kind)
 {
   FunctionConsts::FunctionKind newKind;
 
@@ -594,3 +602,4 @@
 
 }
 /* vim:set et sw=2 ts=2: */
+/* vim:set et sw=2 ts=2: */

=== modified file 'src/functions/func_strings_impl.cpp'
--- src/functions/func_strings_impl.cpp	2011-06-14 17:26:33 +0000
+++ src/functions/func_strings_impl.cpp	2011-11-11 08:41:26 +0000
@@ -15,8 +15,16 @@
  */
 #include "stdafx.h"
 
+#include "common/shared_types.h"
+#include "types/typeops.h"
+
+#include "functions/function.h"
+#include "functions/function_impl.h"
+
 #include "functions/func_strings.h"
 
+#include "system/globalenv.h"
+
 #include "compiler/expression/expr_consts.h"
 
 
@@ -28,19 +36,45 @@
 ********************************************************************************/
 BoolAnnotationValue fn_concat::ignoresSortedNodes(
     expr* fo,
-    ulong input) const 
+    ulong input) const
 {
   return ANNOTATION_TRUE;
 }
 
 
 BoolAnnotationValue fn_concat::ignoresDuplicateNodes(
-    expr* fo, 
-    ulong input) const 
+    expr* fo,
+    ulong input) const
 {
   return ANNOTATION_TRUE;
 }
 
+function* fn_substring::specialize( static_context* sctx,
+    const std::vector<xqtref_t>& argTypes) const
+{
+  RootTypeManager &rtm = GENV_TYPESYSTEM;
+  TypeManager *tm = sctx->get_typemanager();
+  if(TypeOps::is_subtype(tm,
+                    *argTypes[0],
+                    *(rtm.INTEGER_TYPE_ONE)))
+  {
+    if(argTypes.size() > 1)
+    {
+      if(TypeOps::is_subtype(tm,
+                        *argTypes[1],
+                        *(rtm.INTEGER_TYPE_ONE)))
+        {
+          return new op_substring_int(theSignature, theKind);
+        }
+
+        return NULL;
+    }
+
+    return new op_substring_int(theSignature, theKind);
+  }
+
+  return NULL;
+}
 
 }
 

=== modified file 'src/functions/pregenerated/func_strings.cpp'
--- src/functions/pregenerated/func_strings.cpp	2011-10-19 15:28:51 +0000
+++ src/functions/pregenerated/func_strings.cpp	2011-11-11 08:41:26 +0000
@@ -110,6 +110,16 @@
   return new SubstringIterator(sctx, loc, argv);
 }
 
+PlanIter_t op_substring_int::codegen(
+  CompilerCB*,
+  static_context* sctx,
+  const QueryLoc& loc,
+  std::vector<PlanIter_t>& argv,
+  AnnotationHolder& ann) const
+{
+  return new SubstringIntOptIterator(sctx, loc, argv);
+}
+
 PlanIter_t fn_string_length::codegen(
   CompilerCB*,
   static_context* sctx,
@@ -443,6 +453,33 @@
   {
     
 
+    DECL_WITH_KIND(sctx, op_substring_int,
+        (createQName("http://www.zorba-xquery.com/internal/xquery-ops","","substring_int"), 
+        GENV_TYPESYSTEM.STRING_TYPE_QUESTION, 
+        GENV_TYPESYSTEM.INTEGER_TYPE_ONE, 
+        GENV_TYPESYSTEM.STRING_TYPE_ONE),
+        FunctionConsts::OP_SUBSTRING_INT_2);
+
+  }
+
+
+  {
+    
+
+    DECL_WITH_KIND(sctx, op_substring_int,
+        (createQName("http://www.zorba-xquery.com/internal/xquery-ops","","substring_int"), 
+        GENV_TYPESYSTEM.STRING_TYPE_QUESTION, 
+        GENV_TYPESYSTEM.INTEGER_TYPE_ONE, 
+        GENV_TYPESYSTEM.INTEGER_TYPE_ONE, 
+        GENV_TYPESYSTEM.STRING_TYPE_ONE),
+        FunctionConsts::OP_SUBSTRING_INT_3);
+
+  }
+
+
+  {
+    
+
     DECL_WITH_KIND(sctx, fn_string_length,
         (createQName("http://www.w3.org/2005/xpath-functions","","string-length"), 
         GENV_TYPESYSTEM.INTEGER_TYPE_ONE),

=== modified file 'src/functions/pregenerated/func_strings.h'
--- src/functions/pregenerated/func_strings.h	2011-10-19 15:28:51 +0000
+++ src/functions/pregenerated/func_strings.h	2011-11-11 08:41:26 +0000
@@ -141,6 +141,24 @@
 
 }
 
+  bool specializable() const { return true; }
+
+  function* specialize( static_context* sctx,
+                        const std::vector<xqtref_t>& argTypes) const;
+
+  CODEGEN_DECL();
+};
+
+
+//op:substring_int
+class op_substring_int : public function
+{
+public:
+  op_substring_int(const signature& sig, FunctionConsts::FunctionKind kind)
+    : function(sig, kind) {
+
+}
+
   CODEGEN_DECL();
 };
 

=== modified file 'src/functions/pregenerated/function_enum.h'
--- src/functions/pregenerated/function_enum.h	2011-10-27 20:55:51 +0000
+++ src/functions/pregenerated/function_enum.h	2011-11-11 08:41:26 +0000
@@ -338,6 +338,8 @@
   FN_STRING_JOIN_2,
   FN_SUBSTRING_2,
   FN_SUBSTRING_3,
+  OP_SUBSTRING_INT_2,
+  OP_SUBSTRING_INT_3,
   FN_STRING_LENGTH_0,
   FN_STRING_LENGTH_1,
   FN_NORMALIZE_SPACE_0,

=== modified file 'src/runtime/spec/strings/strings.xml'
--- src/runtime/spec/strings/strings.xml	2011-06-17 14:40:56 +0000
+++ src/runtime/spec/strings/strings.xml	2011-11-11 08:41:26 +0000
@@ -11,7 +11,7 @@
   xmlns:zorba="http://www.zorba-xquery.com"
   xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
   xsi:schemaLocation="http://www.zorba-xquery.com ../runtime.xsd">
-    
+
 <zorba:header>
     <zorba:include form="Quoted">zorbautils/checked_vector.h</zorba:include>
     <zorba:include form="Quoted">zorbatypes/schema_types.h</zorba:include>
@@ -181,17 +181,39 @@
       fn:substring
   </zorba:description>
 
+  <zorba:function specializable="true">
+    <zorba:signature localname="substring" prefix="fn">
+      <zorba:param>xs:string?</zorba:param>
+      <zorba:param>xs:double</zorba:param>
+      <zorba:output>xs:string</zorba:output>
+    </zorba:signature>
+
+    <zorba:signature localname="substring" prefix="fn">
+      <zorba:param>xs:string?</zorba:param>
+      <zorba:param>xs:double</zorba:param>
+      <zorba:param>xs:double</zorba:param>
+      <zorba:output>xs:string</zorba:output>
+    </zorba:signature>
+  </zorba:function>
+</zorba:iterator>
+
+<zorba:iterator name="SubstringIntOptIterator">
+
+  <zorba:description author="Zorba Team">
+      fn:substring
+  </zorba:description>
+
   <zorba:function>
-    <zorba:signature localname="substring" prefix="fn">
+    <zorba:signature localname="substring_int" prefix="op">
       <zorba:param>xs:string?</zorba:param>
-      <zorba:param>xs:double</zorba:param>
+      <zorba:param>xs:integer</zorba:param>
       <zorba:output>xs:string</zorba:output>
     </zorba:signature>
 
-    <zorba:signature localname="substring" prefix="fn">
+    <zorba:signature localname="substring_int" prefix="op">
       <zorba:param>xs:string?</zorba:param>
-      <zorba:param>xs:double</zorba:param>
-      <zorba:param>xs:double</zorba:param>
+      <zorba:param>xs:integer</zorba:param>
+      <zorba:param>xs:integer</zorba:param>
       <zorba:output>xs:string</zorba:output>
     </zorba:signature>
   </zorba:function>

=== modified file 'src/runtime/strings/pregenerated/strings.cpp'
--- src/runtime/strings/pregenerated/strings.cpp	2011-10-19 15:28:51 +0000
+++ src/runtime/strings/pregenerated/strings.cpp	2011-11-11 08:41:26 +0000
@@ -232,6 +232,34 @@
 // </SubstringIterator>
 
 
+// <SubstringIntOptIterator>
+const char* SubstringIntOptIterator::class_name_str = "SubstringIntOptIterator";
+SubstringIntOptIterator::class_factory<SubstringIntOptIterator>
+SubstringIntOptIterator::g_class_factory;
+
+const serialization::ClassVersion 
+SubstringIntOptIterator::class_versions[] ={{ 1, 0x000905, false}};
+
+const int SubstringIntOptIterator::class_versions_count =
+sizeof(SubstringIntOptIterator::class_versions)/sizeof(struct serialization::ClassVersion);
+
+void SubstringIntOptIterator::accept(PlanIterVisitor& v) const {
+  v.beginVisit(*this);
+
+  std::vector<PlanIter_t>::const_iterator lIter = theChildren.begin();
+  std::vector<PlanIter_t>::const_iterator lEnd = theChildren.end();
+  for ( ; lIter != lEnd; ++lIter ){
+    (*lIter)->accept(v);
+  }
+
+  v.endVisit(*this);
+}
+
+SubstringIntOptIterator::~SubstringIntOptIterator() {}
+
+// </SubstringIntOptIterator>
+
+
 // <StringLengthIterator>
 const char* StringLengthIterator::class_name_str = "StringLengthIterator";
 StringLengthIterator::class_factory<StringLengthIterator>

=== modified file 'src/runtime/strings/pregenerated/strings.h'
--- src/runtime/strings/pregenerated/strings.h	2011-10-19 15:28:51 +0000
+++ src/runtime/strings/pregenerated/strings.h	2011-11-11 08:41:26 +0000
@@ -303,6 +303,42 @@
 
 /**
  * 
+ *      fn:substring
+ *  
+ * Author: Zorba Team
+ */
+class SubstringIntOptIterator : public NaryBaseIterator<SubstringIntOptIterator, PlanIteratorState>
+{ 
+public:
+  SERIALIZABLE_CLASS(SubstringIntOptIterator);
+
+  SERIALIZABLE_CLASS_CONSTRUCTOR2T(SubstringIntOptIterator,
+    NaryBaseIterator<SubstringIntOptIterator, PlanIteratorState>);
+
+  void serialize( ::zorba::serialization::Archiver& ar)
+  {
+    serialize_baseclass(ar,
+    (NaryBaseIterator<SubstringIntOptIterator, PlanIteratorState>*)this);
+  }
+
+  SubstringIntOptIterator(
+    static_context* sctx,
+    const QueryLoc& loc,
+    std::vector<PlanIter_t>& children)
+    : 
+    NaryBaseIterator<SubstringIntOptIterator, PlanIteratorState>(sctx, loc, children)
+  {}
+
+  virtual ~SubstringIntOptIterator();
+
+  void accept(PlanIterVisitor& v) const;
+
+  bool nextImpl(store::Item_t& result, PlanState& aPlanState) const;
+};
+
+
+/**
+ * 
  *    fn:string-length
  *  
  * Author: Zorba Team

=== modified file 'src/runtime/strings/strings_impl.cpp'
--- src/runtime/strings/strings_impl.cpp	2011-08-10 18:58:11 +0000
+++ src/runtime/strings/strings_impl.cpp	2011-11-11 08:41:26 +0000
@@ -1,12 +1,12 @@
 /*
  * Copyright 2006-2008 The FLWOR Foundation.
- * 
+ *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
- * 
+ *
  * http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -60,7 +60,7 @@
   *  fn:codepoints-to-string($arg as xs:integer*) as xs:string
   *_______________________________________________________________________*/
 bool
-CodepointsToStringIterator::nextImpl(store::Item_t& result, PlanState& planState) const 
+CodepointsToStringIterator::nextImpl(store::Item_t& result, PlanState& planState) const
 {
   store::Item_t item;
   zstring resStr;
@@ -108,7 +108,7 @@
  */
 bool StringToCodepointsIterator::nextImpl(
     store::Item_t& result,
-    PlanState& planState) const 
+    PlanState& planState) const
 {
   // TODO Optimization for large strings: large strings mean that a large
   // integer vector should be stored in the state that is not good.
@@ -118,19 +118,19 @@
   StringToCodepointsIteratorState* state;
   DEFAULT_STACK_INIT(StringToCodepointsIteratorState, state, planState);
 
-  if (consumeNext(item, theChildren [0].getp(), planState )) 
+  if (consumeNext(item, theChildren [0].getp(), planState ))
   {
     item->getStringValue2(inputStr);
 
     if (!inputStr.empty())
     {
       utf8::to_codepoints(inputStr, &state->theResult);
-  
+
       while (state->theIterator < state->theResult.size())
       {
-        GENV_ITEMFACTORY->createInteger( 
+        GENV_ITEMFACTORY->createInteger(
           result,
-          Integer(state->theResult[state->theIterator]) 
+          Integer(state->theResult[state->theIterator])
         );
 
         STACK_PUSH(true, state );
@@ -171,7 +171,7 @@
   *_______________________________________________________________________*/
 bool CompareStrIterator::nextImpl(
     store::Item_t& result,
-    PlanState& planState) const 
+    PlanState& planState) const
 {
   store::Item_t n0;
   store::Item_t n1;
@@ -181,7 +181,7 @@
   PlanIteratorState* state;
   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
 
-  if (consumeNext(n0, theChildren[0].getp(), planState )) 
+  if (consumeNext(n0, theChildren[0].getp(), planState ))
   {
     if (consumeNext(n1, theChildren[1].getp(), planState ))
     {
@@ -199,7 +199,7 @@
       }
 
       res  = utf8::compare(n0->getStringValue(), n1->getStringValue(), coll);
-      
+
       res = (res < 0 ? -1 : (res > 0 ? 1 : 0));
 
       GENV_ITEMFACTORY->createInteger(result, Integer(res));
@@ -222,7 +222,7 @@
   *_______________________________________________________________________*/
 bool CodepointEqualIterator::nextImpl(
     store::Item_t& result,
-    PlanState& planState) const 
+    PlanState& planState) const
 {
   store::Item_t item0;
   store::Item_t item1;
@@ -230,9 +230,9 @@
   PlanIteratorState* state;
   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
 
-  if (consumeNext(item0, theChildren [0].getp(), planState )) 
+  if (consumeNext(item0, theChildren [0].getp(), planState ))
   {
-    if (consumeNext(item1, theChildren [1].getp(), planState )) 
+    if (consumeNext(item1, theChildren [1].getp(), planState ))
     {
       GENV_ITEMFACTORY->createBoolean(result,
                                       item0->getStringValue() == item1->getStringValue());
@@ -254,7 +254,7 @@
   *_______________________________________________________________________*/
 bool ConcatStrIterator::nextImpl(
     store::Item_t& result,
-    PlanState& planState) const 
+    PlanState& planState) const
 {
   store::Item_t lItem;
   std::stringstream lResStream;
@@ -456,7 +456,7 @@
           bool lenItemExists = consumeNext(lenItem, theChildren[2], planState);
 
           ZORBA_ASSERT(lenItemExists);
-          
+
           len = lenItem->getDoubleValue();
 
           if (!len.isNaN())
@@ -476,7 +476,7 @@
             {
               ilen = (xs_int)(utf8_string<zstring>(strval).length() - istart + 1);
             }
-    
+
             if( !(start + len).isNaN())
             {
               if (ilen >= 0)
@@ -522,6 +522,117 @@
 /**
   *______________________________________________________________________
   *
+  *  7.4.3.1  fn:substring optimized for int arguments
+  *
+  *fn:substring($sourceString   as xs:string?,
+  *             $startingLoc    as xs:integer) as xs:string
+  *fn:substring($sourceString as xs:string?,
+  *             $startingLoc  as xs:integer,
+  *             $length       as xs:integer)   as xs:string
+  *_______________________________________________________________________*/
+bool SubstringIntOptIterator::nextImpl(
+    store::Item_t& result,
+    PlanState& planState) const
+{
+  store::Item_t stringItem;
+  store::Item_t startItem;
+  store::Item_t lenItem;
+  zstring strval;
+  zstring resStr;
+  xs_int start;
+  xs_int len;
+
+  PlanIteratorState* state;
+  DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
+
+  if (consumeNext(stringItem, theChildren[0].getp(), planState ))
+  {
+    stringItem->getStringValue2(strval);
+
+    if (!strval.empty())
+    {
+      bool startExists = consumeNext(startItem, theChildren[1], planState);
+
+      ZORBA_ASSERT(startExists);
+
+      // note: The first character of a string is located at position 1,
+      // not position 0.
+
+      start = startItem->getIntValue();
+
+      if( theChildren.size() == 2)
+      {
+        if (start <= 0)
+        {
+          resStr = strval;
+        }
+        else
+        {
+          try
+          {
+            resStr = utf8_string<zstring>(strval).substr(start-1);
+          }
+          catch (...)
+          {
+            zstring::size_type numChars = utf8_string<zstring>(strval).length();
+            if (static_cast<zstring::size_type>(start) > numChars)
+            {
+              // result is the empty string
+            }
+            else
+            {
+              throw;
+            }
+          }
+        }
+      }
+      else
+      {
+        bool lenItemExists = consumeNext(lenItem, theChildren[2], planState);
+
+        ZORBA_ASSERT(lenItemExists);
+
+        len = lenItem->getIntValue();
+
+        if (len >= 0)
+        {
+          if (start <= 0)
+          {
+              if ((len + start - 1) >= 0)
+                resStr = utf8_string<zstring>(strval).substr(0,  start - 1 + len);
+          }
+          else
+          {
+            try
+            {
+              resStr = utf8_string<zstring>(strval).substr(start-1, len);
+            }
+            catch (...)
+            {
+              zstring::size_type numChars = utf8_string<zstring>(strval).length();
+              if (static_cast<zstring::size_type>(start) > numChars)
+              {
+                // result is the empty string
+              }
+              else
+              {
+                throw;
+              }
+            }
+          }
+        }
+      }
+    } // non empty string arg
+  } // non NULL string arg
+
+STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
+
+STACK_END (state);
+}
+
+/**
+  *______________________________________________________________________
+  *
   *  7.4.4 fn:string-length
   *
   *fn:string-length()                   as xs:integer
@@ -566,7 +677,7 @@
   *fn:normalize-space($arg as xs:string?) as xs:string
   *_______________________________________________________________________*/
 bool NormalizeSpaceIterator::nextImpl(
-    store::Item_t& result, 
+    store::Item_t& result,
     PlanState& planState) const
 {
   store::Item_t item;
@@ -668,7 +779,7 @@
     // must push empty string due to return type of function
     STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
   }
-  
+
   STACK_END (state);
 }
 
@@ -716,7 +827,7 @@
   *_______________________________________________________________________*/
 bool LowerCaseIterator::nextImpl(
     store::Item_t& result,
-    PlanState& planState) const 
+    PlanState& planState) const
 {
   store::Item_t item;
   zstring resStr;
@@ -751,7 +862,7 @@
   *             $transString  as xs:string) as xs:string
   *_______________________________________________________________________*/
 bool TranslateIterator::nextImpl(
-    store::Item_t& result, 
+    store::Item_t& result,
     PlanState& planState) const
 {
   store::Item_t arg_item, map_item, trans_item;
@@ -812,7 +923,7 @@
 
     res = GENV_ITEMFACTORY->createString(result, result_string);
   }
-  
+
   if (!res)
   {
     res = GENV_ITEMFACTORY->createString(result, result_string);
@@ -832,7 +943,7 @@
   *_______________________________________________________________________*/
 bool EncodeForUriIterator::nextImpl(
     store::Item_t& result,
-    PlanState& planState) const 
+    PlanState& planState) const
 {
   store::Item_t item;
   zstring resStr;
@@ -841,7 +952,7 @@
   PlanIteratorState* state;
   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
 
-  if (consumeNext(item, theChildren [0].getp(), planState)) 
+  if (consumeNext(item, theChildren [0].getp(), planState))
   {
     item->getStringValue2(strval);
     uri::encode(strval, &resStr, true);
@@ -860,8 +971,8 @@
   *fn:iri-to-uri($iri as xs:string?) as xs:string
   *_______________________________________________________________________*/
 bool IriToUriIterator::nextImpl(
-    store::Item_t& result, 
-    PlanState& planState) const 
+    store::Item_t& result,
+    PlanState& planState) const
 {
   store::Item_t item;
   zstring lStrIri;
@@ -894,8 +1005,8 @@
   *fn:escape-html-uri($uri as xs:string?) as xs:string
   *_______________________________________________________________________*/
 bool EscapeHtmlUriIterator::nextImpl(
-    store::Item_t& result, 
-    PlanState& planState) const 
+    store::Item_t& result,
+    PlanState& planState) const
 {
   store::Item_t item;
   zstring lStrUri;
@@ -932,8 +1043,8 @@
   *             $collation  as xs:string) as xs:boolean
   *_______________________________________________________________________*/
 bool ContainsIterator::nextImpl(
-    store::Item_t& result, 
-    PlanState& planState) const 
+    store::Item_t& result,
+    PlanState& planState) const
 {
   store::Item_t item0;
   store::Item_t item1;
@@ -954,7 +1065,7 @@
   {
     item1->getStringValue2(arg2);
   }
-    
+
   if (arg2.empty())
   {
     STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, true), state );
@@ -979,7 +1090,7 @@
     }
     STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, resBool), state );
   }
-  
+
   STACK_END (state);
 }
 /*end class ContainsIterator*/
@@ -997,7 +1108,7 @@
   *_______________________________________________________________________*/
 bool StartsWithIterator::nextImpl(
     store::Item_t& result,
-    PlanState& planState) const 
+    PlanState& planState) const
 {
   store::Item_t item0;
   store::Item_t item1;
@@ -1062,8 +1173,8 @@
   *             $collation  as xs:string)   as xs:boolean
   *_______________________________________________________________________*/
 bool EndsWithIterator::nextImpl(
-    store::Item_t& result, 
-    PlanState& planState) const 
+    store::Item_t& result,
+    PlanState& planState) const
 {
   store::Item_t item0;
   store::Item_t item1;
@@ -1079,7 +1190,7 @@
   {
     item0->getStringValue2(arg1);
   }
-  
+
   if (consumeNext(item1, theChildren[1].getp(), planState ))
   {
     item1->getStringValue2(arg2);
@@ -1110,7 +1221,7 @@
     }
     STACK_PUSH( GENV_ITEMFACTORY->createBoolean(result, resBool), state );
   }
-  
+
   STACK_END (state);
 }
 
@@ -1127,8 +1238,8 @@
   *                     $collation  as xs:string)   as xs:string
   *_______________________________________________________________________*/
 bool SubstringBeforeIterator::nextImpl(
-    store::Item_t& result, 
-    PlanState& planState) const 
+    store::Item_t& result,
+    PlanState& planState) const
 {
   store::Item_t item0;
   store::Item_t item1;
@@ -1195,8 +1306,8 @@
   *                   $collation  as xs:string)   as xs:string
   *_______________________________________________________________________*/
 bool SubstringAfterIterator::nextImpl(
-    store::Item_t& result, 
-    PlanState& planState) const 
+    store::Item_t& result,
+    PlanState& planState) const
 {
   store::Item_t item0;
   store::Item_t item1;
@@ -1270,7 +1381,7 @@
   *           $flags   as xs:string) as xs:boolean
   *_______________________________________________________________________*/
 bool FnMatchesIterator::nextImpl(
-    store::Item_t& result, 
+    store::Item_t& result,
     PlanState& planState) const
 {
   zstring input;
@@ -1278,7 +1389,7 @@
   zstring flags;
   store::Item_t item;
   bool res = false;
-  
+
   PlanIteratorState* state;
   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
 
@@ -1290,7 +1401,7 @@
 
   item->getStringValue2(xquery_pattern);
 
-  if(theChildren.size() == 3) 
+  if(theChildren.size() == 3)
   {
     if (!consumeNext(item, theChildren[2].getp(), planState))
       ZORBA_ASSERT (false);
@@ -1298,20 +1409,20 @@
     item->getStringValue2(flags);
   }
 
-  try 
+  try
   {
     zstring lib_pattern;
     convert_xquery_re( xquery_pattern, &lib_pattern, flags.c_str() );
     res = utf8::match_part(input, lib_pattern, flags.c_str());
   }
-  catch(XQueryException& ex) 
+  catch(XQueryException& ex)
   {
     set_source( ex, loc );
     throw;
   }
 
-  STACK_PUSH(GENV_ITEMFACTORY->createBoolean(result, res), state); 
-  
+  STACK_PUSH(GENV_ITEMFACTORY->createBoolean(result, res), state);
+
   STACK_END(state);
 }
 
@@ -1340,7 +1451,7 @@
   zstring resStr;
   store::Item_t item;
   bool tmp;
-  
+
   PlanIteratorState* state;
   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
 
@@ -1357,7 +1468,7 @@
 
   item->getStringValue2(replacement);
 
-  if(theChildren.size() == 4) 
+  if(theChildren.size() == 4)
   {
     if (!consumeNext(item, theChildren[3].getp(), planState))
       ZORBA_ASSERT (false);
@@ -1365,16 +1476,16 @@
     item->getStringValue2(flags);
   }
 
-  try 
+  try
   {
     tmp = utf8::match_part(zstring(), pattern, flags.c_str());
   }
-  catch(XQueryException& ex) 
+  catch(XQueryException& ex)
   {
     set_source( ex, loc );
     throw;
   }
-  
+
   if (tmp)
     throw XQUERY_EXCEPTION(
       err::FORX0003, ERROR_PARAMS( pattern ), ERROR_LOC( loc )
@@ -1448,20 +1559,20 @@
       );
   } // local scope
 
-  try 
+  try
   {
     zstring lib_pattern;
     convert_xquery_re( pattern, &lib_pattern, flags.c_str() );
     utf8::replace_all(input, lib_pattern, flags.c_str(), replacement2, &resStr);
   }
-  catch(XQueryException& ex) 
+  catch(XQueryException& ex)
   {
     set_source( ex, loc );
     throw;
   }
-  
+
   STACK_PUSH(GENV_ITEMFACTORY->createString(result, resStr), state);
-  
+
   STACK_END (state);
 }
 
@@ -1514,7 +1625,7 @@
   item->getStringValue2(strval);
   state->thePattern = strval.str();
 
-  if(theChildren.size() == 3) 
+  if(theChildren.size() == 3)
   {
     if (!consumeNext(item, theChildren[2].getp(), planState))
       ZORBA_ASSERT (false);
@@ -1543,7 +1654,7 @@
 
   while ((xs_unsignedInt)state->start_pos < state->theString.length ())
   {
-    try 
+    try
     {
       unicode::regex re;
       //
@@ -1558,7 +1669,7 @@
       if ( !got_next )
         break;
     }
-    catch(XQueryException& ex) 
+    catch(XQueryException& ex)
     {
       set_source( ex, loc );
       throw;
@@ -1588,7 +1699,7 @@
   *                   $flags   as xs:string) as element(fn:analyze-string-result)
   *_______________________________________________________________________*/
 
-static void copyUtf8Chars(const char *&sin, 
+static void copyUtf8Chars(const char *&sin,
                           int &utf8start,
                           unsigned int &bytestart,
                           int utf8end,
@@ -1605,10 +1716,10 @@
   }
 }
 
-static void addNonMatchElement(store::Item_t &parent, 
-                               int &match_end1, 
+static void addNonMatchElement(store::Item_t &parent,
+                               int &match_end1,
                                unsigned int &match_end1_bytes,
-                               int match_start2, 
+                               int match_start2,
                                const char *&strin)
 {
   store::Item_t non_match_elem;
@@ -1635,12 +1746,12 @@
   GENV_ITEMFACTORY->createTextNode(non_match_text_item, non_match_elem, non_match_str);
 }
 
-static void addGroupElement(store::Item_t &parent, 
+static void addGroupElement(store::Item_t &parent,
                             store::Item_t &untyped_type_name,
                             store::NsBindings   &ns_binding,
                             zstring &baseURI,
-                            int match_start2, 
-                            int match_end2, 
+                            int match_start2,
+                            int match_end2,
                             unsigned int &match_end1_bytes,
                             const char *&sin,
                             unicode::regex &rx,
@@ -1695,9 +1806,9 @@
     {
       if(group_parent[i+1] > gparent)
       {
-        addGroupElement(group_elem, untyped_type_name, ns_binding, baseURI, 
-                        match_startg, match_endg, match_end1_bytes, 
-                        sin, rx, 
+        addGroupElement(group_elem, untyped_type_name, ns_binding, baseURI,
+                        match_startg, match_endg, match_end1_bytes,
+                        sin, rx,
                         i, group_parent, nr_pattern_groups, i);
         continue;
       }
@@ -1719,8 +1830,8 @@
   }
 }
 
-static void addMatchElement(store::Item_t &parent, 
-                    int match_start2, 
+static void addMatchElement(store::Item_t &parent,
+                    int match_start2,
                     unsigned int &match_end1_bytes,
                     int match_end2,
                     //utf8_string<zstring_p>::const_iterator& utf8_it,
@@ -1776,7 +1887,7 @@
 }
 
 bool FnAnalyzeStringIterator::nextImpl(
-    store::Item_t& result, 
+    store::Item_t& result,
     PlanState& planState) const
 {
   bool is_input_stream = false;
@@ -1797,7 +1908,7 @@
   zstring xquery_pattern;
   zstring flags;
   store::Item_t item;
-  
+
   PlanIteratorState* state;
   DEFAULT_STACK_INIT(PlanIteratorState, state, planState);
 
@@ -1819,7 +1930,7 @@
 
   item->getStringValue2(xquery_pattern);
 
-  if(theChildren.size() == 3) 
+  if(theChildren.size() == 3)
   {
     if (!consumeNext(item, theChildren[2].getp(), planState))
       ZORBA_ASSERT (false);
@@ -1827,7 +1938,7 @@
     item->getStringValue2(flags);
   }
 
-  try 
+  try
   {
     zstring lib_pattern;
     convert_xquery_re( xquery_pattern, &lib_pattern, flags.c_str() );
@@ -2017,14 +2128,14 @@
 
     }while(is_input_stream && !reachedEnd);
   }
-  catch(XQueryException& ex) 
+  catch(XQueryException& ex)
   {
     set_source( ex, loc );
     throw;
   }
 
-  STACK_PUSH(true, state); 
-  
+  STACK_PUSH(true, state);
+
   STACK_END(state);
 }
 

=== modified file 'src/runtime/visitors/planiter_visitor_impl_code.h'
--- src/runtime/visitors/planiter_visitor_impl_code.h	2011-10-11 01:05:23 +0000
+++ src/runtime/visitors/planiter_visitor_impl_code.h	2011-11-11 08:41:26 +0000
@@ -357,4 +357,5 @@
 PLAN_ITER_VISITOR(FlowCtlIterator);
 
 PLAN_ITER_VISITOR(CountCollectionIterator);
+
 /* vim:set et sw=2 ts=2: */

=== modified file 'src/runtime/visitors/pregenerated/planiter_visitor.h'
--- src/runtime/visitors/pregenerated/planiter_visitor.h	2011-10-27 20:55:51 +0000
+++ src/runtime/visitors/pregenerated/planiter_visitor.h	2011-11-11 08:41:26 +0000
@@ -540,6 +540,8 @@
 
     class SubstringIterator;
 
+    class SubstringIntOptIterator;
+
     class StringLengthIterator;
 
     class NormalizeSpaceIterator;
@@ -1358,6 +1360,9 @@
     virtual void beginVisit ( const SubstringIterator& ) = 0;
     virtual void endVisit   ( const SubstringIterator& ) = 0;
 
+    virtual void beginVisit ( const SubstringIntOptIterator& ) = 0;
+    virtual void endVisit   ( const SubstringIntOptIterator& ) = 0;
+
     virtual void beginVisit ( const StringLengthIterator& ) = 0;
     virtual void endVisit   ( const StringLengthIterator& ) = 0;
 

=== modified file 'src/runtime/visitors/pregenerated/printer_visitor.cpp'
--- src/runtime/visitors/pregenerated/printer_visitor.cpp	2011-10-27 20:55:51 +0000
+++ src/runtime/visitors/pregenerated/printer_visitor.cpp	2011-11-11 08:41:26 +0000
@@ -3667,6 +3667,20 @@
 // </SubstringIterator>
 
 
+// <SubstringIntOptIterator>
+void PrinterVisitor::beginVisit ( const SubstringIntOptIterator& a) {
+  thePrinter.startBeginVisit("SubstringIntOptIterator", ++theId);
+  printCommons( &a, theId );
+  thePrinter.endBeginVisit( theId );
+}
+
+void PrinterVisitor::endVisit ( const SubstringIntOptIterator& ) {
+  thePrinter.startEndVisit();
+  thePrinter.endEndVisit();
+}
+// </SubstringIntOptIterator>
+
+
 // <StringLengthIterator>
 void PrinterVisitor::beginVisit ( const StringLengthIterator& a) {
   thePrinter.startBeginVisit("StringLengthIterator", ++theId);

=== modified file 'src/runtime/visitors/pregenerated/printer_visitor.h'
--- src/runtime/visitors/pregenerated/printer_visitor.h	2011-10-27 20:55:51 +0000
+++ src/runtime/visitors/pregenerated/printer_visitor.h	2011-11-11 08:41:26 +0000
@@ -813,6 +813,9 @@
     void beginVisit( const SubstringIterator& );
     void endVisit  ( const SubstringIterator& );
 
+    void beginVisit( const SubstringIntOptIterator& );
+    void endVisit  ( const SubstringIntOptIterator& );
+
     void beginVisit( const StringLengthIterator& );
     void endVisit  ( const StringLengthIterator& );
 

=== modified file 'src/runtime/visitors/printer_visitor_impl.h'
--- src/runtime/visitors/printer_visitor_impl.h	2011-10-11 01:05:23 +0000
+++ src/runtime/visitors/printer_visitor_impl.h	2011-11-11 08:41:26 +0000
@@ -309,4 +309,5 @@
   DECLARE_VISITOR(FlowCtlIterator);
 
   DECLARE_VISITOR(CountCollectionIterator);
+
 /* vim:set et sw=2 ts=2: */


Follow ups