← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~zorba-coders/zorba/feature-ft_module into lp:zorba

 

Paul J. Lucas has proposed merging lp:~zorba-coders/zorba/feature-ft_module into lp:zorba.

Requested reviews:
  Paul J. Lucas (paul-lucas)
  Matthias Brantner (matthias-brantner)
Related bugs:
  Bug #944795 in Zorba: "XQDoc doesn't handle & in URLs"
  https://bugs.launchpad.net/zorba/+bug/944795

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/feature-ft_module/+merge/106235

This gets one more public API change for the full-text module into 2.5, since now is the time to do it. tokenize() has been renamed to tokenize-node() for two reasons:

1. There already exists tokenize-string() and therefore tokenize-node() is a better name than just plain tokenize().

2. The forthcoming black & white tokenization function will most likely be called tokenize-nodes() -- plural.
-- 
https://code.launchpad.net/~zorba-coders/zorba/feature-ft_module/+merge/106235
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'doc/zorba/ft_tokenizer.dox'
--- doc/zorba/ft_tokenizer.dox	2012-05-16 01:01:06 +0000
+++ doc/zorba/ft_tokenizer.dox	2012-05-17 18:22:21 +0000
@@ -152,7 +152,7 @@
   </tr>
 </table>
 
-A complete implementation of \c %tokenize() is non-trivial
+A complete implementation of \c %tokenize_string() is non-trivial
 and therefore an example is beyond the scope of this API documentation.
 However,
 the things a tokenizer should take into consideration include:

=== modified file 'modules/com/zorba-xquery/www/modules/full-text.xq'
--- modules/com/zorba-xquery/www/modules/full-text.xq	2012-05-08 23:49:22 +0000
+++ modules/com/zorba-xquery/www/modules/full-text.xq	2012-05-17 18:22:21 +0000
@@ -762,7 +762,7 @@
   as xs:string+ external;
 
 (:~
- : Tokenizes the given document.
+ : Tokenizes the given node and all of its descendants.
  :
  : @param $node The node to tokenize.
  : @param $lang The default
@@ -772,11 +772,11 @@
  : @error err:FTST0009 if <code>$lang</code> is not supported in general.
  : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-1.xq
  :)
-declare function ft:tokenize( $node as node(), $lang as xs:language )
+declare function ft:tokenize-node( $node as node(), $lang as xs:language )
   as element(ft-schema:token)* external;
 
 (:~
- : Tokenizes the given document.
+ : Tokenizes the given node and all of its descendants.
  :
  : @param $node The node to tokenize.
  : The document's default
@@ -789,7 +789,7 @@
  : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-3.xq
  : @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-4.xq
  :)
-declare function ft:tokenize( $node as node() )
+declare function ft:tokenize-node( $node as node() )
   as element(ft-schema:token)* external;
 
 (:~

=== modified file 'src/functions/func_ft_module_impl.cpp'
--- src/functions/func_ft_module_impl.cpp	2012-05-15 21:13:21 +0000
+++ src/functions/func_ft_module_impl.cpp	2012-05-17 18:22:21 +0000
@@ -25,14 +25,14 @@
 
 #ifndef ZORBA_NO_FULL_TEXT
 
-PlanIter_t full_text_tokenize::codegen(
+PlanIter_t full_text_tokenize_node::codegen(
   CompilerCB*,
   static_context* sctx,
   const QueryLoc& loc,
   std::vector<PlanIter_t>& argv,
   expr& ann) const
 {
-  return new TokenizeIterator(sctx, loc, argv);
+  return new TokenizeNodeIterator(sctx, loc, argv);
 }
 
 
@@ -90,20 +90,20 @@
                                    false);
   {
     DECL_WITH_KIND(sctx,
-                   full_text_tokenize,
-                   (createQName(FT_MODULE_NS, "", "tokenize"),
+                   full_text_tokenize_node,
+                   (createQName(FT_MODULE_NS, "", "tokenize-node"),
                     GENV_TYPESYSTEM.ANY_NODE_TYPE_ONE,
                     tokenize_return_type),
-                   FunctionConsts::FULL_TEXT_TOKENIZE_1);
+                   FunctionConsts::FULL_TEXT_TOKENIZE_NODE_1);
   }
   {
     DECL_WITH_KIND(sctx,
-                   full_text_tokenize,
-                   (createQName( FT_MODULE_NS, "", "tokenize"),
+                   full_text_tokenize_node,
+                   (createQName( FT_MODULE_NS, "", "tokenize-node"),
                     GENV_TYPESYSTEM.ANY_NODE_TYPE_ONE,
                     GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
                     tokenize_return_type),
-                   FunctionConsts::FULL_TEXT_TOKENIZE_2);
+                   FunctionConsts::FULL_TEXT_TOKENIZE_NODE_2);
   }
 
   xqtref_t tokenizer_properties_return_type =

=== modified file 'src/functions/func_ft_module_impl.h'
--- src/functions/func_ft_module_impl.h	2012-05-09 20:40:03 +0000
+++ src/functions/func_ft_module_impl.h	2012-05-17 18:22:21 +0000
@@ -30,11 +30,11 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 //full-text:tokenize
-class full_text_tokenize : public function
+class full_text_tokenize_node : public function
 {
 public:
-  full_text_tokenize(const signature& sig, FunctionConsts::FunctionKind kind)
-    : 
+  full_text_tokenize_node(const signature& sig,
+                          FunctionConsts::FunctionKind kind) : 
     function(sig, kind)
   {
 

=== modified file 'src/functions/function_consts.h'
--- src/functions/function_consts.h	2012-05-08 23:49:22 +0000
+++ src/functions/function_consts.h	2012-05-17 18:22:21 +0000
@@ -229,8 +229,8 @@
   FULL_TEXT_CURRENT_COMPARE_OPTIONS_0,
   FULL_TEXT_TOKENIZER_PROPERTIES_1,
   FULL_TEXT_TOKENIZER_PROPERTIES_0,
-  FULL_TEXT_TOKENIZE_2,
-  FULL_TEXT_TOKENIZE_1,
+  FULL_TEXT_TOKENIZE_NODE_2,
+  FULL_TEXT_TOKENIZE_NODE_1,
 #endif
 
 #include "functions/function_enum.h"

=== modified file 'src/runtime/full_text/ft_module_impl.cpp'
--- src/runtime/full_text/ft_module_impl.cpp	2012-05-17 15:21:43 +0000
+++ src/runtime/full_text/ft_module_impl.cpp	2012-05-17 18:22:21 +0000
@@ -528,14 +528,15 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
-TokenizeIterator::TokenizeIterator( static_context *sctx, QueryLoc const &loc,
-                                    std::vector<PlanIter_t>& children ) :
-  NaryBaseIterator<TokenizeIterator,TokenizeIteratorState>(sctx, loc, children)
+TokenizeNodeIterator::TokenizeNodeIterator( static_context *sctx,
+                                            QueryLoc const &loc,
+                                            std::vector<PlanIter_t>& children ):
+  NaryBaseIterator<TokenizeNodeIterator,TokenizeNodeIteratorState>(sctx, loc, children)
 {
   initMembers();
 }
 
-void TokenizeIterator::initMembers() {
+void TokenizeNodeIterator::initMembers() {
   GENV_ITEMFACTORY->createQName(
     token_qname_, static_context::ZORBA_FULL_TEXT_FN_NS, "", "token" );
 
@@ -555,8 +556,8 @@
     ref_qname_, "", "", "node-ref" );
 }
 
-bool TokenizeIterator::nextImpl( store::Item_t &result,
-                                 PlanState &plan_state ) const {
+bool TokenizeNodeIterator::nextImpl( store::Item_t &result,
+                                     PlanState &plan_state ) const {
   store::Item_t node_name, attr_node;
   zstring base_uri;
   store::Item_t item;
@@ -567,8 +568,8 @@
   store::Item_t type_name;
   zstring value_string;
 
-  TokenizeIteratorState *state;
-  DEFAULT_STACK_INIT( TokenizeIteratorState, state, plan_state );
+  TokenizeNodeIteratorState *state;
+  DEFAULT_STACK_INIT( TokenizeNodeIteratorState, state, plan_state );
 
   if ( consumeNext( state->doc_item_, theChildren[0], plan_state ) ) {
     if ( theChildren.size() > 1 ) {
@@ -651,19 +652,19 @@
   STACK_END( state );
 }
 
-void TokenizeIterator::resetImpl( PlanState &plan_state ) const {
-  NaryBaseIterator<TokenizeIterator,TokenizeIteratorState>::
+void TokenizeNodeIterator::resetImpl( PlanState &plan_state ) const {
+  NaryBaseIterator<TokenizeNodeIterator,TokenizeNodeIteratorState>::
     resetImpl( plan_state );
-  TokenizeIteratorState *const state =
-    StateTraitsImpl<TokenizeIteratorState>::getState(
+  TokenizeNodeIteratorState *const state =
+    StateTraitsImpl<TokenizeNodeIteratorState>::getState(
       plan_state, this->theStateOffset
     );
   state->doc_tokens_->reset();
 }
 
-void TokenizeIterator::serialize( serialization::Archiver &ar ) {
+void TokenizeNodeIterator::serialize( serialization::Archiver &ar ) {
   serialize_baseclass(
-    ar, (NaryBaseIterator<TokenizeIterator,TokenizeIteratorState>*)this
+    ar, (NaryBaseIterator<TokenizeNodeIterator,TokenizeNodeIteratorState>*)this
   );
   if ( !ar.is_serializing_out() )
     initMembers();

=== modified file 'src/runtime/full_text/pregenerated/ft_module.cpp'
--- src/runtime/full_text/pregenerated/ft_module.cpp	2012-05-08 23:49:22 +0000
+++ src/runtime/full_text/pregenerated/ft_module.cpp	2012-05-17 18:22:21 +0000
@@ -295,12 +295,12 @@
 
 #endif
 #ifndef ZORBA_NO_FULL_TEXT
-// <TokenizeIterator>
-TokenizeIterator::class_factory<TokenizeIterator>
-TokenizeIterator::g_class_factory;
-
-
-void TokenizeIterator::accept(PlanIterVisitor& v) const {
+// <TokenizeNodeIterator>
+TokenizeNodeIterator::class_factory<TokenizeNodeIterator>
+TokenizeNodeIterator::g_class_factory;
+
+
+void TokenizeNodeIterator::accept(PlanIterVisitor& v) const {
   v.beginVisit(*this);
 
   std::vector<PlanIter_t>::const_iterator lIter = theChildren.begin();
@@ -312,17 +312,17 @@
   v.endVisit(*this);
 }
 
-TokenizeIterator::~TokenizeIterator() {}
-
-TokenizeIteratorState::TokenizeIteratorState() {}
-
-TokenizeIteratorState::~TokenizeIteratorState() {}
-
-
-void TokenizeIteratorState::reset(PlanState& planState) {
+TokenizeNodeIterator::~TokenizeNodeIterator() {}
+
+TokenizeNodeIteratorState::TokenizeNodeIteratorState() {}
+
+TokenizeNodeIteratorState::~TokenizeNodeIteratorState() {}
+
+
+void TokenizeNodeIteratorState::reset(PlanState& planState) {
   PlanIteratorState::reset(planState);
 }
-// </TokenizeIterator>
+// </TokenizeNodeIterator>
 
 #endif
 #ifndef ZORBA_NO_FULL_TEXT

=== modified file 'src/runtime/full_text/pregenerated/ft_module.h'
--- src/runtime/full_text/pregenerated/ft_module.h	2012-05-08 23:49:22 +0000
+++ src/runtime/full_text/pregenerated/ft_module.h	2012-05-17 18:22:21 +0000
@@ -455,20 +455,20 @@
  * 
  * Author: 
  */
-class TokenizeIteratorState : public PlanIteratorState
+class TokenizeNodeIteratorState : public PlanIteratorState
 {
 public:
   store::Item_t doc_item_; //
   FTTokenIterator_t doc_tokens_; //
 
-  TokenizeIteratorState();
+  TokenizeNodeIteratorState();
 
-  ~TokenizeIteratorState();
+  ~TokenizeNodeIteratorState();
 
   void reset(PlanState&);
 };
 
-class TokenizeIterator : public NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>
+class TokenizeNodeIterator : public NaryBaseIterator<TokenizeNodeIterator, TokenizeNodeIteratorState>
 { 
 protected:
   store::Item_t token_qname_; //
@@ -478,20 +478,20 @@
   store::Item_t value_qname_; //
   store::Item_t ref_qname_; //
 public:
-  SERIALIZABLE_CLASS(TokenizeIterator);
+  SERIALIZABLE_CLASS(TokenizeNodeIterator);
 
-  SERIALIZABLE_CLASS_CONSTRUCTOR2T(TokenizeIterator,
-    NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>);
+  SERIALIZABLE_CLASS_CONSTRUCTOR2T(TokenizeNodeIterator,
+    NaryBaseIterator<TokenizeNodeIterator, TokenizeNodeIteratorState>);
 
   void serialize( ::zorba::serialization::Archiver& ar);
 
-  TokenizeIterator(
+  TokenizeNodeIterator(
     static_context* sctx,
     const QueryLoc& loc,
     std::vector<PlanIter_t>& children)
     ;
 
-  virtual ~TokenizeIterator();
+  virtual ~TokenizeNodeIterator();
 
 public:
   void initMembers();

=== modified file 'src/runtime/spec/full_text/ft_module.xml'
--- src/runtime/spec/full_text/ft_module.xml	2012-05-08 23:49:22 +0000
+++ src/runtime/spec/full_text/ft_module.xml	2012-05-17 18:22:21 +0000
@@ -167,7 +167,7 @@
   </zorba:state>
 </zorba:iterator>
 
-<zorba:iterator name="TokenizeIterator"
+<zorba:iterator name="TokenizeNodeIterator"
                 generateResetImpl="true"
                 generateSerialize="false"
                 generateConstructor="false"

=== modified file 'src/runtime/visitors/pregenerated/planiter_visitor.h'
--- src/runtime/visitors/pregenerated/planiter_visitor.h	2012-05-08 23:49:22 +0000
+++ src/runtime/visitors/pregenerated/planiter_visitor.h	2012-05-17 18:22:21 +0000
@@ -227,7 +227,7 @@
     class ThesaurusLookupIterator;
 #endif
 #ifndef ZORBA_NO_FULL_TEXT
-    class TokenizeIterator;
+    class TokenizeNodeIterator;
 #endif
 #ifndef ZORBA_NO_FULL_TEXT
     class TokenizerPropertiesIterator;
@@ -951,8 +951,8 @@
     virtual void endVisit   ( const ThesaurusLookupIterator& ) = 0;
 #endif
 #ifndef ZORBA_NO_FULL_TEXT
-    virtual void beginVisit ( const TokenizeIterator& ) = 0;
-    virtual void endVisit   ( const TokenizeIterator& ) = 0;
+    virtual void beginVisit ( const TokenizeNodeIterator& ) = 0;
+    virtual void endVisit   ( const TokenizeNodeIterator& ) = 0;
 #endif
 #ifndef ZORBA_NO_FULL_TEXT
     virtual void beginVisit ( const TokenizerPropertiesIterator& ) = 0;

=== modified file 'src/runtime/visitors/pregenerated/printer_visitor.cpp'
--- src/runtime/visitors/pregenerated/printer_visitor.cpp	2012-05-08 23:49:22 +0000
+++ src/runtime/visitors/pregenerated/printer_visitor.cpp	2012-05-17 18:22:21 +0000
@@ -1412,18 +1412,18 @@
 
 #endif
 #ifndef ZORBA_NO_FULL_TEXT
-// <TokenizeIterator>
-void PrinterVisitor::beginVisit ( const TokenizeIterator& a) {
-  thePrinter.startBeginVisit("TokenizeIterator", ++theId);
+// <TokenizeNodeIterator>
+void PrinterVisitor::beginVisit ( const TokenizeNodeIterator& a) {
+  thePrinter.startBeginVisit("TokenizeNodeIterator", ++theId);
   printCommons( &a, theId );
   thePrinter.endBeginVisit( theId );
 }
 
-void PrinterVisitor::endVisit ( const TokenizeIterator& ) {
+void PrinterVisitor::endVisit ( const TokenizeNodeIterator& ) {
   thePrinter.startEndVisit();
   thePrinter.endEndVisit();
 }
-// </TokenizeIterator>
+// </TokenizeNodeIterator>
 
 #endif
 #ifndef ZORBA_NO_FULL_TEXT

=== modified file 'src/runtime/visitors/pregenerated/printer_visitor.h'
--- src/runtime/visitors/pregenerated/printer_visitor.h	2012-05-08 23:49:22 +0000
+++ src/runtime/visitors/pregenerated/printer_visitor.h	2012-05-17 18:22:21 +0000
@@ -348,8 +348,8 @@
 #endif
 
 #ifndef ZORBA_NO_FULL_TEXT
-    void beginVisit( const TokenizeIterator& );
-    void endVisit  ( const TokenizeIterator& );
+    void beginVisit( const TokenizeNodeIterator& );
+    void endVisit  ( const TokenizeNodeIterator& );
 #endif
 
 #ifndef ZORBA_NO_FULL_TEXT

=== renamed file 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-1.xml.res' => 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-node-1.xml.res'
=== renamed file 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-2.xml.res' => 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-node-2.xml.res'
=== renamed file 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-3.xml.res' => 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-node-3.xml.res'
=== renamed file 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-4.xml.res' => 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-node-4.xml.res'
=== renamed file 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-1.xq' => 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-1.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-1.xq	2012-05-08 17:24:54 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-1.xq	2012-05-17 18:22:21 +0000
@@ -2,7 +2,7 @@
 import schema namespace fts = "http://www.zorba-xquery.com/modules/full-text";;
 
 let $doc := <msg>hello, world</msg>
-let $tokens := ft:tokenize( $doc, xs:language("en") )
+let $tokens := ft:tokenize-node( $doc, xs:language("en") )
 let $t1 := validate { $tokens[1] }
 let $t2 := validate { $tokens[2] }
 

=== renamed file 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq' => 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-2.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq	2012-05-05 11:37:42 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-2.xq	2012-05-17 18:22:21 +0000
@@ -1,9 +1,8 @@
 import module namespace ft = "http://www.zorba-xquery.com/modules/full-text";;
-
 import schema namespace fts = "http://www.zorba-xquery.com/modules/full-text";;
 
 let $doc := <msg xml:lang="es">hola, mundo</msg>
-let $tokens := ft:tokenize( $doc )
+let $tokens := ft:tokenize-node( $doc )
 let $t1 := validate { $tokens[1] }
 let $t2 := validate { $tokens[2] }
 

=== renamed file 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-3.xq' => 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-3.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-3.xq	2012-05-05 16:28:22 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-3.xq	2012-05-17 18:22:21 +0000
@@ -4,7 +4,7 @@
 import schema namespace fts = "http://www.zorba-xquery.com/modules/full-text";;
 
 let $x := <p xml:lang="en">Houston, we have a <em>problem</em>!</p>
-let $tokens := ft:tokenize( $x )
+let $tokens := ft:tokenize-node( $x )
 let $node-ref := (validate { $tokens[5] })/@node-ref
 let $node := ref:node-by-reference( $node-ref )
 return $node instance of text()

=== renamed file 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-4.xq' => 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-4.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-4.xq	2012-05-05 16:28:22 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-4.xq	2012-05-17 18:22:21 +0000
@@ -4,7 +4,7 @@
 import schema namespace fts = "http://www.zorba-xquery.com/modules/full-text";;
 
 let $x := <msg xml:lang="en" content="Houston, we have a problem!"/>
-let $tokens := ft:tokenize( $x/@content )
+let $tokens := ft:tokenize-node( $x/@content )
 let $node-ref := (validate { $tokens[5] }) /@node-ref
 let $node := ref:node-by-reference( $node-ref )
 return $node instance of attribute(content)


Follow ups