zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #09906
[Merge] lp:~zorba-coders/zorba/feature-ft_module into lp:zorba
Paul J. Lucas has proposed merging lp:~zorba-coders/zorba/feature-ft_module into lp:zorba.
Requested reviews:
Paul J. Lucas (paul-lucas)
Matthias Brantner (matthias-brantner)
Related bugs:
Bug #944795 in Zorba: "XQDoc doesn't handle & in URLs"
https://bugs.launchpad.net/zorba/+bug/944795
For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/feature-ft_module/+merge/106235
This gets in another public API change for the full-text module in time for 2.5, since now is the time to do it. tokenize() has been renamed to tokenize-node() for 2 reasons:
1. There already exists tokenize-string() and therefore tokenize-node() is a better name than just plain tokenize().
2. The forthcoming addition of the black & white tokenization function will most likely be called tokenize-nodes() -- plural.
--
https://code.launchpad.net/~zorba-coders/zorba/feature-ft_module/+merge/106235
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'doc/zorba/ft_tokenizer.dox'
--- doc/zorba/ft_tokenizer.dox 2012-05-16 01:01:06 +0000
+++ doc/zorba/ft_tokenizer.dox 2012-05-17 18:22:21 +0000
@@ -152,7 +152,7 @@
</tr>
</table>
-A complete implementation of \c %tokenize() is non-trivial
+A complete implementation of \c %tokenize_string() is non-trivial
and therefore an example is beyond the scope of this API documentation.
However,
the things a tokenizer should take into consideration include:
=== modified file 'modules/com/zorba-xquery/www/modules/full-text.xq'
--- modules/com/zorba-xquery/www/modules/full-text.xq 2012-05-08 23:49:22 +0000
+++ modules/com/zorba-xquery/www/modules/full-text.xq 2012-05-17 18:22:21 +0000
@@ -762,7 +762,7 @@
as xs:string+ external;
(:~
- : Tokenizes the given document.
+ : Tokenizes the given node and all of its descendants.
:
: @param $node The node to tokenize.
: @param $lang The default
@@ -772,11 +772,11 @@
: @error err:FTST0009 if <code>$lang</code> is not supported in general.
: @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-1.xq
:)
-declare function ft:tokenize( $node as node(), $lang as xs:language )
+declare function ft:tokenize-node( $node as node(), $lang as xs:language )
as element(ft-schema:token)* external;
(:~
- : Tokenizes the given document.
+ : Tokenizes the given node and all of its descendants.
:
: @param $node The node to tokenize.
: The document's default
@@ -789,7 +789,7 @@
: @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-3.xq
: @example test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-4.xq
:)
-declare function ft:tokenize( $node as node() )
+declare function ft:tokenize-node( $node as node() )
as element(ft-schema:token)* external;
(:~
=== modified file 'src/functions/func_ft_module_impl.cpp'
--- src/functions/func_ft_module_impl.cpp 2012-05-15 21:13:21 +0000
+++ src/functions/func_ft_module_impl.cpp 2012-05-17 18:22:21 +0000
@@ -25,14 +25,14 @@
#ifndef ZORBA_NO_FULL_TEXT
-PlanIter_t full_text_tokenize::codegen(
+PlanIter_t full_text_tokenize_node::codegen(
CompilerCB*,
static_context* sctx,
const QueryLoc& loc,
std::vector<PlanIter_t>& argv,
expr& ann) const
{
- return new TokenizeIterator(sctx, loc, argv);
+ return new TokenizeNodeIterator(sctx, loc, argv);
}
@@ -90,20 +90,20 @@
false);
{
DECL_WITH_KIND(sctx,
- full_text_tokenize,
- (createQName(FT_MODULE_NS, "", "tokenize"),
+ full_text_tokenize_node,
+ (createQName(FT_MODULE_NS, "", "tokenize-node"),
GENV_TYPESYSTEM.ANY_NODE_TYPE_ONE,
tokenize_return_type),
- FunctionConsts::FULL_TEXT_TOKENIZE_1);
+ FunctionConsts::FULL_TEXT_TOKENIZE_NODE_1);
}
{
DECL_WITH_KIND(sctx,
- full_text_tokenize,
- (createQName( FT_MODULE_NS, "", "tokenize"),
+ full_text_tokenize_node,
+ (createQName( FT_MODULE_NS, "", "tokenize-node"),
GENV_TYPESYSTEM.ANY_NODE_TYPE_ONE,
GENV_TYPESYSTEM.LANGUAGE_TYPE_ONE,
tokenize_return_type),
- FunctionConsts::FULL_TEXT_TOKENIZE_2);
+ FunctionConsts::FULL_TEXT_TOKENIZE_NODE_2);
}
xqtref_t tokenizer_properties_return_type =
=== modified file 'src/functions/func_ft_module_impl.h'
--- src/functions/func_ft_module_impl.h 2012-05-09 20:40:03 +0000
+++ src/functions/func_ft_module_impl.h 2012-05-17 18:22:21 +0000
@@ -30,11 +30,11 @@
///////////////////////////////////////////////////////////////////////////////
//full-text:tokenize
-class full_text_tokenize : public function
+class full_text_tokenize_node : public function
{
public:
- full_text_tokenize(const signature& sig, FunctionConsts::FunctionKind kind)
- :
+ full_text_tokenize_node(const signature& sig,
+ FunctionConsts::FunctionKind kind) :
function(sig, kind)
{
=== modified file 'src/functions/function_consts.h'
--- src/functions/function_consts.h 2012-05-08 23:49:22 +0000
+++ src/functions/function_consts.h 2012-05-17 18:22:21 +0000
@@ -229,8 +229,8 @@
FULL_TEXT_CURRENT_COMPARE_OPTIONS_0,
FULL_TEXT_TOKENIZER_PROPERTIES_1,
FULL_TEXT_TOKENIZER_PROPERTIES_0,
- FULL_TEXT_TOKENIZE_2,
- FULL_TEXT_TOKENIZE_1,
+ FULL_TEXT_TOKENIZE_NODE_2,
+ FULL_TEXT_TOKENIZE_NODE_1,
#endif
#include "functions/function_enum.h"
=== modified file 'src/runtime/full_text/ft_module_impl.cpp'
--- src/runtime/full_text/ft_module_impl.cpp 2012-05-17 15:21:43 +0000
+++ src/runtime/full_text/ft_module_impl.cpp 2012-05-17 18:22:21 +0000
@@ -528,14 +528,15 @@
///////////////////////////////////////////////////////////////////////////////
-TokenizeIterator::TokenizeIterator( static_context *sctx, QueryLoc const &loc,
- std::vector<PlanIter_t>& children ) :
- NaryBaseIterator<TokenizeIterator,TokenizeIteratorState>(sctx, loc, children)
+TokenizeNodeIterator::TokenizeNodeIterator( static_context *sctx,
+ QueryLoc const &loc,
+ std::vector<PlanIter_t>& children ):
+ NaryBaseIterator<TokenizeNodeIterator,TokenizeNodeIteratorState>(sctx, loc, children)
{
initMembers();
}
-void TokenizeIterator::initMembers() {
+void TokenizeNodeIterator::initMembers() {
GENV_ITEMFACTORY->createQName(
token_qname_, static_context::ZORBA_FULL_TEXT_FN_NS, "", "token" );
@@ -555,8 +556,8 @@
ref_qname_, "", "", "node-ref" );
}
-bool TokenizeIterator::nextImpl( store::Item_t &result,
- PlanState &plan_state ) const {
+bool TokenizeNodeIterator::nextImpl( store::Item_t &result,
+ PlanState &plan_state ) const {
store::Item_t node_name, attr_node;
zstring base_uri;
store::Item_t item;
@@ -567,8 +568,8 @@
store::Item_t type_name;
zstring value_string;
- TokenizeIteratorState *state;
- DEFAULT_STACK_INIT( TokenizeIteratorState, state, plan_state );
+ TokenizeNodeIteratorState *state;
+ DEFAULT_STACK_INIT( TokenizeNodeIteratorState, state, plan_state );
if ( consumeNext( state->doc_item_, theChildren[0], plan_state ) ) {
if ( theChildren.size() > 1 ) {
@@ -651,19 +652,19 @@
STACK_END( state );
}
-void TokenizeIterator::resetImpl( PlanState &plan_state ) const {
- NaryBaseIterator<TokenizeIterator,TokenizeIteratorState>::
+void TokenizeNodeIterator::resetImpl( PlanState &plan_state ) const {
+ NaryBaseIterator<TokenizeNodeIterator,TokenizeNodeIteratorState>::
resetImpl( plan_state );
- TokenizeIteratorState *const state =
- StateTraitsImpl<TokenizeIteratorState>::getState(
+ TokenizeNodeIteratorState *const state =
+ StateTraitsImpl<TokenizeNodeIteratorState>::getState(
plan_state, this->theStateOffset
);
state->doc_tokens_->reset();
}
-void TokenizeIterator::serialize( serialization::Archiver &ar ) {
+void TokenizeNodeIterator::serialize( serialization::Archiver &ar ) {
serialize_baseclass(
- ar, (NaryBaseIterator<TokenizeIterator,TokenizeIteratorState>*)this
+ ar, (NaryBaseIterator<TokenizeNodeIterator,TokenizeNodeIteratorState>*)this
);
if ( !ar.is_serializing_out() )
initMembers();
=== modified file 'src/runtime/full_text/pregenerated/ft_module.cpp'
--- src/runtime/full_text/pregenerated/ft_module.cpp 2012-05-08 23:49:22 +0000
+++ src/runtime/full_text/pregenerated/ft_module.cpp 2012-05-17 18:22:21 +0000
@@ -295,12 +295,12 @@
#endif
#ifndef ZORBA_NO_FULL_TEXT
-// <TokenizeIterator>
-TokenizeIterator::class_factory<TokenizeIterator>
-TokenizeIterator::g_class_factory;
-
-
-void TokenizeIterator::accept(PlanIterVisitor& v) const {
+// <TokenizeNodeIterator>
+TokenizeNodeIterator::class_factory<TokenizeNodeIterator>
+TokenizeNodeIterator::g_class_factory;
+
+
+void TokenizeNodeIterator::accept(PlanIterVisitor& v) const {
v.beginVisit(*this);
std::vector<PlanIter_t>::const_iterator lIter = theChildren.begin();
@@ -312,17 +312,17 @@
v.endVisit(*this);
}
-TokenizeIterator::~TokenizeIterator() {}
-
-TokenizeIteratorState::TokenizeIteratorState() {}
-
-TokenizeIteratorState::~TokenizeIteratorState() {}
-
-
-void TokenizeIteratorState::reset(PlanState& planState) {
+TokenizeNodeIterator::~TokenizeNodeIterator() {}
+
+TokenizeNodeIteratorState::TokenizeNodeIteratorState() {}
+
+TokenizeNodeIteratorState::~TokenizeNodeIteratorState() {}
+
+
+void TokenizeNodeIteratorState::reset(PlanState& planState) {
PlanIteratorState::reset(planState);
}
-// </TokenizeIterator>
+// </TokenizeNodeIterator>
#endif
#ifndef ZORBA_NO_FULL_TEXT
=== modified file 'src/runtime/full_text/pregenerated/ft_module.h'
--- src/runtime/full_text/pregenerated/ft_module.h 2012-05-08 23:49:22 +0000
+++ src/runtime/full_text/pregenerated/ft_module.h 2012-05-17 18:22:21 +0000
@@ -455,20 +455,20 @@
*
* Author:
*/
-class TokenizeIteratorState : public PlanIteratorState
+class TokenizeNodeIteratorState : public PlanIteratorState
{
public:
store::Item_t doc_item_; //
FTTokenIterator_t doc_tokens_; //
- TokenizeIteratorState();
+ TokenizeNodeIteratorState();
- ~TokenizeIteratorState();
+ ~TokenizeNodeIteratorState();
void reset(PlanState&);
};
-class TokenizeIterator : public NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>
+class TokenizeNodeIterator : public NaryBaseIterator<TokenizeNodeIterator, TokenizeNodeIteratorState>
{
protected:
store::Item_t token_qname_; //
@@ -478,20 +478,20 @@
store::Item_t value_qname_; //
store::Item_t ref_qname_; //
public:
- SERIALIZABLE_CLASS(TokenizeIterator);
+ SERIALIZABLE_CLASS(TokenizeNodeIterator);
- SERIALIZABLE_CLASS_CONSTRUCTOR2T(TokenizeIterator,
- NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>);
+ SERIALIZABLE_CLASS_CONSTRUCTOR2T(TokenizeNodeIterator,
+ NaryBaseIterator<TokenizeNodeIterator, TokenizeNodeIteratorState>);
void serialize( ::zorba::serialization::Archiver& ar);
- TokenizeIterator(
+ TokenizeNodeIterator(
static_context* sctx,
const QueryLoc& loc,
std::vector<PlanIter_t>& children)
;
- virtual ~TokenizeIterator();
+ virtual ~TokenizeNodeIterator();
public:
void initMembers();
=== modified file 'src/runtime/spec/full_text/ft_module.xml'
--- src/runtime/spec/full_text/ft_module.xml 2012-05-08 23:49:22 +0000
+++ src/runtime/spec/full_text/ft_module.xml 2012-05-17 18:22:21 +0000
@@ -167,7 +167,7 @@
</zorba:state>
</zorba:iterator>
-<zorba:iterator name="TokenizeIterator"
+<zorba:iterator name="TokenizeNodeIterator"
generateResetImpl="true"
generateSerialize="false"
generateConstructor="false"
=== modified file 'src/runtime/visitors/pregenerated/planiter_visitor.h'
--- src/runtime/visitors/pregenerated/planiter_visitor.h 2012-05-08 23:49:22 +0000
+++ src/runtime/visitors/pregenerated/planiter_visitor.h 2012-05-17 18:22:21 +0000
@@ -227,7 +227,7 @@
class ThesaurusLookupIterator;
#endif
#ifndef ZORBA_NO_FULL_TEXT
- class TokenizeIterator;
+ class TokenizeNodeIterator;
#endif
#ifndef ZORBA_NO_FULL_TEXT
class TokenizerPropertiesIterator;
@@ -951,8 +951,8 @@
virtual void endVisit ( const ThesaurusLookupIterator& ) = 0;
#endif
#ifndef ZORBA_NO_FULL_TEXT
- virtual void beginVisit ( const TokenizeIterator& ) = 0;
- virtual void endVisit ( const TokenizeIterator& ) = 0;
+ virtual void beginVisit ( const TokenizeNodeIterator& ) = 0;
+ virtual void endVisit ( const TokenizeNodeIterator& ) = 0;
#endif
#ifndef ZORBA_NO_FULL_TEXT
virtual void beginVisit ( const TokenizerPropertiesIterator& ) = 0;
=== modified file 'src/runtime/visitors/pregenerated/printer_visitor.cpp'
--- src/runtime/visitors/pregenerated/printer_visitor.cpp 2012-05-08 23:49:22 +0000
+++ src/runtime/visitors/pregenerated/printer_visitor.cpp 2012-05-17 18:22:21 +0000
@@ -1412,18 +1412,18 @@
#endif
#ifndef ZORBA_NO_FULL_TEXT
-// <TokenizeIterator>
-void PrinterVisitor::beginVisit ( const TokenizeIterator& a) {
- thePrinter.startBeginVisit("TokenizeIterator", ++theId);
+// <TokenizeNodeIterator>
+void PrinterVisitor::beginVisit ( const TokenizeNodeIterator& a) {
+ thePrinter.startBeginVisit("TokenizeNodeIterator", ++theId);
printCommons( &a, theId );
thePrinter.endBeginVisit( theId );
}
-void PrinterVisitor::endVisit ( const TokenizeIterator& ) {
+void PrinterVisitor::endVisit ( const TokenizeNodeIterator& ) {
thePrinter.startEndVisit();
thePrinter.endEndVisit();
}
-// </TokenizeIterator>
+// </TokenizeNodeIterator>
#endif
#ifndef ZORBA_NO_FULL_TEXT
=== modified file 'src/runtime/visitors/pregenerated/printer_visitor.h'
--- src/runtime/visitors/pregenerated/printer_visitor.h 2012-05-08 23:49:22 +0000
+++ src/runtime/visitors/pregenerated/printer_visitor.h 2012-05-17 18:22:21 +0000
@@ -348,8 +348,8 @@
#endif
#ifndef ZORBA_NO_FULL_TEXT
- void beginVisit( const TokenizeIterator& );
- void endVisit ( const TokenizeIterator& );
+ void beginVisit( const TokenizeNodeIterator& );
+ void endVisit ( const TokenizeNodeIterator& );
#endif
#ifndef ZORBA_NO_FULL_TEXT
=== renamed file 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-1.xml.res' => 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-node-1.xml.res'
=== renamed file 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-2.xml.res' => 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-node-2.xml.res'
=== renamed file 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-3.xml.res' => 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-node-3.xml.res'
=== renamed file 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-4.xml.res' => 'test/rbkt/ExpQueryResults/zorba/fulltext/ft-module-tokenize-node-4.xml.res'
=== renamed file 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-1.xq' => 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-1.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-1.xq 2012-05-08 17:24:54 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-1.xq 2012-05-17 18:22:21 +0000
@@ -2,7 +2,7 @@
import schema namespace fts = "http://www.zorba-xquery.com/modules/full-text";
let $doc := <msg>hello, world</msg>
-let $tokens := ft:tokenize( $doc, xs:language("en") )
+let $tokens := ft:tokenize-node( $doc, xs:language("en") )
let $t1 := validate { $tokens[1] }
let $t2 := validate { $tokens[2] }
=== renamed file 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq' => 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-2.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq 2012-05-05 11:37:42 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-2.xq 2012-05-17 18:22:21 +0000
@@ -1,9 +1,8 @@
import module namespace ft = "http://www.zorba-xquery.com/modules/full-text";
-
import schema namespace fts = "http://www.zorba-xquery.com/modules/full-text";
let $doc := <msg xml:lang="es">hola, mundo</msg>
-let $tokens := ft:tokenize( $doc )
+let $tokens := ft:tokenize-node( $doc )
let $t1 := validate { $tokens[1] }
let $t2 := validate { $tokens[2] }
=== renamed file 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-3.xq' => 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-3.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-3.xq 2012-05-05 16:28:22 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-3.xq 2012-05-17 18:22:21 +0000
@@ -4,7 +4,7 @@
import schema namespace fts = "http://www.zorba-xquery.com/modules/full-text";
let $x := <p xml:lang="en">Houston, we have a <em>problem</em>!</p>
-let $tokens := ft:tokenize( $x )
+let $tokens := ft:tokenize-node( $x )
let $node-ref := (validate { $tokens[5] })/@node-ref
let $node := ref:node-by-reference( $node-ref )
return $node instance of text()
=== renamed file 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-4.xq' => 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-4.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-4.xq 2012-05-05 16:28:22 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-node-4.xq 2012-05-17 18:22:21 +0000
@@ -4,7 +4,7 @@
import schema namespace fts = "http://www.zorba-xquery.com/modules/full-text";
let $x := <msg xml:lang="en" content="Houston, we have a problem!"/>
-let $tokens := ft:tokenize( $x/@content )
+let $tokens := ft:tokenize-node( $x/@content )
let $node-ref := (validate { $tokens[5] }) /@node-ref
let $node := ref:node-by-reference( $node-ref )
return $node instance of attribute(content)
Follow ups
-
[Merge] lp:~zorba-coders/zorba/feature-ft_module into lp:zorba
From: noreply, 2012-05-18
-
[Merge] lp:~zorba-coders/zorba/feature-ft_module into lp:zorba
From: Zorba Build Bot, 2012-05-18
-
[Merge] lp:~zorba-coders/zorba/feature-ft_module into lp:zorba
From: Zorba Build Bot, 2012-05-18
-
[Merge] lp:~zorba-coders/zorba/feature-ft_module into lp:zorba
From: Matthias Brantner, 2012-05-18
-
Re: [Merge] lp:~zorba-coders/zorba/feature-ft_module into lp:zorba
From: Matthias Brantner, 2012-05-18
-
[Merge] lp:~zorba-coders/zorba/feature-ft_module into lp:zorba
From: Zorba Build Bot, 2012-05-17
-
Re: [Merge] lp:~zorba-coders/zorba/feature-ft_module into lp:zorba
From: Zorba Build Bot, 2012-05-17
-
[Merge] lp:~zorba-coders/zorba/feature-ft_module into lp:zorba
From: Zorba Build Bot, 2012-05-17
-
[Merge] lp:~zorba-coders/zorba/feature-ft_module into lp:zorba
From: Matthias Brantner, 2012-05-17
-
Re: [Merge] lp:~zorba-coders/zorba/feature-ft_module into lp:zorba
From: Paul J. Lucas, 2012-05-17