zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #09219
[Merge] lp:~zorba-coders/zorba/feature-ft_module into lp:zorba
Matthias Brantner has proposed merging lp:~zorba-coders/zorba/feature-ft_module into lp:zorba.
Requested reviews:
Paul J. Lucas (paul-lucas)
Related bugs:
Bug #944795 in Zorba: "XQDoc doesn't handle & in URLs"
https://bugs.launchpad.net/zorba/+bug/944795
For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/feature-ft_module/+merge/104830
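Optimized ft:tokenize: the generated token elements are no longer schema-validated inside the iterator, and the QNames used for the token element and its attributes are now created once per iterator (in initMembers) instead of once per token.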
--
https://code.launchpad.net/~zorba-coders/zorba/feature-ft_module/+merge/104830
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/runtime/full_text/ft_module_impl.cpp'
--- src/runtime/full_text/ft_module_impl.cpp 2012-04-28 00:48:03 +0000
+++ src/runtime/full_text/ft_module_impl.cpp 2012-05-05 11:42:19 +0000
@@ -453,9 +453,48 @@
///////////////////////////////////////////////////////////////////////////////
+TokenizeIterator::TokenizeIterator(
+ static_context* sctx,
+ const QueryLoc& loc,
+ std::vector<PlanIter_t>& children)
+ : NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>(sctx, loc, children)
+{
+ initMembers();
+}
+
+void TokenizeIterator::serialize( ::zorba::serialization::Archiver& ar)
+{
+ serialize_baseclass(ar,
+ (NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>*)this);
+ if (!ar.is_serializing_out())
+ {
+ initMembers();
+ }
+}
+
+void TokenizeIterator::initMembers() {
+ GENV_ITEMFACTORY->createQName(
+ token_qname_, static_context::ZORBA_FULL_TEXT_FN_NS, "", "token");
+
+ GENV_ITEMFACTORY->createQName(
+ lang_qname_, "", "", "lang");
+
+ GENV_ITEMFACTORY->createQName(
+ para_qname_, "", "", "paragraph");
+
+ GENV_ITEMFACTORY->createQName(
+ sent_qname_, "", "", "sentence");
+
+ GENV_ITEMFACTORY->createQName(
+ value_qname_, "", "", "value");
+
+ GENV_ITEMFACTORY->createQName(
+ ref_qname_, "", "", "node-ref");
+}
+
bool TokenizeIterator::nextImpl( store::Item_t &result,
PlanState &plan_state ) const {
- store::Item_t attr_name, attr_node;
+ store::Item_t node_name, attr_node;
zstring base_uri;
store::Item_t item;
iso639_1::type lang;
@@ -488,69 +527,60 @@
token = state->doc_tokens_->next();
ZORBA_ASSERT( token );
- if ( state->token_qname_.isNull() )
- GENV_ITEMFACTORY->createQName(
- state->token_qname_, static_context::ZORBA_FULL_TEXT_FN_NS, "",
- "token"
- );
-
base_uri = static_context::ZORBA_FULL_TEXT_FN_NS;
type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
+ node_name = token_qname_;
GENV_ITEMFACTORY->createElementNode(
- result, nullptr, state->token_qname_, type_name, false, false,
+ result, nullptr, node_name, type_name, false, false,
ns_bindings, base_uri
);
if ( token->lang() ) {
value_string = iso639_1::string_of[ token->lang() ];
- GENV_ITEMFACTORY->createQName( attr_name, "", "", "lang" );
GENV_ITEMFACTORY->createString( item, value_string );
type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
+ node_name = lang_qname_;
GENV_ITEMFACTORY->createAttributeNode(
- attr_node, result, attr_name, type_name, item
+ attr_node, result, node_name, type_name, item
);
}
ztd::to_string( token->para(), &value_string );
- GENV_ITEMFACTORY->createQName( attr_name, "", "", "paragraph" );
GENV_ITEMFACTORY->createString( item, value_string );
type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
+ node_name = para_qname_;
GENV_ITEMFACTORY->createAttributeNode(
- attr_node, result, attr_name, type_name, item
+ attr_node, result, node_name, type_name, item
);
ztd::to_string( token->sent(), &value_string );
- GENV_ITEMFACTORY->createQName( attr_name, "", "", "sentence" );
GENV_ITEMFACTORY->createString( item, value_string );
type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
+ node_name = sent_qname_;
GENV_ITEMFACTORY->createAttributeNode(
- attr_node, result, attr_name, type_name, item
+ attr_node, result, node_name, type_name, item
);
value_string = token->value();
- GENV_ITEMFACTORY->createQName( attr_name, "", "", "value" );
GENV_ITEMFACTORY->createString( item, value_string );
type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
+ node_name = value_qname_;
GENV_ITEMFACTORY->createAttributeNode(
- attr_node, result, attr_name, type_name, item
+ attr_node, result, node_name, type_name, item
);
if ( store::Item const *const token_item = token->item() ) {
if ( GENV_STORE.getNodeReference( item, token_item ) ) {
item->getStringValue2( value_string );
- GENV_ITEMFACTORY->createQName( attr_name, "", "", "node-ref" );
GENV_ITEMFACTORY->createString( item, value_string );
type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
+ node_name = ref_qname_;
GENV_ITEMFACTORY->createAttributeNode(
- attr_node, result, attr_name, type_name, item
+ attr_node, result, node_name, type_name, item
);
}
}
-#ifndef ZORBA_NO_XMLSCHEMA
- sctx->validate( result, result, StaticContextConsts::strict_validation );
-#endif /* ZORBA_NO_XMLSCHEMA */
-
STACK_PUSH( true, state );
} // while
}
=== modified file 'src/runtime/full_text/pregenerated/ft_module.h'
--- src/runtime/full_text/pregenerated/ft_module.h 2012-04-20 20:41:53 +0000
+++ src/runtime/full_text/pregenerated/ft_module.h 2012-05-05 11:42:19 +0000
@@ -424,7 +424,6 @@
public:
store::Item_t doc_item_; //
FTTokenIterator_t doc_tokens_; //
- store::Item_t token_qname_; //
TokenizeIteratorState();
@@ -435,28 +434,31 @@
class TokenizeIterator : public NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>
{
+protected:
+ store::Item_t token_qname_; //
+ store::Item_t lang_qname_; //
+ store::Item_t para_qname_; //
+ store::Item_t sent_qname_; //
+ store::Item_t value_qname_; //
+ store::Item_t ref_qname_; //
public:
SERIALIZABLE_CLASS(TokenizeIterator);
SERIALIZABLE_CLASS_CONSTRUCTOR2T(TokenizeIterator,
NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>);
- void serialize( ::zorba::serialization::Archiver& ar)
- {
- serialize_baseclass(ar,
- (NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>*)this);
- }
+ void serialize( ::zorba::serialization::Archiver& ar);
TokenizeIterator(
static_context* sctx,
const QueryLoc& loc,
std::vector<PlanIter_t>& children)
- :
- NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>(sctx, loc, children)
- {}
+ ;
virtual ~TokenizeIterator();
+public:
+ void initMembers();
void accept(PlanIterVisitor& v) const;
bool nextImpl(store::Item_t& result, PlanState& aPlanState) const;
=== modified file 'src/runtime/spec/full_text/ft_module.xml'
--- src/runtime/spec/full_text/ft_module.xml 2012-04-26 13:25:13 +0000
+++ src/runtime/spec/full_text/ft_module.xml 2012-05-05 11:42:19 +0000
@@ -165,14 +165,24 @@
<zorba:iterator name="TokenizeIterator"
generateResetImpl="true"
+ generateSerialize="false"
+ generateConstructor="false"
preprocessorGuard="#ifndef ZORBA_NO_FULL_TEXT">
<zorba:state generateInit="use-default">
<zorba:member type="store::Item_t" name="doc_item_"/>
<zorba:member type="FTTokenIterator_t" name="doc_tokens_"/>
- <zorba:member type="store::Item_t" name="token_qname_"/>
</zorba:state>
+ <zorba:member type="store::Item_t" name="token_qname_"/>
+ <zorba:member type="store::Item_t" name="lang_qname_"/>
+ <zorba:member type="store::Item_t" name="para_qname_"/>
+ <zorba:member type="store::Item_t" name="sent_qname_"/>
+ <zorba:member type="store::Item_t" name="value_qname_"/>
+ <zorba:member type="store::Item_t" name="ref_qname_"/>
+
+ <zorba:method name="initMembers" return="void"/>
+
</zorba:iterator>
<zorba:iterator name="TokenizerPropertiesIterator"
=== modified file 'src/runtime/spec/iterator_h.xq'
--- src/runtime/spec/iterator_h.xq 2012-05-03 12:31:51 +0000
+++ src/runtime/spec/iterator_h.xq 2012-05-05 11:42:19 +0000
@@ -153,16 +153,21 @@
local:children-decl($iter),
local:add-constructor-param($iter),
')',
- $gen:newline, gen:indent(2), ': ',
- $gen:newline, gen:indent(2), $base, '(sctx, loc', local:children-args($iter),
- if ($iter/@base)
- then concat(', ',
- string-join(
- for $base-param in $iter/zorba:constructor/zorba:parameter[@base = "true"]
- return $base-param/@name, ', '))
- else "",
- local:add-constructor-param-2($iter),
- $gen:newline, gen:indent(1), '{}',
+ $gen:newline, gen:indent(2),
+ if (not(exists($iter/@generateConstructor)) or $iter/@generateConstructor = "true")
+ then concat(
+ ': ',
+ $gen:newline, gen:indent(2), $base, '(sctx, loc', local:children-args($iter),
+ if ($iter/@base)
+ then concat(', ',
+ string-join(
+ for $base-param in $iter/zorba:constructor/zorba:parameter[@base = "true"]
+ return $base-param/@name, ', '))
+ else "",
+ local:add-constructor-param-2($iter),
+ $gen:newline, gen:indent(1), '{}')
+ else
+ ';',
$gen:newline, $gen:newline
)
};
=== modified file 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq 2012-03-08 18:46:22 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq 2012-05-05 11:42:19 +0000
@@ -1,9 +1,11 @@
import module namespace ft = "http://www.zorba-xquery.com/modules/full-text";
+import schema namespace fts = "http://www.zorba-xquery.com/modules/full-text";
+
let $doc := <msg xml:lang="es">hola, mundo</msg>
let $tokens := ft:tokenize( $doc )
-let $t1 := $tokens[1]
-let $t2 := $tokens[2]
+let $t1 := validate { $tokens[1] }
+let $t2 := validate { $tokens[2] }
return $t1/@value = "hola"
and $t1/@lang = "es"
Follow ups