zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #15945
[Merge] lp:~zorba-coders/zorba/canonicalize-core into lp:zorba
Juan Zacarias has proposed merging lp:~zorba-coders/zorba/canonicalize-core into lp:zorba.
Commit message:
Added Canonicalize function to modules/xml
Requested reviews:
Matthias Brantner (matthias-brantner)
Sorin Marian Nasoi (sorin.marian.nasoi)
Luis Rodriguez Gonzalez (kuraru)
For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/canonicalize-core/+merge/135777
--
https://code.launchpad.net/~zorba-coders/zorba/canonicalize-core/+merge/135777
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'ChangeLog'
--- ChangeLog 2012-11-20 01:01:49 +0000
+++ ChangeLog 2012-11-22 21:13:24 +0000
@@ -7,6 +7,7 @@
* (bug #1039284) Implemented jn:json-doc().
* (bug #867363) Added http-uri-resolution feature to disable HTTP
when resolving URIs.
+ * Added canonicalize function to modules/xml.
Optimizations:
* Various optimizations in the implementation of the optimizer rules.
=== modified file 'modules/com/zorba-xquery/www/modules/xml.xq'
--- modules/com/zorba-xquery/www/modules/xml.xq 2012-09-19 21:16:15 +0000
+++ modules/com/zorba-xquery/www/modules/xml.xq 2012-11-22 21:13:24 +0000
@@ -418,3 +418,17 @@
return
parse-xml:parse($xml-string, $new_options)
};
+
+(:~
+ : Canonicalizes the given XML document, passed as a string.
+ :
+ : @param $xml-string the XML document as a string.
+ :
+ : @return the canonicalized XML as a string.
+ :
+ : @error err:FODC0006 if $xml-string cannot be parsed as XML.
+ :)
+declare function parse-xml:canonicalize(
+ $xml-string as xs:string
+ ) as xs:string external;
+
=== modified file 'modules/w3c/xpath_functions.xq'
--- modules/w3c/xpath_functions.xq 2012-09-19 21:16:15 +0000
+++ modules/w3c/xpath_functions.xq 2012-11-22 21:13:24 +0000
@@ -1056,3 +1056,5 @@
: @see for semantics please check <a href="http://www.w3.org/TR/xpath-functions-30/#func-uri-collection">fn:uri-collection</a>
:)
declare function fn:uri-collection($arg as xs:string?) as xs:anyURI* external;
+
+(: NOTE(review): this change only registers canonicalize in the
+ : http://www.zorba-xquery.com/modules/xml namespace; confirm that an fn:
+ : declaration is really intended here. :)
+declare function fn:canonicalize($xml-string as xs:string) as xs:string external;
=== modified file 'src/functions/func_parse_fragment_impl.cpp'
--- src/functions/func_parse_fragment_impl.cpp 2012-05-16 15:23:50 +0000
+++ src/functions/func_parse_fragment_impl.cpp 2012-11-22 21:13:24 +0000
@@ -54,6 +54,7 @@
lParseOptType,
GENV_TYPESYSTEM.ANY_NODE_TYPE_STAR),
FunctionConsts::FN_ZORBA_XML_PARSE_2);
+
}
}
=== modified file 'src/functions/pregenerated/func_parse_fragment.cpp'
--- src/functions/pregenerated/func_parse_fragment.cpp 2012-10-08 12:09:36 +0000
+++ src/functions/pregenerated/func_parse_fragment.cpp 2012-11-22 21:13:24 +0000
@@ -41,6 +41,16 @@
return new FnZorbaParseXmlFragmentIterator(sctx, loc, argv);
}
+PlanIter_t fn_zorba_xml_canonicalize::codegen( // creates the runtime iterator for this function
+ CompilerCB*, // unnamed: not used here
+ static_context* sctx,
+ const QueryLoc& loc,
+ std::vector<PlanIter_t>& argv, // forwarded to the iterator as its children
+ expr& ann) const // not used here
+{
+ return new FnZorbaCanonicalizeIterator(sctx, loc, argv);
+}
+
PlanIter_t fn_parse_xml_fragment_3_0::codegen(
CompilerCB*,
static_context* sctx,
@@ -56,6 +66,18 @@
{
+ DECL_WITH_KIND(sctx, fn_zorba_xml_canonicalize,
+ (createQName("http://www.zorba-xquery.com/modules/xml","","canonicalize"),
+ GENV_TYPESYSTEM.STRING_TYPE_ONE,
+ GENV_TYPESYSTEM.STRING_TYPE_ONE),
+ FunctionConsts::FN_ZORBA_XML_CANONICALIZE_1);
+
+ }
+
+
+
+
+ {
DECL_WITH_KIND(sctx, fn_parse_xml_fragment_3_0,
(createQName("http://www.w3.org/2005/xpath-functions","","parse-xml-fragment"),
GENV_TYPESYSTEM.STRING_TYPE_QUESTION,
=== modified file 'src/functions/pregenerated/func_parse_fragment.h'
--- src/functions/pregenerated/func_parse_fragment.h 2012-10-08 12:09:36 +0000
+++ src/functions/pregenerated/func_parse_fragment.h 2012-11-22 21:13:24 +0000
@@ -55,6 +55,23 @@
};
+//fn-zorba-xml:canonicalize -- function object for the canonicalize function
+class fn_zorba_xml_canonicalize : public function
+{
+public:
+ fn_zorba_xml_canonicalize(const signature& sig, FunctionConsts::FunctionKind kind) // forwards both arguments to the base class
+ :
+ function(sig, kind)
+ {
+
+ }
+
+ bool accessesDynCtx() const { return true; } // unconditionally reports true
+
+ CODEGEN_DECL(); // NOTE(review): presumably declares codegen() -- confirm against the macro definition
+};
+
+
//fn:parse-xml-fragment
class fn_parse_xml_fragment_3_0 : public function
{
=== modified file 'src/functions/pregenerated/function_enum.h'
--- src/functions/pregenerated/function_enum.h 2012-10-22 20:06:08 +0000
+++ src/functions/pregenerated/function_enum.h 2012-11-22 21:13:24 +0000
@@ -344,6 +344,7 @@
FN_FORMAT_INTEGER_2,
FN_FORMAT_INTEGER_3,
FN_ZORBA_XML_PARSE_2,
+ FN_ZORBA_XML_CANONICALIZE_1,
FN_PARSE_XML_FRAGMENT_1,
FN_PARSE_XML_1,
FN_SERIALIZE_1,
=== modified file 'src/runtime/parsing_and_serializing/parse_fragment_impl.cpp'
--- src/runtime/parsing_and_serializing/parse_fragment_impl.cpp 2012-09-19 21:16:15 +0000
+++ src/runtime/parsing_and_serializing/parse_fragment_impl.cpp 2012-11-22 21:13:24 +0000
@@ -32,6 +32,10 @@
#include "types/schema/schema.h"
#include "types/schema/validate.h"
+#include <libxml/tree.h>
+#include <libxml/parser.h>
+#include <libxml/xmlreader.h>
+#include <libxml/c14n.h>
namespace zorba
{
@@ -306,6 +310,49 @@
STACK_END(state);
}
+/*******************************************************************************
+ 14.9.1.1 fn-zorba-xml:canonicalize
+********************************************************************************/
+// Consumes one item from the first child, takes its string value, parses it
+// as XML with libxml2 and produces the canonical form (XML_C14N_1_1, comments
+// kept) as a single string item.
+//
+// result    - receives the input item first, then the canonicalized string.
+// planState - plan state handed to the child iterator and the state macros.
+//
+// Raises err::FODC0006 if the input cannot be parsed, if canonicalization
+// fails, or if a std::exception escapes the string handling.
+bool FnZorbaCanonicalizeIterator::nextImpl(store::Item_t& result, PlanState& planState) const
+{
+  // Declared and initialised up front, ahead of the state macros, so the
+  // cleanup code below can tell on every path what has been allocated.
+  zstring lDocString;
+  zstring lCanonical;
+  xmlDocPtr lDoc = NULL;
+  xmlChar* lResult = NULL;
+  bool lOk = false;
+
+  FnZorbaCanonicalizeIteratorState* state;
+  DEFAULT_STACK_INIT(FnZorbaCanonicalizeIteratorState, state, planState);
+  if (consumeNext(result, theChildren[0].getp(), planState))
+  {
+    try
+    {
+      result->getStringValue2(lDocString);
+      lDoc = xmlReadMemory(lDocString.c_str(), lDocString.size(), "input.xml", NULL, XML_PARSE_NOERROR);
+
+      // xmlC14NDocDumpMemory returns a negative value on failure; only trust
+      // the output buffer when the call succeeded and actually produced one.
+      if (lDoc &&
+          xmlC14NDocDumpMemory(lDoc, NULL, XML_C14N_1_1, NULL, 1, &lResult) >= 0 &&
+          lResult)
+      {
+        lCanonical = zstring(reinterpret_cast<const char*>(lResult));
+        lOk = true;
+      }
+    }
+    catch ( std::exception const& e)
+    {
+      // Release the libxml2 allocations before translating the exception.
+      if (lResult)
+        xmlFree(lResult);
+      if (lDoc)
+        xmlFreeDoc(lDoc);
+      throw XQUERY_EXCEPTION(err::FODC0006, ERROR_PARAMS("parse-xml:canonicalize()", e.what() ), ERROR_LOC(loc));
+    }
+
+    // libxml2 hands out plain C allocations: free them on success and failure alike.
+    if (lResult)
+      xmlFree(lResult);
+    if (lDoc)
+      xmlFreeDoc(lDoc);
+
+    // Raised outside the try block so the generic handler above cannot
+    // re-wrap it and lose the quoted input in the message.
+    if (!lOk)
+    {
+      zstring lErrorMsg;
+      lErrorMsg = "\"" + lDocString + "\"";
+      throw XQUERY_EXCEPTION(err::FODC0006, ERROR_PARAMS("parse-xml:canonicalize()", lErrorMsg ), ERROR_LOC(loc));
+    }
+    STACK_PUSH(GENV_ITEMFACTORY->createString(result, lCanonical), state);
+  }
+  STACK_END(state);
+}
+
+void FnZorbaCanonicalizeIteratorState::reset(PlanState& planState) // resets the iterator state
+{
+ PlanIteratorState::reset(planState); // base-class reset only; theDocString is not touched here
+}
/*******************************************************************************
14.9.2 fn:parse-xml-fragment
=== modified file 'src/runtime/parsing_and_serializing/pregenerated/parse_fragment.cpp'
--- src/runtime/parsing_and_serializing/pregenerated/parse_fragment.cpp 2012-10-08 12:09:36 +0000
+++ src/runtime/parsing_and_serializing/pregenerated/parse_fragment.cpp 2012-11-22 21:13:24 +0000
@@ -68,6 +68,42 @@
// </FnZorbaParseXmlFragmentIterator>
+// <FnZorbaCanonicalizeIterator>
+SERIALIZABLE_CLASS_VERSIONS(FnZorbaCanonicalizeIterator)
+
+void FnZorbaCanonicalizeIterator::serialize(::zorba::serialization::Archiver& ar) // archives only the base-class part; no members of its own are written
+{
+ serialize_baseclass(ar,
+ (NaryBaseIterator<FnZorbaCanonicalizeIterator, FnZorbaCanonicalizeIteratorState>*)this);
+}
+
+
+void FnZorbaCanonicalizeIterator::accept(PlanIterVisitor& v) const // visitor entry point: this node, then every child
+{
+ v.beginVisit(*this); // open this node before descending
+
+ std::vector<PlanIter_t>::const_iterator lIter = theChildren.begin();
+ std::vector<PlanIter_t>::const_iterator lEnd = theChildren.end();
+ for ( ; lIter != lEnd; ++lIter ){ // children accept the visitor in stored order
+ (*lIter)->accept(v);
+ }
+
+ v.endVisit(*this); // close this node once all children were visited
+}
+
+FnZorbaCanonicalizeIterator::~FnZorbaCanonicalizeIterator() {} // empty destructor body
+
+FnZorbaCanonicalizeIteratorState::FnZorbaCanonicalizeIteratorState() {} // empty body: members are default-constructed
+
+FnZorbaCanonicalizeIteratorState::~FnZorbaCanonicalizeIteratorState() {} // empty destructor body
+
+
+void FnZorbaCanonicalizeIteratorState::init(PlanState& planState) { // initialises the iterator state
+ PlanIteratorState::init(planState); // base-class init only; theDocString is not touched here
+}
+// </FnZorbaCanonicalizeIterator>
+
+
// <FnParseXmlFragmentIterator>
SERIALIZABLE_CLASS_VERSIONS(FnParseXmlFragmentIterator)
=== modified file 'src/runtime/parsing_and_serializing/pregenerated/parse_fragment.h'
--- src/runtime/parsing_and_serializing/pregenerated/parse_fragment.h 2012-10-08 12:09:36 +0000
+++ src/runtime/parsing_and_serializing/pregenerated/parse_fragment.h 2012-11-22 21:13:24 +0000
@@ -82,6 +82,49 @@
/**
+ * fn-zorba-xml:canonicalize
+ * Author: Zorba Team
+ */
+class FnZorbaCanonicalizeIteratorState : public PlanIteratorState // per-execution state of FnZorbaCanonicalizeIterator
+{
+public:
+ zstring theDocString; // NOTE(review): not read or written by nextImpl(), init() or reset() in this change -- confirm it is needed
+
+ FnZorbaCanonicalizeIteratorState();
+
+ ~FnZorbaCanonicalizeIteratorState();
+
+ void init(PlanState&);
+ void reset(PlanState&);
+};
+
+class FnZorbaCanonicalizeIterator : public NaryBaseIterator<FnZorbaCanonicalizeIterator, FnZorbaCanonicalizeIteratorState> // runtime iterator for the canonicalize function
+{
+public:
+ SERIALIZABLE_CLASS(FnZorbaCanonicalizeIterator);
+
+ SERIALIZABLE_CLASS_CONSTRUCTOR2T(FnZorbaCanonicalizeIterator,
+ NaryBaseIterator<FnZorbaCanonicalizeIterator, FnZorbaCanonicalizeIteratorState>);
+
+ void serialize( ::zorba::serialization::Archiver& ar);
+
+ FnZorbaCanonicalizeIterator( // forwards all three arguments to the NaryBaseIterator base
+ static_context* sctx,
+ const QueryLoc& loc,
+ std::vector<PlanIter_t>& children)
+ :
+ NaryBaseIterator<FnZorbaCanonicalizeIterator, FnZorbaCanonicalizeIteratorState>(sctx, loc, children)
+ {}
+
+ virtual ~FnZorbaCanonicalizeIterator();
+
+ void accept(PlanIterVisitor& v) const; // visitor entry point
+
+ bool nextImpl(store::Item_t& result, PlanState& aPlanState) const; // produces the canonicalized string item
+};
+
+
+/**
* fn:parse-xml-fragment
* Author: Zorba Team
*/
=== modified file 'src/runtime/pregenerated/iterator_enum.h'
--- src/runtime/pregenerated/iterator_enum.h 2012-10-15 13:39:36 +0000
+++ src/runtime/pregenerated/iterator_enum.h 2012-11-22 21:13:24 +0000
@@ -253,6 +253,7 @@
TYPE_FormatNumberIterator,
TYPE_FormatIntegerIterator,
TYPE_FnZorbaParseXmlFragmentIterator,
+ TYPE_FnZorbaCanonicalizeIterator,
TYPE_FnParseXmlFragmentIterator,
TYPE_FnParseXmlIterator,
TYPE_FnSerializeIterator,
=== modified file 'src/runtime/spec/parsing_and_serializing/parse_fragment.xml'
--- src/runtime/spec/parsing_and_serializing/parse_fragment.xml 2012-09-19 21:16:15 +0000
+++ src/runtime/spec/parsing_and_serializing/parse_fragment.xml 2012-11-22 21:13:24 +0000
@@ -46,6 +46,32 @@
</zorba:state>
</zorba:iterator>
+<!--
+/*******************************************************************************
+ * 14.9.1.1 parse-xml:canonicalize
+********************************************************************************/
+-->
+ <zorba:iterator name="FnZorbaCanonicalizeIterator">
+
+ <zorba:description author="Zorba Team">fn-zorba-xml:canonicalize</zorba:description>
+
+ <zorba:function>
+ <zorba:signature localname="canonicalize" prefix="fn-zorba-xml">
+ <zorba:param>xs:string</zorba:param> <!-- string to canonicalize -->
+ <zorba:output>xs:string</zorba:output>
+ </zorba:signature>
+
+ <zorba:methods>
+ <zorba:accessesDynCtx returnValue="true"/>
+ </zorba:methods>
+
+ </zorba:function>
+
+ <zorba:state generateReset="false">
+ <zorba:member type="zstring" name="theDocString"/>
+ </zorba:state>
+
+ </zorba:iterator>
<!--
/*******************************************************************************
=== modified file 'src/runtime/visitors/pregenerated/planiter_visitor.h'
--- src/runtime/visitors/pregenerated/planiter_visitor.h 2012-10-15 13:39:36 +0000
+++ src/runtime/visitors/pregenerated/planiter_visitor.h 2012-11-22 21:13:24 +0000
@@ -532,6 +532,8 @@
class FnZorbaParseXmlFragmentIterator;
+ class FnZorbaCanonicalizeIterator;
+
class FnParseXmlFragmentIterator;
class FnParseXmlIterator;
@@ -1476,6 +1478,9 @@
virtual void beginVisit ( const FnZorbaParseXmlFragmentIterator& ) = 0;
virtual void endVisit ( const FnZorbaParseXmlFragmentIterator& ) = 0;
+ virtual void beginVisit ( const FnZorbaCanonicalizeIterator& ) = 0;
+ virtual void endVisit ( const FnZorbaCanonicalizeIterator& ) = 0;
+
virtual void beginVisit ( const FnParseXmlFragmentIterator& ) = 0;
virtual void endVisit ( const FnParseXmlFragmentIterator& ) = 0;
=== modified file 'src/runtime/visitors/pregenerated/printer_visitor.cpp'
--- src/runtime/visitors/pregenerated/printer_visitor.cpp 2012-10-15 13:39:36 +0000
+++ src/runtime/visitors/pregenerated/printer_visitor.cpp 2012-11-22 21:13:24 +0000
@@ -3272,6 +3272,20 @@
// </FnZorbaParseXmlFragmentIterator>
+// <FnZorbaCanonicalizeIterator>
+void PrinterVisitor::beginVisit ( const FnZorbaCanonicalizeIterator& a) { // starts the printed entry for this iterator
+ thePrinter.startBeginVisit("FnZorbaCanonicalizeIterator", ++theId); // theId is incremented before it is passed on
+ printCommons( &a, theId );
+ thePrinter.endBeginVisit( theId );
+}
+
+void PrinterVisitor::endVisit ( const FnZorbaCanonicalizeIterator& ) { // finishes the printed entry; the iterator argument is not used
+ thePrinter.startEndVisit();
+ thePrinter.endEndVisit();
+}
+// </FnZorbaCanonicalizeIterator>
+
+
// <FnParseXmlFragmentIterator>
void PrinterVisitor::beginVisit ( const FnParseXmlFragmentIterator& a) {
thePrinter.startBeginVisit("FnParseXmlFragmentIterator", ++theId);
=== modified file 'src/runtime/visitors/pregenerated/printer_visitor.h'
--- src/runtime/visitors/pregenerated/printer_visitor.h 2012-10-15 13:39:36 +0000
+++ src/runtime/visitors/pregenerated/printer_visitor.h 2012-11-22 21:13:24 +0000
@@ -818,6 +818,9 @@
void beginVisit( const FnZorbaParseXmlFragmentIterator& );
void endVisit ( const FnZorbaParseXmlFragmentIterator& );
+ void beginVisit( const FnZorbaCanonicalizeIterator& );
+ void endVisit ( const FnZorbaCanonicalizeIterator& );
+
void beginVisit( const FnParseXmlFragmentIterator& );
void endVisit ( const FnParseXmlFragmentIterator& );
=== added file 'test/rbkt/ExpQueryResults/zorba/parsing_and_serializing/canonicalize-1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/parsing_and_serializing/canonicalize-1.xml.res 1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/parsing_and_serializing/canonicalize-1.xml.res 2012-11-22 21:13:24 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<x a="2" b="1"></x>
\ No newline at end of file
=== added file 'test/rbkt/Queries/zorba/parsing_and_serializing/canonicalize-1.xq'
--- test/rbkt/Queries/zorba/parsing_and_serializing/canonicalize-1.xq 1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/parsing_and_serializing/canonicalize-1.xq 2012-11-22 21:13:24 +0000
@@ -0,0 +1,4 @@
+import module namespace parse-xml = "http://www.zorba-xquery.com/modules/xml";
+
+let $xml := "<x b='1' a='2'/>"
+return parse-xml:canonicalize($xml)
Follow ups