← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~zorba-coders/zorba/canonicalize-core into lp:zorba

 

Juan Zacarias has proposed merging lp:~zorba-coders/zorba/canonicalize-core into lp:zorba.

Commit message:
Added Canonicalize function to modules/xml

Requested reviews:
  Matthias Brantner (matthias-brantner)
  Sorin Marian Nasoi (sorin.marian.nasoi)
  Luis Rodriguez Gonzalez (kuraru)

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/canonicalize-core/+merge/135777
-- 
https://code.launchpad.net/~zorba-coders/zorba/canonicalize-core/+merge/135777
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'ChangeLog'
--- ChangeLog	2012-11-20 01:01:49 +0000
+++ ChangeLog	2012-11-22 21:13:24 +0000
@@ -7,6 +7,7 @@
   * (bug #1039284) Implemented jn:json-doc().
   * (bug #867363) Added http-uri-resolution feature to disable HTTP
 	when resolving URIs.
+  * Added canonicalize function to modules/xml.
 
 Optimizations:
   * Various optimizations in the implementation of the optimizer rules.

=== modified file 'modules/com/zorba-xquery/www/modules/xml.xq'
--- modules/com/zorba-xquery/www/modules/xml.xq	2012-09-19 21:16:15 +0000
+++ modules/com/zorba-xquery/www/modules/xml.xq	2012-11-22 21:13:24 +0000
@@ -418,3 +418,17 @@
   return 
     parse-xml:parse($xml-string, $new_options)
 };
+
+(:~
+ : Canonicalizes an XML document supplied as a string (Canonical XML 1.1, comments retained).
+ :
+ : @param $xml-string the XML document to canonicalize, as a string.
+ :
+ : @return the canonical form of the document, as a string.
+ :
+ : @error err:FODC0006 if $xml-string cannot be parsed as XML.
+ :)
+declare function parse-xml:canonicalize(
+  $xml-string as xs:string
+  ) as xs:string external;
+

=== modified file 'modules/w3c/xpath_functions.xq'
--- modules/w3c/xpath_functions.xq	2012-09-19 21:16:15 +0000
+++ modules/w3c/xpath_functions.xq	2012-11-22 21:13:24 +0000
@@ -1056,3 +1056,5 @@
  : @see for semantics please check <a href="http://www.w3.org/TR/xpath-functions-30/#func-uri-collection">fn:uri-collection</a>
  :)
 declare function fn:uri-collection($arg as xs:string?) as xs:anyURI* external;
+
+declare function fn:canonicalize($xml-string as xs:string) as xs:string external; (: NOTE(review): the C++ side of this patch registers canonicalize only in the zorba xml module namespace - confirm this fn: declaration is intended :)

=== modified file 'src/functions/func_parse_fragment_impl.cpp'
--- src/functions/func_parse_fragment_impl.cpp	2012-05-16 15:23:50 +0000
+++ src/functions/func_parse_fragment_impl.cpp	2012-11-22 21:13:24 +0000
@@ -54,6 +54,7 @@
         lParseOptType, 
         GENV_TYPESYSTEM.ANY_NODE_TYPE_STAR),
         FunctionConsts::FN_ZORBA_XML_PARSE_2);
+
   }
 }
 

=== modified file 'src/functions/pregenerated/func_parse_fragment.cpp'
--- src/functions/pregenerated/func_parse_fragment.cpp	2012-10-08 12:09:36 +0000
+++ src/functions/pregenerated/func_parse_fragment.cpp	2012-11-22 21:13:24 +0000
@@ -41,6 +41,16 @@
   return new FnZorbaParseXmlFragmentIterator(sctx, loc, argv);
 }
 
+PlanIter_t fn_zorba_xml_canonicalize::codegen(  // creates the runtime iterator that evaluates canonicalize
+  CompilerCB*,
+  static_context* sctx,
+  const QueryLoc& loc,
+  std::vector<PlanIter_t>& argv,
+  expr& ann) const  // ann is not used in this body
+{
+  return new FnZorbaCanonicalizeIterator(sctx, loc, argv);  // argv is passed as the iterator's children
+}
+
 PlanIter_t fn_parse_xml_fragment_3_0::codegen(
   CompilerCB*,
   static_context* sctx,
@@ -56,6 +66,18 @@
 
 
       {
+    DECL_WITH_KIND(sctx, fn_zorba_xml_canonicalize,
+        (createQName("http://www.zorba-xquery.com/modules/xml","","canonicalize"), 
+        GENV_TYPESYSTEM.STRING_TYPE_ONE, 
+        GENV_TYPESYSTEM.STRING_TYPE_ONE),
+        FunctionConsts::FN_ZORBA_XML_CANONICALIZE_1);
+
+  }
+
+
+
+
+      {
     DECL_WITH_KIND(sctx, fn_parse_xml_fragment_3_0,
         (createQName("http://www.w3.org/2005/xpath-functions","","parse-xml-fragment"), 
         GENV_TYPESYSTEM.STRING_TYPE_QUESTION, 

=== modified file 'src/functions/pregenerated/func_parse_fragment.h'
--- src/functions/pregenerated/func_parse_fragment.h	2012-10-08 12:09:36 +0000
+++ src/functions/pregenerated/func_parse_fragment.h	2012-11-22 21:13:24 +0000
@@ -55,6 +55,23 @@
 };
 
 
+//fn-zorba-xml:canonicalize -- function object for canonicalize in the zorba xml module
+class fn_zorba_xml_canonicalize : public function
+{
+public:
+  fn_zorba_xml_canonicalize(const signature& sig, FunctionConsts::FunctionKind kind)  // forwards both arguments to the base class
+    : 
+    function(sig, kind)
+  {
+
+  }
+
+  bool accessesDynCtx() const { return true; }  // mirrors <zorba:accessesDynCtx returnValue="true"/> in the spec
+
+  CODEGEN_DECL();  // codegen() is defined in pregenerated/func_parse_fragment.cpp
+};
+
+
 //fn:parse-xml-fragment
 class fn_parse_xml_fragment_3_0 : public function
 {

=== modified file 'src/functions/pregenerated/function_enum.h'
--- src/functions/pregenerated/function_enum.h	2012-10-22 20:06:08 +0000
+++ src/functions/pregenerated/function_enum.h	2012-11-22 21:13:24 +0000
@@ -344,6 +344,7 @@
   FN_FORMAT_INTEGER_2,
   FN_FORMAT_INTEGER_3,
   FN_ZORBA_XML_PARSE_2,
+  FN_ZORBA_XML_CANONICALIZE_1,
   FN_PARSE_XML_FRAGMENT_1,
   FN_PARSE_XML_1,
   FN_SERIALIZE_1,

=== modified file 'src/runtime/parsing_and_serializing/parse_fragment_impl.cpp'
--- src/runtime/parsing_and_serializing/parse_fragment_impl.cpp	2012-09-19 21:16:15 +0000
+++ src/runtime/parsing_and_serializing/parse_fragment_impl.cpp	2012-11-22 21:13:24 +0000
@@ -32,6 +32,10 @@
 #include "types/schema/schema.h"
 #include "types/schema/validate.h"
 
+#include <libxml/tree.h>
+#include <libxml/parser.h>
+#include <libxml/xmlreader.h>
+#include <libxml/c14n.h>
 
 namespace zorba
 {
@@ -306,6 +310,49 @@
   STACK_END(state);
 }
 
+/*******************************************************************************
+  14.9.1.1 fn-zorba-xml:canonicalize
+********************************************************************************/
+// Canonicalizes the XML string produced by the child iterator (C14N 1.1, comments kept) and returns it
+// as a string item. Raises err::FODC0006 when the input cannot be parsed or canonicalized.
+bool FnZorbaCanonicalizeIterator::nextImpl(store::Item_t& result, PlanState& planState) const
+{
+  zstring lDocString;
+  xmlChar* lResult = NULL;  // allocated by libxml2; every path below releases it with xmlFree
+  FnZorbaCanonicalizeIteratorState* state;
+  DEFAULT_STACK_INIT(FnZorbaCanonicalizeIteratorState, state, planState);
+  if (consumeNext(result, theChildren[0].getp(), planState))
+  {
+    try
+    {
+      result->getStringValue2(lDocString);
+      xmlDocPtr lDoc = xmlReadMemory(lDocString.c_str(), lDocString.size(), "input.xml", NULL, XML_PARSE_NOERROR);
+      // xmlC14NDocDumpMemory reports failure with a negative value; lResult is only valid otherwise.
+      const int lLength = lDoc ? xmlC14NDocDumpMemory(lDoc, NULL, XML_C14N_1_1, NULL, 1, &lResult) : -1;
+      if (lDoc) xmlFreeDoc(lDoc);  // released before any throw so no path leaks the parsed document
+      if (lLength < 0 || !lResult)
+      {
+        zstring lErrorMsg;
+        lErrorMsg = "\"" + lDocString + "\"";
+        throw XQUERY_EXCEPTION(err::FODC0006, ERROR_PARAMS("parse-xml:canonicalize()", lErrorMsg ), ERROR_LOC(loc));
+      }
+      lDocString = zstring((char*)lResult);
+      xmlFree(lResult);
+    }
+    catch ( std::exception const& e)
+    {
+      if (lResult) xmlFree(lResult);  // reached with a live buffer only when the string copy above threw
+      throw XQUERY_EXCEPTION(err::FODC0006, ERROR_PARAMS("parse-xml:canonicalize()", e.what() ), ERROR_LOC(loc));
+    }
+    STACK_PUSH(GENV_ITEMFACTORY->createString(result, lDocString), state);
+  }
+  STACK_END(state);
+}
+
+void FnZorbaCanonicalizeIteratorState::reset(PlanState& planState)  // hand-written because the spec sets generateReset="false"
+{
+  PlanIteratorState::reset(planState);  // delegates to the base class; theDocString is left untouched
+}
 
 /*******************************************************************************
   14.9.2 fn:parse-xml-fragment

=== modified file 'src/runtime/parsing_and_serializing/pregenerated/parse_fragment.cpp'
--- src/runtime/parsing_and_serializing/pregenerated/parse_fragment.cpp	2012-10-08 12:09:36 +0000
+++ src/runtime/parsing_and_serializing/pregenerated/parse_fragment.cpp	2012-11-22 21:13:24 +0000
@@ -68,6 +68,42 @@
 // </FnZorbaParseXmlFragmentIterator>
 
 
+// <FnZorbaCanonicalizeIterator>
+SERIALIZABLE_CLASS_VERSIONS(FnZorbaCanonicalizeIterator)
+
+void FnZorbaCanonicalizeIterator::serialize(::zorba::serialization::Archiver& ar)  // serializes only the base-class part
+{
+  serialize_baseclass(ar,
+  (NaryBaseIterator<FnZorbaCanonicalizeIterator, FnZorbaCanonicalizeIteratorState>*)this);
+}
+
+
+void FnZorbaCanonicalizeIterator::accept(PlanIterVisitor& v) const  // visits this iterator, then each child, between begin/endVisit
+{
+  v.beginVisit(*this);
+
+  std::vector<PlanIter_t>::const_iterator lIter = theChildren.begin();
+  std::vector<PlanIter_t>::const_iterator lEnd = theChildren.end();
+  for ( ; lIter != lEnd; ++lIter ){
+    (*lIter)->accept(v);  // children are visited in vector order
+  }
+
+  v.endVisit(*this);
+}
+
+FnZorbaCanonicalizeIterator::~FnZorbaCanonicalizeIterator() {}  // empty body
+
+FnZorbaCanonicalizeIteratorState::FnZorbaCanonicalizeIteratorState() {}  // empty body
+
+FnZorbaCanonicalizeIteratorState::~FnZorbaCanonicalizeIteratorState() {}  // empty body
+
+
+void FnZorbaCanonicalizeIteratorState::init(PlanState& planState) {  // delegates to the base class only
+  PlanIteratorState::init(planState);
+}
+// </FnZorbaCanonicalizeIterator>
+
+
 // <FnParseXmlFragmentIterator>
 SERIALIZABLE_CLASS_VERSIONS(FnParseXmlFragmentIterator)
 

=== modified file 'src/runtime/parsing_and_serializing/pregenerated/parse_fragment.h'
--- src/runtime/parsing_and_serializing/pregenerated/parse_fragment.h	2012-10-08 12:09:36 +0000
+++ src/runtime/parsing_and_serializing/pregenerated/parse_fragment.h	2012-11-22 21:13:24 +0000
@@ -82,6 +82,49 @@
 
 
 /**
+ * fn-zorba-xml:canonicalize
+ * Author: Zorba Team
+ */
+class FnZorbaCanonicalizeIteratorState : public PlanIteratorState  // state type used by FnZorbaCanonicalizeIterator
+{
+public:
+  zstring theDocString; // declared via <zorba:state> in the spec; nextImpl in this patch never reads or writes it
+
+  FnZorbaCanonicalizeIteratorState();
+
+  ~FnZorbaCanonicalizeIteratorState();
+
+  void init(PlanState&);   // defined in pregenerated/parse_fragment.cpp
+  void reset(PlanState&);  // defined by hand in parse_fragment_impl.cpp
+};
+
+class FnZorbaCanonicalizeIterator : public NaryBaseIterator<FnZorbaCanonicalizeIterator, FnZorbaCanonicalizeIteratorState>  // runtime iterator behind canonicalize
+{ 
+public:
+  SERIALIZABLE_CLASS(FnZorbaCanonicalizeIterator);
+
+  SERIALIZABLE_CLASS_CONSTRUCTOR2T(FnZorbaCanonicalizeIterator,
+    NaryBaseIterator<FnZorbaCanonicalizeIterator, FnZorbaCanonicalizeIteratorState>);
+
+  void serialize( ::zorba::serialization::Archiver& ar);
+
+  FnZorbaCanonicalizeIterator(  // forwards all three arguments to NaryBaseIterator
+    static_context* sctx,
+    const QueryLoc& loc,
+    std::vector<PlanIter_t>& children)
+    : 
+    NaryBaseIterator<FnZorbaCanonicalizeIterator, FnZorbaCanonicalizeIteratorState>(sctx, loc, children)
+  {}
+
+  virtual ~FnZorbaCanonicalizeIterator();
+
+  void accept(PlanIterVisitor& v) const;
+
+  bool nextImpl(store::Item_t& result, PlanState& aPlanState) const;  // implemented by hand in parse_fragment_impl.cpp
+};
+
+
+/**
  * fn:parse-xml-fragment
  * Author: Zorba Team
  */

=== modified file 'src/runtime/pregenerated/iterator_enum.h'
--- src/runtime/pregenerated/iterator_enum.h	2012-10-15 13:39:36 +0000
+++ src/runtime/pregenerated/iterator_enum.h	2012-11-22 21:13:24 +0000
@@ -253,6 +253,7 @@
   TYPE_FormatNumberIterator,
   TYPE_FormatIntegerIterator,
   TYPE_FnZorbaParseXmlFragmentIterator,
+  TYPE_FnZorbaCanonicalizeIterator,
   TYPE_FnParseXmlFragmentIterator,
   TYPE_FnParseXmlIterator,
   TYPE_FnSerializeIterator,

=== modified file 'src/runtime/spec/parsing_and_serializing/parse_fragment.xml'
--- src/runtime/spec/parsing_and_serializing/parse_fragment.xml	2012-09-19 21:16:15 +0000
+++ src/runtime/spec/parsing_and_serializing/parse_fragment.xml	2012-11-22 21:13:24 +0000
@@ -46,6 +46,32 @@
     </zorba:state>
       
   </zorba:iterator>
+<!--
+/*******************************************************************************
+ * 14.9.1.1 parse-xml:canonicalize
+ * Takes an XML document as a string and returns its canonical form as a string.
+********************************************************************************/
+-->
+  <zorba:iterator name="FnZorbaCanonicalizeIterator">
+      
+    <zorba:description author="Zorba Team">fn-zorba-xml:canonicalize</zorba:description>
+    
+    <zorba:function>
+      <zorba:signature localname="canonicalize" prefix="fn-zorba-xml">
+        <zorba:param>xs:string</zorba:param>  <!-- string to canonicalize -->
+        <zorba:output>xs:string</zorba:output>  <!-- canonicalized XML -->
+      </zorba:signature>
+ 
+      <zorba:methods>
+        <zorba:accessesDynCtx returnValue="true"/>
+      </zorba:methods>
+
+    </zorba:function>
+
+    <zorba:state generateReset="false">  <!-- reset() is written by hand in parse_fragment_impl.cpp -->
+      <zorba:member type="zstring" name="theDocString"/>  <!-- NOTE(review): nextImpl in this patch never uses this member -->
+    </zorba:state>
+      
   
 <!--
 /*******************************************************************************

=== modified file 'src/runtime/visitors/pregenerated/planiter_visitor.h'
--- src/runtime/visitors/pregenerated/planiter_visitor.h	2012-10-15 13:39:36 +0000
+++ src/runtime/visitors/pregenerated/planiter_visitor.h	2012-11-22 21:13:24 +0000
@@ -532,6 +532,8 @@
 
     class FnZorbaParseXmlFragmentIterator;
 
+    class FnZorbaCanonicalizeIterator;
+
     class FnParseXmlFragmentIterator;
 
     class FnParseXmlIterator;
@@ -1476,6 +1478,9 @@
     virtual void beginVisit ( const FnZorbaParseXmlFragmentIterator& ) = 0;
     virtual void endVisit   ( const FnZorbaParseXmlFragmentIterator& ) = 0;
 
+    virtual void beginVisit ( const FnZorbaCanonicalizeIterator& ) = 0;
+    virtual void endVisit   ( const FnZorbaCanonicalizeIterator& ) = 0;
+
     virtual void beginVisit ( const FnParseXmlFragmentIterator& ) = 0;
     virtual void endVisit   ( const FnParseXmlFragmentIterator& ) = 0;
 

=== modified file 'src/runtime/visitors/pregenerated/printer_visitor.cpp'
--- src/runtime/visitors/pregenerated/printer_visitor.cpp	2012-10-15 13:39:36 +0000
+++ src/runtime/visitors/pregenerated/printer_visitor.cpp	2012-11-22 21:13:24 +0000
@@ -3272,6 +3272,20 @@
 // </FnZorbaParseXmlFragmentIterator>
 
 
+// <FnZorbaCanonicalizeIterator>
+void PrinterVisitor::beginVisit ( const FnZorbaCanonicalizeIterator& a) {  // prints the opening entry for this iterator
+  thePrinter.startBeginVisit("FnZorbaCanonicalizeIterator", ++theId);  // theId is incremented before it is used
+  printCommons( &a, theId );
+  thePrinter.endBeginVisit( theId );
+}
+
+void PrinterVisitor::endVisit ( const FnZorbaCanonicalizeIterator& ) {  // closes the entry; the iterator argument is unused
+  thePrinter.startEndVisit();
+  thePrinter.endEndVisit();
+}
+// </FnZorbaCanonicalizeIterator>
+
+
 // <FnParseXmlFragmentIterator>
 void PrinterVisitor::beginVisit ( const FnParseXmlFragmentIterator& a) {
   thePrinter.startBeginVisit("FnParseXmlFragmentIterator", ++theId);

=== modified file 'src/runtime/visitors/pregenerated/printer_visitor.h'
--- src/runtime/visitors/pregenerated/printer_visitor.h	2012-10-15 13:39:36 +0000
+++ src/runtime/visitors/pregenerated/printer_visitor.h	2012-11-22 21:13:24 +0000
@@ -818,6 +818,9 @@
     void beginVisit( const FnZorbaParseXmlFragmentIterator& );
     void endVisit  ( const FnZorbaParseXmlFragmentIterator& );
 
+    void beginVisit( const FnZorbaCanonicalizeIterator& );
+    void endVisit  ( const FnZorbaCanonicalizeIterator& );
+
     void beginVisit( const FnParseXmlFragmentIterator& );
     void endVisit  ( const FnParseXmlFragmentIterator& );
 

=== added file 'test/rbkt/ExpQueryResults/zorba/parsing_and_serializing/canonicalize-1.xml.res'
--- test/rbkt/ExpQueryResults/zorba/parsing_and_serializing/canonicalize-1.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/parsing_and_serializing/canonicalize-1.xml.res	2012-11-22 21:13:24 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+&lt;x a="2" b="1"&gt;&lt;/x&gt;
\ No newline at end of file

=== added file 'test/rbkt/Queries/zorba/parsing_and_serializing/canonicalize-1.xq'
--- test/rbkt/Queries/zorba/parsing_and_serializing/canonicalize-1.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/parsing_and_serializing/canonicalize-1.xq	2012-11-22 21:13:24 +0000
@@ -0,0 +1,4 @@
+import module namespace parse-xml = "http://www.zorba-xquery.com/modules/xml";
+
+let $xml := "<x b='1' a='2'/>"  (: attributes out of order plus an empty-element tag, so the canonical form must differ from the input :)
+return parse-xml:canonicalize($xml)


Follow ups