zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #24974
[Merge] lp:~zorba-coders/zorba/bug-1210410-csv into lp:zorba/data-converters-module
Chris Hillery has proposed merging lp:~zorba-coders/zorba/bug-1210410-csv into lp:zorba/data-converters-module.
Commit message:
Updated options and annotations URIs to zorba.io.
Requested reviews:
Zorba Coders (zorba-coders)
For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/bug-1210410-csv/+merge/179366
--
https://code.launchpad.net/~zorba-coders/zorba/bug-1210410-csv/+merge/179366
Your team Zorba Coders is requested to review the proposed merge of lp:~zorba-coders/zorba/bug-1210410-csv into lp:zorba/data-converters-module.
=== modified file 'CMakeLists.txt'
--- CMakeLists.txt 2011-12-05 22:42:10 +0000
+++ CMakeLists.txt 2013-08-09 09:45:48 +0000
@@ -14,7 +14,7 @@
CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
-PROJECT (zorba_data-converters_module)
+PROJECT (zorba_csv_module)
ENABLE_TESTING ()
INCLUDE (CTest)
=== removed directory 'cmake_modules'
=== removed file 'cmake_modules/FindLibTidy.cmake'
--- cmake_modules/FindLibTidy.cmake 2011-10-06 07:40:17 +0000
+++ cmake_modules/FindLibTidy.cmake 1970-01-01 00:00:00 +0000
@@ -1,60 +0,0 @@
-# Copyright 2006-2008 The FLWOR Foundation.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# - Try to find the HTML Tidy lib
-#
-# LIBTIDY_FOUND - true if LIBTIDY was found
-# LIBTIDY_INCLUDE_DIRS - Directory to include to get LIBTIDY headers
-# Note: always include LIBTIDY headers as e.g.,
-# tidy/tidy.h
-# LIBTIDY_LIBRARIES - Libraries to link against for the LIBTIDY
-#
-
-
-IF (LIBTIDY_INCLUDE_DIR)
- SET(LibTidy_FIND_QUIETLY TRUE)
-ENDIF (LIBTIDY_INCLUDE_DIR)
-
-# Look for the header file.
-FIND_PATH(LIBTIDY_INCLUDE_DIR tidy.h PATH_SUFFIXES tidy)
-MARK_AS_ADVANCED(LIBTIDY_INCLUDE_DIR)
-
-# Look for the library.
-# FIND_LIBRARY(LIBTIDY_LIBRARY NAMES tidy PATHS ${LIBTIDY_LIBRARIES})
-FIND_LIBRARY(LIBTIDY_LIBRARY NAMES tidy)
-MARK_AS_ADVANCED(LIBTIDY_LIBRARY)
-
-# INCLUDE(FindPackageHandleStandardArgs)
-# only available in cmake > 2.6
-# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LIBTIDY DEFAULT_MSG LIBTIDY_INCLUDE_DIR LIBTIDY_LIBRARY)
-#IF (LIBTIDY_FOUND)
-# SET(LIBTIDY_LIBRARIES ${LIBTIDY_LIBRARY})
-# SET(LIBTIDY_INCLUDE_DIRS ${LIBTIDY_INCLUDE_DIR})
-#ELSE (LIBTIDY_FOUND)
-# SET(LIBTIDY_LIBRARIES)
-# SET(LIBTIDY_INCLUDE_DIRS)
-#ENDIF (LIBTIDY_FOUND)
-IF (LIBTIDY_INCLUDE_DIR AND LIBTIDY_LIBRARY)
- SET(LIBTIDY_FOUND 1)
- SET(LIBTIDY_LIBRARIES ${LIBTIDY_LIBRARY})
- SET(LIBTIDY_INCLUDE_DIRS ${LIBTIDY_INCLUDE_DIR})
- IF(NOT LibTidy_FIND_QUIETLY)
- MESSAGE(STATUS "Found libtidy library : " ${LIBTIDY_LIBRARY})
- MESSAGE(STATUS "Found libtidy include path : " ${LIBTIDY_INCLUDE_DIR})
- ENDIF(NOT LibTidy_FIND_QUIETLY)
-ELSE (LIBTIDY_INCLUDE_DIR AND LIBTIDY_LIBRARY)
- SET(LIBTIDY_FOUND 0)
- SET(LIBTIDY_LIBRARIES)
- SET(LIBTIDY_INCLUDE_DIRS)
-ENDIF (LIBTIDY_INCLUDE_DIR AND LIBTIDY_LIBRARY)
=== removed directory 'cmake_modules/Windows'
=== removed file 'cmake_modules/Windows/FindJansson.cmake'
--- cmake_modules/Windows/FindJansson.cmake 2011-08-19 00:03:36 +0000
+++ cmake_modules/Windows/FindJansson.cmake 1970-01-01 00:00:00 +0000
@@ -1,30 +0,0 @@
-# Copyright 2010 The FLWOR Foundation.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# - Try to find the Jansson lib on Windows
-#
-# This is a proxy module that calls the FindJansson.cmake module. Before
-# doing that, we try to guess where Jansson might be on the user's machine.
-# The user should provide ZORBA_THIRD_PARTY_REQUIREMENTS which is a path where
-# the Jansson directory can be found. The Jansson directory must have "jansson"
-# (case insensitive) in its name.
-#
-# This module helps the Windows user to avoid providing the following two
-# variables when building Zorba:
-# -D Jansson_INCLUDE="path_to_3rd_party_dir\*jansson*\src"
-# -D Jansson_LIBRARY="path_to_3rd_party_dir\*jansson*\bin\[Release\]jansson.lib"
-#
-# See the FindLibTidy.cmake module shipped with Zorba for more information.
-
-FIND_PACKAGE_WIN32(NAME Jansson FOUND_VAR Jansson_FOUND SEARCH_NAMES jansson)
=== removed file 'cmake_modules/Windows/FindLibTidy.cmake'
--- cmake_modules/Windows/FindLibTidy.cmake 2011-08-19 00:03:36 +0000
+++ cmake_modules/Windows/FindLibTidy.cmake 1970-01-01 00:00:00 +0000
@@ -1,37 +0,0 @@
-# Copyright 2010 The FLWOR Foundation.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# - Try to find the HTML Tidy lib on Windows
-#
-# This is a proxy module that calls the FindLibTidy.cmake module. Before
-# doing that, we try to guess where LibTidy might be on the user's machine.
-# The user should provide ZORBA_THIRD_PARTY_REQUIREMENTS which is a path where
-# the LibTidy directory can be found. The LibTidy directory must have "tidy"
-# (case insensitive) in its name.
-#
-# This module helps the Windows user to avoid providing the following two
-# variables when building Zorba:
-# -D LIBTIDY_INCLUDE_DIR="path_to_3rd_party_dir\*tidy*\include"
-# -D LIBTIDY_LIBRARY="path_to_3rd_party_dir\*tidy*\lib\tidy.lib"
-#
-# See the FindLibTidy.cmake module shipped with Zorba for more information.
-
-FIND_PACKAGE_WIN32(NAME LibTidy FOUND_VAR LIBTIDY_FOUND SEARCH_NAMES tidy)
-
-IF (LIBTIDY_FOUND)
-
- # find the needed DLL's
- FIND_PACKAGE_DLLS_WIN32 (${FOUND_LOCATION} tidy.dll)
-
-ENDIF (LIBTIDY_FOUND)
=== modified file 'src/CMakeLists.txt'
--- src/CMakeLists.txt 2011-07-26 10:43:06 +0000
+++ src/CMakeLists.txt 2013-08-09 09:45:48 +0000
@@ -1,19 +1,33 @@
# Copyright 2006-2008 The FLWOR Foundation.
-#
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-# all external module libraries are generated in the directory
-# of the corresponding .xq file
-MESSAGE(STATUS "Add com")
-ADD_SUBDIRECTORY(com)
-MESSAGE(STATUS "End modules")
+# csv
+DECLARE_ZORBA_SCHEMA (FILE csv-options.xsd
+ URI "http://www.zorba-xquery.com/modules/converters/csv-options")
+DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/converters/csv" VERSION 1.0 FILE "csv.xq")
+
+# add unit test for streambuf implementation in csv, related to streamable strings
+CREATE_TEST_SOURCELIST (
+ StreambufUnitTests
+ StreambufUnitTests.cpp
+ streambuftest.cpp
+)
+ADD_EXECUTABLE (StreambufUnitTests ${StreambufUnitTests} csv.xq.src/stringiterator_streambuf.cpp)
+SET_TARGET_PROPERTIES (StreambufUnitTests PROPERTIES
+ FOLDER "Tests"
+)
+
+ADD_TEST ("streamable_string_streambuf" StreambufUnitTests "streambuftest")
+ADD_TEST_DIRECTORY("${PROJECT_SOURCE_DIR}/test")
+
=== removed file 'src/com/CMakeLists.txt'
--- src/com/CMakeLists.txt 2011-10-06 07:40:17 +0000
+++ src/com/CMakeLists.txt 1970-01-01 00:00:00 +0000
@@ -1,14 +0,0 @@
-# Copyright 2006-2008 The FLWOR Foundation.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-ADD_SUBDIRECTORY(zorba-xquery)
=== removed file 'src/com/zorba-xquery/CMakeLists.txt'
--- src/com/zorba-xquery/CMakeLists.txt 2011-10-06 07:40:17 +0000
+++ src/com/zorba-xquery/CMakeLists.txt 1970-01-01 00:00:00 +0000
@@ -1,14 +0,0 @@
-# Copyright 2006-2008 The FLWOR Foundation.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-ADD_SUBDIRECTORY(www)
=== removed file 'src/com/zorba-xquery/www/CMakeLists.txt'
--- src/com/zorba-xquery/www/CMakeLists.txt 2011-10-06 07:40:17 +0000
+++ src/com/zorba-xquery/www/CMakeLists.txt 1970-01-01 00:00:00 +0000
@@ -1,14 +0,0 @@
-# Copyright 2006-2008 The FLWOR Foundation.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-ADD_SUBDIRECTORY(modules)
=== removed file 'src/com/zorba-xquery/www/modules/CMakeLists.txt'
--- src/com/zorba-xquery/www/modules/CMakeLists.txt 2011-10-06 07:40:17 +0000
+++ src/com/zorba-xquery/www/modules/CMakeLists.txt 1970-01-01 00:00:00 +0000
@@ -1,14 +0,0 @@
-# Copyright 2006-2008 The FLWOR Foundation.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-ADD_SUBDIRECTORY(converters)
=== removed file 'src/com/zorba-xquery/www/modules/converters/CMakeLists.txt'
--- src/com/zorba-xquery/www/modules/converters/CMakeLists.txt 2012-02-28 20:41:53 +0000
+++ src/com/zorba-xquery/www/modules/converters/CMakeLists.txt 1970-01-01 00:00:00 +0000
@@ -1,61 +0,0 @@
-# Copyright 2006-2008 The FLWOR Foundation.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#
-# LibTidy
-#
-IF (ZORBA_SUPPRESS_LIBTIDY)
- MESSAGE (STATUS "ZORBA_SUPPRESS_LIBTIDY is true - not searching for LibTidy.")
-ELSE (ZORBA_SUPPRESS_LIBTIDY)
-
- MESSAGE (STATUS "Looking for LibTidy")
- FIND_PACKAGE (LibTidy)
-
- IF (LIBTIDY_FOUND)
- MESSAGE (STATUS "Found LibTidy library -- " ${LIBTIDY_LIBRARIES})
- SET (HTML_LINK_LIBRARIES ${LIBTIDY_LIBRARIES})
-
- INCLUDE_DIRECTORIES (${LIBTIDY_INCLUDE_DIR})
- INCLUDE_DIRECTORIES ("html.xq.src")
- DECLARE_ZORBA_SCHEMA (FILE "html-options.xsd"
- URI "http://www.zorba-xquery.com/modules/converters/html-options")
- DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/converters/html" VERSION 1.0 FILE "html.xq" LINK_LIBRARIES "${LIBTIDY_LIBRARIES}")
- ADD_TEST_DIRECTORY ("${PROJECT_SOURCE_DIR}/test_html")
-
- ADD_TEST(link_crawler_test_for_compilation "${ZORBA_EXE}" -f -q "${CMAKE_CURRENT_SOURCE_DIR}/../../../../../../test_html/Queries/converters/html/link_crawler2.xq2" --compile-only)
- ELSE (LIBTIDY_FOUND)
- MESSAGE (STATUS "LibTidy library not found -- if you want to use HTML Tidy functionality please set LIBTIDY_INCLUDE_DIR and LIBTIDY_LIBRARIES cmake parameters.")
- ENDIF (LIBTIDY_FOUND)
-ENDIF (ZORBA_SUPPRESS_LIBTIDY)
-MESSAGE (STATUS "")
-
-# csv
-DECLARE_ZORBA_SCHEMA (FILE csv-options.xsd
- URI "http://www.zorba-xquery.com/modules/converters/csv-options")
-DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/converters/csv" VERSION 1.0 FILE "csv.xq")
-
-# add unit test for streambuf implementation in csv, related to streamable strings
-CREATE_TEST_SOURCELIST (
- StreambufUnitTests
- StreambufUnitTests.cpp
- streambuftest.cpp
-)
-ADD_EXECUTABLE (StreambufUnitTests ${StreambufUnitTests} csv.xq.src/stringiterator_streambuf.cpp)
-SET_TARGET_PROPERTIES (StreambufUnitTests PROPERTIES
- FOLDER "Tests"
-)
-
-ADD_TEST ("streamable_string_streambuf" StreambufUnitTests "streambuftest")
-ADD_TEST_DIRECTORY("${PROJECT_SOURCE_DIR}/test")
-
=== removed file 'src/com/zorba-xquery/www/modules/converters/html-options.xsd'
--- src/com/zorba-xquery/www/modules/converters/html-options.xsd 2011-08-05 02:22:23 +0000
+++ src/com/zorba-xquery/www/modules/converters/html-options.xsd 1970-01-01 00:00:00 +0000
@@ -1,40 +0,0 @@
-<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
- xmlns:ho="http://www.zorba-xquery.com/modules/converters/html-options"
- targetNamespace="http://www.zorba-xquery.com/modules/converters/html-options"
- elementFormDefault="qualified" attributeFormDefault="unqualified"
- >
-<!--
-:: Copyright 2006-2008 The FLWOR Foundation.
-::
-:: Licensed under the Apache License, Version 2.0 (the "License");
-:: you may not use this file except in compliance with the License.
-:: You may obtain a copy of the License at
-::
-:: http://www.apache.org/licenses/LICENSE-2.0
-::
-:: Unless required by applicable law or agreed to in writing, software
-:: distributed under the License is distributed on an "AS IS" BASIS,
-:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-:: See the License for the specific language governing permissions and
-:: limitations under the License.
-::
--->
-
-
- <xs:element name="options">
- <xs:complexType>
- <xs:sequence>
- <xs:element name="tidyParam" minOccurs="0" maxOccurs="unbounded">
- <xs:complexType>
- <xs:simpleContent>
- <xs:extension base="xs:string">
- <xs:attribute name="name" type="xs:string" use="required"/>
- <xs:attribute name="value" type="xs:string" use="required"/>
- </xs:extension>
- </xs:simpleContent>
- </xs:complexType>
- </xs:element>
- </xs:sequence>
- </xs:complexType>
- </xs:element>
-</xs:schema>
\ No newline at end of file
=== renamed file 'src/com/zorba-xquery/www/modules/converters/html.xq' => 'src/com/zorba-xquery/www/modules/converters/html.xq.THIS'
=== removed directory 'src/com/zorba-xquery/www/modules/converters/html.xq.src'
=== removed file 'src/com/zorba-xquery/www/modules/converters/html.xq.src/html.cpp'
--- src/com/zorba-xquery/www/modules/converters/html.xq.src/html.cpp 2011-10-06 07:40:17 +0000
+++ src/com/zorba-xquery/www/modules/converters/html.xq.src/html.cpp 1970-01-01 00:00:00 +0000
@@ -1,145 +0,0 @@
-/*
- * Copyright 2006-2008 The FLWOR Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <sstream>
-
-#include <zorba/empty_sequence.h>
-#include <zorba/singleton_item_sequence.h>
-#include <zorba/item.h>
-
-#include "html.h"
-#include "tidy_wrapper.h"
-
-namespace zorba
-{
- namespace htmlmodule
- {
-
-//*****************************************************************************
-//*****************************************************************************
-
-ParseFunction::ParseFunction(const HtmlModule* aModule)
-: HtmlFunction(aModule)
-{
-}
-
-ItemSequence_t
-ParseFunction::evaluate(
- const ExternalFunction::Arguments_t& aArgs,
- const StaticContext* aSctxCtx,
- const DynamicContext* aDynCtx) const
- {
- std::auto_ptr<std::istringstream> iss;
- std::istream *is;
- String docString;
- Item lStringItem, lOptionsItem;
-
- if (aArgs.size() >= 1)
- {
- Iterator_t lArg0Iter = aArgs[0]->getIterator();
- lArg0Iter->open();
- lArg0Iter->next(lStringItem);
- lArg0Iter->close();
- }
-
- if ( lStringItem.isStreamable() )
- {
- //
- // The "iss" auto_ptr can NOT be used since it will delete the stream that,
- // in this case, is a data member inside another object and not dynamically
- // allocated.
- //
- // We can't replace "iss" with "is" since we still need the auto_ptr for
- // the case when the result is not streamable.
- //
- is = &lStringItem.getStream();
- }
- else
- {
- docString = lStringItem.getStringValue();
- iss.reset (new std::istringstream(docString.c_str()));
- is = iss.get();
- }
-
- if (aArgs.size() == 2)
- {
- Iterator_t lArg1Iter = aArgs[1]->getIterator();
- lArg1Iter->open();
- lArg1Iter->next(lOptionsItem);
- lArg1Iter->close();
- }
-
- return ItemSequence_t(new SingletonItemSequence(
- createHtmlItem( *is , lOptionsItem )));
- }
-
-//*****************************************************************************
-//*****************************************************************************
-
-ItemFactory* HtmlModule::theFactory = 0;
-
-HtmlModule::~HtmlModule()
-{
- for ( FuncMap_t::const_iterator lIter = theFunctions.begin();
- lIter != theFunctions.end();
- ++lIter)
- {
- delete lIter->second;
- }
- theFunctions.clear();
-}
-
-ExternalFunction*
-HtmlModule::getExternalFunction(const String& aLocalname)
-{
- ExternalFunction*& lFunc = theFunctions[aLocalname];
- if (!lFunc)
- {
- if (1 == 0)
- { }
- else if (aLocalname == "parse-internal")
- {
- lFunc = new ParseFunction(this);
- }
- }
- return lFunc;
-}
-
-void
-HtmlModule::destroy()
-{
- if (!dynamic_cast<HtmlModule*>(this))
- {
- return;
- }
- delete this;
-}
-//*****************************************************************************
-//*****************************************************************************
-
- } /* namespace htmlmodule */
-} /* namespace zorba */
-
-#ifdef WIN32
-# define DLL_EXPORT __declspec(dllexport)
-#else
-# define DLL_EXPORT __attribute__ ((visibility("default")))
-#endif
-
-extern "C" DLL_EXPORT zorba::ExternalModule* createModule()
-{
- return new zorba::htmlmodule::HtmlModule();
-}
=== removed file 'src/com/zorba-xquery/www/modules/converters/html.xq.src/html.h'
--- src/com/zorba-xquery/www/modules/converters/html.xq.src/html.h 2011-10-06 07:40:17 +0000
+++ src/com/zorba-xquery/www/modules/converters/html.xq.src/html.h 1970-01-01 00:00:00 +0000
@@ -1,112 +0,0 @@
-/*
- * Copyright 2006-2008 The FLWOR Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ZORBA_HTMLMODULE_HTML_H
-#define ZORBA_HTMLMODULE_HTML_H
-
-#include <map>
-
-#include <zorba/zorba.h>
-#include <zorba/function.h>
-#include <zorba/external_module.h>
-
-namespace zorba
-{
- namespace htmlmodule
- {
-//*****************************************************************************
-//*****************************************************************************
- class HtmlModule : public ExternalModule
- {
- private:
- static ItemFactory* theFactory;
-
- protected:
- class ltstr
- {
- public:
- bool operator()(const String& s1, const String& s2) const
- {
- return s1.compare(s2) < 0;
- }
- };
-
- typedef std::map<String, ExternalFunction*, ltstr> FuncMap_t;
-
- FuncMap_t theFunctions;
-
- public:
- virtual ~HtmlModule();
-
- virtual String
- getURI() const { return "http://www.zorba-xquery.com/modules/converters/html"; }
-
- virtual ExternalFunction*
- getExternalFunction(const String& aLocalname);
-
- virtual void
- destroy();
-
- static ItemFactory*
- getItemFactory()
- {
- if(!theFactory)
- {
- theFactory = Zorba::getInstance(0)->getItemFactory();
- }
- return theFactory;
- }
- };
-
-//*****************************************************************************
-//*****************************************************************************
- class HtmlFunction : public ContextualExternalFunction
- {
- protected:
- const HtmlModule* theModule;
- public:
- HtmlFunction(const HtmlModule* aModule)
- : theModule(aModule) {};
-
- ~HtmlFunction() {};
-
- virtual String
- getURI() const { return theModule->getURI(); }
-
- };
-
-//*****************************************************************************
-//*****************************************************************************
- class ParseFunction : public HtmlFunction
- {
- public:
- ParseFunction(const HtmlModule* aModule);
-
- virtual String
- getLocalName() const { return "parse-internal"; }
-
- virtual ItemSequence_t
- evaluate(const ExternalFunction::Arguments_t& args,
- const StaticContext* aSctxCtx,
- const DynamicContext* aDynCtx) const;
- };
-
-
-
-
- } /* namespace htmlmodule */
-} /* namespace zorba */
-
-#endif /* ZORBA_HTMLMODULE_HTML_H */
=== removed file 'src/com/zorba-xquery/www/modules/converters/html.xq.src/tidy_wrapper.cpp'
--- src/com/zorba-xquery/www/modules/converters/html.xq.src/tidy_wrapper.cpp 2011-10-06 07:40:17 +0000
+++ src/com/zorba-xquery/www/modules/converters/html.xq.src/tidy_wrapper.cpp 1970-01-01 00:00:00 +0000
@@ -1,34 +0,0 @@
-/*
- * Copyright 2006-2008 The FLWOR Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <iostream>
-
-#include <tidy.h>
-#include <buffio.h>
-
-#include "tidy_wrapper.h"
-
-#include <zorba/item_factory.h>
-#include <zorba/item.h>
-#include <zorba/xmldatamanager.h>
-
-namespace zorba
-{
- namespace htmlmodule
- {
-
- } /* namespace htmlmodule */
-} /* namespace zorba */
\ No newline at end of file
=== removed file 'src/com/zorba-xquery/www/modules/converters/html.xq.src/tidy_wrapper.h'
--- src/com/zorba-xquery/www/modules/converters/html.xq.src/tidy_wrapper.h 2011-08-18 23:42:49 +0000
+++ src/com/zorba-xquery/www/modules/converters/html.xq.src/tidy_wrapper.h 1970-01-01 00:00:00 +0000
@@ -1,200 +0,0 @@
-/*
- * Copyright 2006-2008 The FLWOR Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ZORBA_HTMLMODULE_TIDY_WRAPPER_H
-#define ZORBA_HTMLMODULE_TIDY_WRAPPER_H
-
-#include <iostream>
-#include <sstream>
-
-#include <tidy.h>
-#include <buffio.h>
-
-#include <zorba/item_factory.h>
-#include <zorba/item.h>
-#include <zorba/iterator.h>
-#include <zorba/store_consts.h>
-#include <zorba/xmldatamanager.h>
-#include <zorba/error.h>
-#include <zorba/diagnostic_list.h>
-#include <zorba/user_exception.h>
-#include <zorba/zorba.h>
-
-namespace zorba
-{
- namespace htmlmodule
- {
- class TidyReader {
- private:
- std::istream* theStream;
- // We need a buffer to support the unget function
- std::vector<unsigned int> theBuffer;
- public:
- TidyReader(std::istream* aStream) : theStream(aStream) {}
- TidyInputSource getInputSource()
- {
- TidyInputSource lResult;
- lResult.sourceData = this;
- lResult.getByte = &getByte;
- lResult.ungetByte = &ungetByte;
- lResult.eof = &isEof;
- return lResult;
- }
-
- public: // callback functions
- static int TIDY_CALL getByte(void* aData)
- {
- TidyReader* lReader = static_cast<TidyReader*>(aData);
- if (lReader->theBuffer.empty())
- return lReader->theStream->get();
- else
- {
- int lResult = lReader->theBuffer.back();
- lReader->theBuffer.pop_back();
- return lResult;
- }
- }
-
- static void TIDY_CALL ungetByte(void* aData, byte aByte)
- {
- TidyReader* lReader = static_cast<TidyReader*>(aData);
- lReader->theBuffer.push_back(aByte);
- }
-
- static Bool TIDY_CALL isEof(void* aData)
- {
- TidyReader* lReader = static_cast<TidyReader*>(aData);
- return lReader->theStream->eof() ? yes : no;
- }
- };
-
- static void checkRC(int rc, const char* errMsg)
- {
- if (rc > 1)
- {
- zorba::Item lError = Zorba::getInstance(0)->getItemFactory()
- ->createQName(
- "http://www.zorba-xquery.com/modules/converters/html",
- "InternalError");
- throw USER_EXCEPTION(lError, errMsg );
- }
- }
-
- static Bool setTidyOption(TidyDoc doc, const char* option, const char* value)
- {
- Bool ok = yes;
- TidyOptionId toID = tidyOptGetIdForName(option);
- if(toID < N_TIDY_OPTIONS)
- {
- ok = tidyOptSetValue(doc, toID, value);
- if (ok != yes)
- {
- zorba::Item lError = Zorba::getInstance(0)->getItemFactory()
- ->createQName(
- "http://www.zorba-xquery.com/modules/converters/html",
- "TidyOption");
- std::ostringstream lErrorMsg;
- lErrorMsg << "Error setting tidy option '" << option
- << "' with value '" << value << "'";
- throw USER_EXCEPTION(lError, lErrorMsg.str());
- }
- }
- else
- {
- return no;
- }
- return ok;
- }
-
- static Bool applyOptions(TidyDoc aDoc, zorba::Item &aOptions)
- {
- zorba::Iterator_t lAttributes, lElements;
- zorba::Item lAttr, lElementItem, lAttrName;
- zorba::String lStrName, lStrValue;
- Bool lRet = yes;
-
- if(!aOptions.isNull())
- {
- lElements = aOptions.getChildren();
- lElements->open();
- while (lElements->next(lElementItem)
- && lElementItem.getNodeKind () == store::StoreConsts::elementNode)
- {
- lAttributes = lElementItem.getAttributes();
- lAttributes->open();
- while (lAttributes->next(lAttr))
- {
- lAttr.getNodeName(lAttrName);
- if(lAttrName.getLocalName() == "name")
- lStrName = lAttr.getStringValue();
- else if(lAttrName.getLocalName() == "value")
- lStrValue = lAttr.getStringValue();
- }
- setTidyOption(aDoc, lStrName.c_str(), lStrValue.c_str());
- lAttributes->close();
- }
- lElements->close();
- }
- return lRet;
- }
-
- static zorba::Item createHtmlItem( std::istream& aStream , zorba::Item &aOptions)
- {
- TidyReader lReader(&aStream);
- TidyInputSource lInputSource = lReader.getInputSource();
-
- TidyBuffer output;
- tidyBufInit(&output);
- TidyBuffer errbuf;
- tidyBufInit(&errbuf);
- TidyDoc tDoc = tidyCreate();
-
- applyOptions(tDoc, aOptions);
-
- int rc = -1;
- rc = tidySetErrorBuffer(tDoc, &errbuf);
- checkRC(rc, "Could not set error buffer");
- rc = tidyParseSource(tDoc, &lInputSource);
- checkRC(rc, "Could not parse the source");
- rc = tidyCleanAndRepair(tDoc);
- checkRC(rc, "Could not clean and repair");
- rc = tidyRunDiagnostics(tDoc);
- if ( rc > 1 )
- rc = ( tidyOptSetBool(tDoc, TidyForceOutput, yes) ? rc : -1 );
-
- // Tidy does not support streaming for output, it only supports
- // something they call a "sink". Therefore we buffer it in a string.
- rc = tidySaveBuffer(tDoc, &output);
- checkRC(rc, "Could not save the buffer");
- std::string lResult((char*) output.bp, output.size);
- std::istringstream lStream(lResult);
-
- tidyBufFree(&output);
- tidyBufFree(&errbuf);
- tidyRelease(tDoc);
- XmlDataManager* lDM = Zorba::getInstance(0)->getXmlDataManager();
- try
- {
- return lDM->parseXML(lStream);
- } catch (ZorbaException&)
- {
- return NULL;//Zorba::getInstance(0)->getItemFactory()->createString(lResult);
- }
- }
- } /* namespace htmlmodule */
-} /* namespace zorba */
-
-#endif //ZORBA_HTMLMODULE_TIDY_WRAPPER_H
=== renamed file 'src/com/zorba-xquery/www/modules/converters/csv-options.xsd' => 'src/csv-options.xsd'
=== renamed file 'src/com/zorba-xquery/www/modules/converters/csv.xq' => 'src/csv.xq'
--- src/com/zorba-xquery/www/modules/converters/csv.xq 2012-09-28 12:49:51 +0000
+++ src/csv.xq 2013-08-09 09:45:48 +0000
@@ -35,7 +35,7 @@
:)
import schema namespace csv-options = "http://www.zorba-xquery.com/modules/converters/csv-options";
-declare namespace ver = "http://www.zorba-xquery.com/options/versioning";
+declare namespace ver = "http://zorba.io/options/versioning";
declare option ver:module-version "1.0";
(:~
=== renamed directory 'src/com/zorba-xquery/www/modules/converters/csv.xq.src' => 'src/csv.xq.src'
=== modified file 'src/csv.xq.src/csv.cpp'
--- src/com/zorba-xquery/www/modules/converters/csv.xq.src/csv.cpp 2011-08-18 20:54:58 +0000
+++ src/csv.xq.src/csv.cpp 2013-08-09 09:45:48 +0000
@@ -32,7 +32,6 @@
#include <zorba/singleton_item_sequence.h>
#include <zorba/empty_sequence.h>
#include <zorba/zorba_functions.h>
-#include <zorba/file.h>
#include "csv.h"
=== modified file 'src/csv.xq.src/csv.h'
--- src/com/zorba-xquery/www/modules/converters/csv.xq.src/csv.h 2012-01-11 18:17:54 +0000
+++ src/csv.xq.src/csv.h 2013-08-09 09:45:48 +0000
@@ -20,7 +20,7 @@
#include <zorba/iterator.h>
#include <zorba/external_module.h>
#include <zorba/function.h>
-#include <zorba/smart_ptr.h>
+#include <zorba/util/smart_ptr.h>
#include "stream_wrapper.h"
#include "stringiterator_streambuf.h"
=== modified file 'src/csv.xq.src/stream_wrapper.h'
--- src/com/zorba-xquery/www/modules/converters/csv.xq.src/stream_wrapper.h 2011-08-17 23:28:43 +0000
+++ src/csv.xq.src/stream_wrapper.h 2013-08-09 09:45:48 +0000
@@ -17,7 +17,7 @@
#define ZORBA_PROCESSMODULE_PROCESS_H
#include <zorba/zorba.h>
-#include <zorba/smart_ptr.h>
+#include <zorba/util/smart_ptr.h>
namespace zorba {
namespace csv {
@@ -85,4 +85,4 @@
}}
-#endif
\ No newline at end of file
+#endif
=== renamed file 'src/com/zorba-xquery/www/modules/converters/streambuftest.cpp' => 'src/streambuftest.cpp'
=== modified file 'test/Queries/converters/base64/base64.xq'
--- test/Queries/converters/base64/base64.xq 2011-05-19 16:57:42 +0000
+++ test/Queries/converters/base64/base64.xq 2013-08-09 09:45:48 +0000
@@ -1,3 +1,3 @@
-import module namespace base64 = "http://www.zorba-xquery.com/modules/converters/base64";
+import module namespace base64 = "http://zorba.io/modules/base64";
base64:decode(base64:encode("This is a test"))
=== modified file 'test/Queries/converters/base64/binaryReadWrite.xq'
--- test/Queries/converters/base64/binaryReadWrite.xq 2012-04-11 09:49:40 +0000
+++ test/Queries/converters/base64/binaryReadWrite.xq 2013-08-09 09:45:48 +0000
@@ -1,11 +1,11 @@
-import module namespace base64 = "http://www.zorba-xquery.com/modules/converters/base64";
+import module namespace base64 = "http://zorba.io/modules/base64";
import module namespace commons = "http://expath.org/ns/file/tests/commons" at "common.xqlib";
import module namespace file = "http://expath.org/ns/file";
import schema namespace output = "http://www.w3.org/2010/xslt-xquery-serialization";
-declare namespace an = "http://www.zorba-xquery.com/annotations";
+declare namespace an = "http://zorba.io/annotations";
declare variable $inFile as xs:string := fn:concat(file:dir-name(fn:static-base-uri()), "/../image/images/bird.gif");
declare variable $testDir as xs:string := fn:concat(file:dir-name(fn:static-base-uri()),"/tmpBinaryReadWrite");
=== modified file 'test/Queries/converters/base64/common.xqlib'
--- test/Queries/converters/base64/common.xqlib 2012-04-11 09:49:40 +0000
+++ test/Queries/converters/base64/common.xqlib 2013-08-09 09:45:48 +0000
@@ -4,7 +4,7 @@
import schema namespace output = "http://www.w3.org/2010/xslt-xquery-serialization";
-declare namespace an = "http://www.zorba-xquery.com/annotations";
+declare namespace an = "http://zorba.io/annotations";
declare function commons:error($messages as xs:string*) as xs:string* {
"
@@ -37,7 +37,7 @@
};
declare %an:sequential function commons:testWriteXml($path as xs:string, $xml as item()) as xs:string* {
- file:write($path, $xml, ());
+ file:write-text($path, serialize($xml));
"SUCCESS"
};
@@ -65,12 +65,15 @@
};
declare %an:sequential function commons:testWriteSerializeXml($path as xs:string, $xml as item()) as xs:string* {
- file:write(
+ file:write-text(
$path,
- $xml,
- <output:serialization-parameters>
- <output:method value="xml"/>
- </output:serialization-parameters>);
+ serialize(
+ $xml,
+ <output:serialization-parameters>
+ <output:method value="xml"/>
+ </output:serialization-parameters>
+ )
+ );
"SUCCESS";
};
=== removed directory 'test_html'
=== removed directory 'test_html/ExpQueryResults'
=== removed directory 'test_html/ExpQueryResults/converters'
=== removed directory 'test_html/ExpQueryResults/converters/html'
=== removed file 'test_html/ExpQueryResults/converters/html/tidy_1.xml.res'
--- test_html/ExpQueryResults/converters/html/tidy_1.xml.res 2011-10-06 07:40:17 +0000
+++ test_html/ExpQueryResults/converters/html/tidy_1.xml.res 1970-01-01 00:00:00 +0000
@@ -1,8 +0,0 @@
-<html>
-<head>
-<title>Foo</title>
-</head>
-<body>
-<p>Foo!</p>
-</body>
-</html>
\ No newline at end of file
=== removed file 'test_html/ExpQueryResults/converters/html/tidy_2.xml.res'
--- test_html/ExpQueryResults/converters/html/tidy_2.xml.res 2011-10-06 07:40:17 +0000
+++ test_html/ExpQueryResults/converters/html/tidy_2.xml.res 1970-01-01 00:00:00 +0000
@@ -1,8 +0,0 @@
-<html>
-<head>
-<title>Foo</title>
-</head>
-<body>
-<p>Foo!</p>
-</body>
-</html>
\ No newline at end of file
=== removed file 'test_html/ExpQueryResults/converters/html/tidy_3.xml.res'
--- test_html/ExpQueryResults/converters/html/tidy_3.xml.res 2011-10-06 07:40:17 +0000
+++ test_html/ExpQueryResults/converters/html/tidy_3.xml.res 1970-01-01 00:00:00 +0000
@@ -1,41 +0,0 @@
-<html>
-<head>
-<title>[ #426885 ] Definition list w/Center crashes</title>
-</head>
-<body>
-<center>
-<h1>Heading 1</h1>
-</center>
-<dl>
-<dt>
-<img src="redball.gif"/>
-<b>Term 1</b>
-</dt>
-<dt>
-<img src="redball.gif"/>
-<b>Term 2</b>
-</dt>
-<dd>
-<hr/>
-</dd>
-</dl>
-<center>
-<h1>Heading 2</h1>
-</center>
-<div style="margin-left: 2em">
-<dl>
-<dt>
-<img src="redball.gif"/>
-<b>Term 3</b>
-</dt>
-<dt>
-<img src="redball.gif"/>
-<b>Term 4</b>
-</dt>
-<dd>
-<hr/>
-</dd>
-</dl>
-</div>
-</body>
-</html>
\ No newline at end of file
=== removed file 'test_html/ExpQueryResults/converters/html/tidy_4.xml.res'
--- test_html/ExpQueryResults/converters/html/tidy_4.xml.res 2011-10-06 07:40:17 +0000
+++ test_html/ExpQueryResults/converters/html/tidy_4.xml.res 1970-01-01 00:00:00 +0000
@@ -1,9 +0,0 @@
-<html>
-<head>
-<title>[#427663] Line endings not supported correctly</title>
-</head>
-<body>
-<p>This is a carriage return^MThis is a Unix line-ending This is a
-DOS line ending^M</p>
-</body>
-</html>
\ No newline at end of file
=== removed directory 'test_html/Queries'
=== removed directory 'test_html/Queries/converters'
=== removed directory 'test_html/Queries/converters/html'
=== removed file 'test_html/Queries/converters/html/link_crawler2.xq2'
--- test_html/Queries/converters/html/link_crawler2.xq2 2012-04-11 09:49:40 +0000
+++ test_html/Queries/converters/html/link_crawler2.xq2 1970-01-01 00:00:00 +0000
@@ -1,263 +0,0 @@
-(:
- : Copyright 2006-2011 The FLWOR Foundation.
- :
- : Licensed under the Apache License, Version 2.0 (the "License");
- : you may not use this file except in compliance with the License.
- : You may obtain a copy of the License at
- :
- : http://www.apache.org/licenses/LICENSE-2.0
- :
- : Unless required by applicable law or agreed to in writing, software
- : distributed under the License is distributed on an "AS IS" BASIS,
- : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- : See the License for the specific language governing permissions and
- : limitations under the License.
-:)
-
-import module namespace http = "http://www.zorba-xquery.com/modules/http-client";
-import module namespace map = "http://www.zorba-xquery.com/modules/store/data-structures/unordered-map";
-import module namespace html = "http://www.zorba-xquery.com/modules/converters/html";
-import module namespace parse-xml = "http://www.zorba-xquery.com/modules/xml";
-import module namespace file = "http://expath.org/ns/file";
-
-declare namespace an = "http://www.zorba-xquery.com/annotations";
-declare namespace xhtml="http://www.w3.org/1999/xhtml";
-declare namespace output="http://www.w3.org/2010/xslt-xquery-serialization";
-declare namespace err="http://www.w3.org/2005/xqt-errors";
-declare namespace httpsch = "http://expath.org/ns/http-client";
-
-declare variable $top-uri as xs:string := "http://www.zorba-xquery.com/html/index/";
-declare variable $uri-host as xs:string := "http://www.zorba-xquery.com";
-
-
-
-declare variable $local:processed-internal-links := xs:QName("processed-internal-links");
-declare variable $local:processed-external-links := xs:QName("processed-external-links");
-declare variable $local:tidy-options := <options xmlns="http://www.zorba-xquery.com/modules/converters/html-options" >
- <tidyParam name="output-xml" value="yes" />
- <tidyParam name="doctype" value="omit" />
- <tidyParam name="quote-nbsp" value="no" />
- <tidyParam name="char-encoding" value="utf8" />
- <tidyParam name="newline" value="LF" />
- <tidyParam name="tidy-mark" value="no" />
- <tidyParam name="new-inline-tags" value="nav header section article footer xqdoc:custom d c options json-param" />
- </options>;
-
-
-
-declare %an:sequential function local:create-containers()
-{
- map:create($local:processed-internal-links, xs:QName("xs:string"));
- map:create($local:processed-external-links, xs:QName("xs:string"));
-};
-
-declare %an:sequential function local:delete-containers(){
- for $x in map:available-maps()
- return map:delete($x);
-};
-
-declare function local:is-internal($x as xs:string) as xs:boolean
-{
- starts-with($x, $uri-host)
-};
-
-declare function local:my-substring-before($s1 as xs:string, $s2 as xs:string) as xs:string
-{
-let $sb := fn:substring-before($s1, $s2)
-return if($sb = "") then $s1 else $sb
-};
-
-declare %an:sequential function local:get-real-link($href as xs:string, $start-uri as xs:string) as xs:string?
-{
- variable $absuri;
- try{
- $absuri := local:my-substring-before(resolve-uri(fn:normalize-space($href), $start-uri), "#");
- }
- catch *
- {
- map:insert($local:processed-external-links, (<FROM>{$start-uri}</FROM>,
- <MESSAGE>malformed</MESSAGE>,
- <RESULT>broken</RESULT>), $href);
- }
- $absuri
-};
-
-
-declare function local:get-media-type ($http-call as node()) as xs:string
-{
- local:my-substring-before($http-call/httpsch:header[@name = 'Content-Type'][1]/string(@value), ";")
-};
-
-declare function local:alive($http-call as item()*) as xs:boolean
-{
- if((count($http-call) ge 1) and
- ($http-call[1]/@status eq 200))
- then true() else fn:trace(false(), "alive")
-};
-
-declare function local:is-redirect($http-call as item()*) as xs:boolean
-{
- if((count($http-call) ge 1) and
- (($http-call[1]/@status idiv 100) eq 3))
- then fn:trace(true(), "redirect") else false()
-};
-
-
-declare %an:sequential function local:get-out-links-parsed($content as node()*, $uri as xs:string) as xs:string*
-{ distinct-values( for $y in ($content//*:a/string(@href),
- $content//*:link/string(@href),
- $content//*:script/string(@src),
- $content//*:img/string(@src),
- $content//*:area/string(@href)
- )
-return local:get-real-link($y, $uri))
-};
-
-
-declare %an:sequential function local:get-out-links-unparsed($content as xs:string, $uri as xs:string) as xs:string*{
-
- distinct-values(
- let $search := fn:analyze-string($content, "(<|&lt;|<)(((a|link|area).+?href)|((script|img).+?src))=([""'])(.*?)\7")
- for $other-uri2 in $search//group[@nr=8]/string()
- return local:get-real-link($other-uri2, $uri)
- )
-};
-
-
-declare %an:sequential function local:map-insert-result($map-name as xs:QName, $url as xs:string, $http-result as item()*)
-{
- if(count($http-result) ge 1)
- then
- map:insert($map-name, (<STATUS>{fn:string($http-result[1]/@status)}</STATUS>,
- <MESSAGE>{fn:string($http-result[1]/@message)}</MESSAGE>,
- <RESULT>{if(local:alive($http-result))
- then "Ok"
- else if(local:is-redirect($http-result))
- then "redirect"
- else "broken"
- }</RESULT>), $url);
- else map:insert($map-name, <RESULT>broken</RESULT>, $url);
- if(local:is-redirect($http-result)) then
- map:insert($map-name, <REDIRECT>{fn:string($http-result[1]/httpsch:header[@name = "Location"]/@value)}</REDIRECT>, $url);
- else {}
-};
-
-declare %an:sequential function local:process-link($x as xs:string, $baseUri as xs:string, $n as xs:integer) as item()*{
- if(local:is-internal($x))
- then local:process-internal-link($x, $baseUri, $n);
- else local:process-external-link($x, $baseUri);
-
-};
-
-declare %an:sequential function local:process-external-link($x as xs:string, $baseUri as xs:string){
- if(not(empty(map:get($local:processed-external-links, $x))))
- then exit returning false();
- else {}
- fn:trace($x, "HEAD external link");
- map:insert($local:processed-external-links, <FROM>{$baseUri}</FROM>, $x);
- variable $http-call:=();
- try{
- $http-call:=http:send-request(<httpsch:request method="GET" href="{$x}"/>, (), ());
- if((count($http-call) ge 1) and
- fn:not($http-call[1]/@status eq 200)) then
- {
- if(local:is-redirect($http-call)) then
- {
- local:map-insert-result($local:processed-external-links, $x, $http-call);
- }
- else {}
- $http-call:=http:send-request(<httpsch:request method="GET" href="{$x}"/>, (), ());
- local:map-insert-result($local:processed-external-links, $x, $http-call);
- }
- else
- {}
- }
- catch *
- { $http-call:=();}
- local:map-insert-result($local:processed-external-links, $x, $http-call);
-};
-
-
-declare %an:sequential function local:process-internal-link($x as xs:string, $baseUri as xs:string, $n as xs:integer){
- (: if($n=3) then exit returning (); else {} :)
- if(not(empty(map:get($local:processed-internal-links, $x))))
- then exit returning false();
- else {}
- fn:trace($x, "GET internal link");
- map:insert($local:processed-internal-links, <FROM>{$baseUri}</FROM>, $x);
- variable $http-call:=();
- try{
- $http-call:=http:send-request(<httpsch:request method="GET" href="{$x}" follow-redirect="false"/>, (), ());
- }
- catch * { }
- if(local:is-redirect($http-call)) then
- {
- local:map-insert-result($local:processed-internal-links, $x, $http-call);
- try{
- $http-call:=http:send-request(<httpsch:request method="GET" href="{$x}"/>, (), ());
- }
- catch * { }
- }
- else {}
- if( not(local:alive($http-call)))
- then { local:map-insert-result($local:processed-internal-links, $x, $http-call); exit returning ();}
- else {}
-
- if(not (local:get-media-type($http-call[1]) = "text/html"))
- then { local:map-insert-result($local:processed-internal-links, $x, $http-call); exit returning ();}
- else {}
- variable $string-content := string($http-call[2]);
- variable $content:=();
-
- try{
- $content:=html:parse($string-content,$local:tidy-options );
- local:map-insert-result($local:processed-internal-links, $x, $http-call);
- }
- catch *
- {
- map:insert($local:processed-internal-links, (<MESSAGE>{concat("cannot tidy: ", $err:description)}</MESSAGE>,
- <RESULT>broken</RESULT>), $x);
- try{
- $content:=parse-xml:parse-xml-fragment ($string-content, "");
- }
- catch *
- { map:insert($local:processed-internal-links, <MESSAGE>{concat("cannot parse: ", $err:description)}</MESSAGE>, $x);}
- }
- variable $links :=();
- if(empty($content))
- then $links:=local:get-out-links-unparsed($string-content, fn:trace($x, "parse with regex, because tidy failed"));
- else $links:=local:get-out-links-parsed($content, $x);
- for $l in $links
- return local:process-link($l, $x, $n+1);
-};
-
-
-
-
-declare function local:print-results() as element()*
-{
- for $x in map:keys($local:processed-internal-links)/map:attribute/@value/string()
- return <INTERNAL><LINK>{$x}</LINK>{map:get($local:processed-internal-links,$x)}</INTERNAL>,
- for $x in map:keys($local:processed-external-links)/map:attribute/@value/string()
- return <EXTERNAL><LINK>{$x}</LINK>{map:get($local:processed-external-links,$x)}</EXTERNAL>
-};
-
-(:==========================================
-===========================================:)
-
-variable $uri:= $top-uri;
-
-variable $result;
-
-local:create-containers();
-local:process-link($uri, "", 1);
-$result:=local:print-results() ;
-
-local:delete-containers();
-
-file:write(fn:resolve-uri("link_crawler_result.xml"),
- <result>{$result}</result>,
- <output:serialization-parameters>
- <output:indent value="yes"/>
- </output:serialization-parameters>)
-
=== removed file 'test_html/Queries/converters/html/tidy_1.xq'
--- test_html/Queries/converters/html/tidy_1.xq 2011-10-06 07:40:17 +0000
+++ test_html/Queries/converters/html/tidy_1.xq 1970-01-01 00:00:00 +0000
@@ -1,14 +0,0 @@
-(: tidy a html using different tidy options :)
-
-import module namespace html="http://www.zorba-xquery.com/modules/converters/html";
-import schema namespace html-options="http://www.zorba-xquery.com/modules/converters/html-options";
-
-html:parse('<title>Foo</title><p>Foo!',
- <options xmlns="http://www.zorba-xquery.com/modules/converters/html-options" >
- <tidyParam name="output-xml" value="yes" />
- <tidyParam name="doctype" value="omit" />
- <tidyParam name="quote-nbsp" value="no" />
- <tidyParam name="char-encoding" value="utf8" />
- <tidyParam name="newline" value="LF" />
- <tidyParam name="tidy-mark" value="no" />
- </options>)
\ No newline at end of file
=== removed file 'test_html/Queries/converters/html/tidy_2.xq'
--- test_html/Queries/converters/html/tidy_2.xq 2011-10-06 07:40:17 +0000
+++ test_html/Queries/converters/html/tidy_2.xq 1970-01-01 00:00:00 +0000
@@ -1,6 +0,0 @@
-(: tidy a html using default tidy options :)
-
-import module namespace html="http://www.zorba-xquery.com/modules/converters/html";
-import schema namespace html-options="http://www.zorba-xquery.com/modules/converters/html-options";
-
-html:parse('<title>Foo</title><p>Foo!')
\ No newline at end of file
=== removed file 'test_html/Queries/converters/html/tidy_3.xq'
--- test_html/Queries/converters/html/tidy_3.xq 2011-10-06 07:40:17 +0000
+++ test_html/Queries/converters/html/tidy_3.xq 1970-01-01 00:00:00 +0000
@@ -1,18 +0,0 @@
-import module namespace html="http://www.zorba-xquery.com/modules/converters/html";
-import schema namespace html-options="http://www.zorba-xquery.com/modules/converters/html-options";
-
-html:parse('<HTML>
-<HEAD>
-<TITLE>[ #426885 ] Definition list w/Center crashes</TITLE>
-</HEAD>
-<BODY>
-<CENTER><H1>Heading 1</H1></CENTER>
-<DT><IMG src="redball.gif"><B>Term 1</B></DT>
-<DT><IMG src="redball.gif"><B>Term 2</B><HR></DT>
-<CENTER><H1>Heading 2</H1></CENTER>
-<UL>
-<DT><IMG src="redball.gif"><B>Term 3</B></DT>
-<DT><IMG src="redball.gif"><B>Term 4</B><HR></DT>
-</UL>
-</BODY>
-</HTML>')
\ No newline at end of file
=== removed file 'test_html/Queries/converters/html/tidy_4.xq'
--- test_html/Queries/converters/html/tidy_4.xq 2011-10-06 07:40:17 +0000
+++ test_html/Queries/converters/html/tidy_4.xq 1970-01-01 00:00:00 +0000
@@ -1,14 +0,0 @@
-import module namespace html="http://www.zorba-xquery.com/modules/converters/html";
-import schema namespace html-options="http://www.zorba-xquery.com/modules/converters/html-options";
-
-html:parse('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
-<html>
-<head>
- <title>[#427663] Line endings not supported correctly</title>
-</head>
-<body>
-<p>This is a carriage return^MThis is a Unix line-ending
-This is a DOS line ending^M
-
-</body>
-</html>')
\ No newline at end of file
=== removed file 'test_html/Queries/converters/html/tidy_5_wrong_options.spec'
--- test_html/Queries/converters/html/tidy_5_wrong_options.spec 2011-08-18 23:42:49 +0000
+++ test_html/Queries/converters/html/tidy_5_wrong_options.spec 1970-01-01 00:00:00 +0000
@@ -1,1 +0,0 @@
-Error: http://www.zorba-xquery.com/modules/converters/html:TidyOption
=== removed file 'test_html/Queries/converters/html/tidy_5_wrong_options.xq'
--- test_html/Queries/converters/html/tidy_5_wrong_options.xq 2011-10-06 07:40:17 +0000
+++ test_html/Queries/converters/html/tidy_5_wrong_options.xq 1970-01-01 00:00:00 +0000
@@ -1,9 +0,0 @@
-(: tidy a html using wrong tidy option for a value :)
-
-import module namespace html="http://www.zorba-xquery.com/modules/converters/html";
-import schema namespace html-options="http://www.zorba-xquery.com/modules/converters/html-options";
-
-html:parse('<title>Foo</title><p>Foo!',
- <options xmlns="http://www.zorba-xquery.com/modules/converters/html-options" >
- <tidyParam name="output-xml" value="maybe" />
- </options>)
\ No newline at end of file
=== removed file 'test_html/Queries/converters/html/tidy_6_wrong_options.spec'
--- test_html/Queries/converters/html/tidy_6_wrong_options.spec 2011-08-18 23:42:49 +0000
+++ test_html/Queries/converters/html/tidy_6_wrong_options.spec 1970-01-01 00:00:00 +0000
@@ -1,1 +0,0 @@
-Error: http://www.w3.org/2005/xqt-errors:XQDY0027
=== removed file 'test_html/Queries/converters/html/tidy_6_wrong_options.xq'
--- test_html/Queries/converters/html/tidy_6_wrong_options.xq 2011-08-18 23:42:49 +0000
+++ test_html/Queries/converters/html/tidy_6_wrong_options.xq 1970-01-01 00:00:00 +0000
@@ -1,14 +0,0 @@
-(: tidy a html using correct tidy options/values but wrongly formated as html-options :)
-
-import module namespace html="http://www.zorba-xquery.com/modules/converters/html";
-import schema namespace html-options="http://www.zorba-xquery.com/modules/converters/html-options";
-
-html:parse('<title>Foo</title><p>Foo!',
- <options xmlns="http://www.zorba-xquery.com/modules/converters/html-options" >
- <tidyaram name="output-xml" value="yes" />
- <tidyParam name="doctype" value="omit" />
- <tidyParam name="quote-nbsp" value="no" />
- <tidyParam name="char-encoding" value="utf8" />
- <tidyParam name="newline" value="LF" />
- <tidyParam name="tidy-mark" value="no" />
- </options>)
Follow ups