zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #14956
[Merge] lp:~zorba-coders/zorba/fread-pdf-trunk into lp:zorba
Cezar Andrei has proposed merging lp:~zorba-coders/zorba/fread-pdf-trunk into lp:zorba.
Commit message:
Add read-pdf module for getting text and rendered images from pdf documents.
Make the doc comments for createBase64Binary more explicit about what parameters they expect and what they do.
Change the return value of the getIntValue() method to xs_int.
Requested reviews:
Cezar Andrei (cezar-andrei)
Matthias Brantner (matthias-brantner)
Juan Zacarias (juan457)
Related bugs:
Bug #1012417 in Zorba: "PDF to XML data convertor"
https://bugs.launchpad.net/zorba/+bug/1012417
For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/fread-pdf-trunk/+merge/126822
Add read-pdf module for getting text and rendered images from pdf documents.
Make the doc comments for createBase64Binary more explicit about what parameters they expect and what they do.
Change the return value of the getIntValue() method to xs_int.
--
https://code.launchpad.net/~zorba-coders/zorba/fread-pdf-trunk/+merge/126822
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'ChangeLog'
--- ChangeLog 2012-09-27 22:25:40 +0000
+++ ChangeLog 2012-09-27 23:07:25 +0000
@@ -17,10 +17,11 @@
* Recognize the {http://www.zorba-xquery.com/extensions}no-copy pragma to avoid
copying nodes before insertion into a collection.
* Recognize the no-copy pragma to avoid copying nodes before insertion into a collection.
- * Added createUntypedAtomic to API's ItemFactory.
+ * Added createUntypedAtomic to API's ItemFactory.
* new xqDoc feature; reporting collections and indexes
* new xqDoc function parameter to enable/disable xqDoc generation of
certain components
+ * Added new external module read-pdf, which converts PDF documents to text or rendered images.
Optimizations:
* Improved memory management for compiler expressions (no more ref counting)
=== modified file 'cmake_modules/ZorbaModule.cmake'
--- cmake_modules/ZorbaModule.cmake 2012-09-17 00:36:37 +0000
+++ cmake_modules/ZorbaModule.cmake 2012-09-27 23:07:25 +0000
@@ -114,9 +114,20 @@
# relative to CMAKE_CURRENT_SOURCE_DIR)
# LINK_LIBRARIES - (optional) List of libraries to link external
# function library against
+# CONFIG_FILES - (optional) List of files to configure with package
+# information; see below
# TEST_ONLY - (optional) Module is for testcases only and should not
# be installed
#
+# CONFIG_FILES - any files specified here will be copied to
+# CMAKE_CURRENT_BINARY_DIR using CONFIGURE_FILE(). They may contain
+# the following @VARIABLES@ which will be substituted:
+# ZORBA_MODULE_RELATIVE_DIR - directory portion of mangled URI
+# ZORBA_MODULE_LIBFILE_WE - filename (without extension) portion of
+# mangled URI
+# The input files should have a .in extension. The resulting file in
+# the build directory will have the .in removed.
+#
# QQQ this currently doesn't support modules with multiple component
# .xq files. (Neither does Zorba's automatic loading mechanism, so
# this probably isn't a huge deal, but worth thinking about.)
@@ -125,7 +136,7 @@
# file enough to deduce the URI and version?
MACRO (DECLARE_ZORBA_MODULE)
# Parse and validate arguments
- PARSE_ARGUMENTS(MODULE "LINK_LIBRARIES;EXTRA_SOURCES"
+ PARSE_ARGUMENTS(MODULE "LINK_LIBRARIES;EXTRA_SOURCES;CONFIG_FILES"
"URI;FILE;VERSION" "TEST_ONLY" ${ARGN})
IF (NOT MODULE_FILE)
MESSAGE (FATAL_ERROR "'FILE' argument is required for ZORBA_DECLARE_MODULE()")
@@ -353,6 +364,20 @@
"${version_infix}" "" 1 "${MODULE_TEST_ONLY}")
ENDFOREACH (version_infix)
+ # Configure any module-specified config files.
+ SET (ZORBA_MODULE_RELATIVE_DIR ${module_path})
+ SET (ZORBA_MODULE_LIBFILE_WE ${module_filewe})
+ FOREACH (_config_file ${MODULE_CONFIG_FILES})
+ # Strip off .in - can't use GET_FILENAME_COMPONENT as it always removes
+ # the longest possible extension
+ STRING (REGEX REPLACE "\\.in$" "" _config_filename_we "${_config_file}")
+ IF (NOT IS_ABSOLUTE "${_config_file}")
+ SET (_config_file "${CMAKE_CURRENT_SOURCE_DIR}/${_config_file}")
+ ENDIF (NOT IS_ABSOLUTE "${_config_file}")
+ CONFIGURE_FILE (${_config_file}
+ "${CMAKE_CURRENT_BINARY_DIR}/${_config_filename_we}" @ONLY)
+ ENDFOREACH (_config_file)
+
# Last but not least, whip up a test case that ensures the module
# can at least be compiled. Don't bother for test-only modules
# (presumably they're there to be tested!).
=== modified file 'include/zorba/item_factory.h'
--- include/zorba/item_factory.h 2012-09-26 17:09:11 +0000
+++ include/zorba/item_factory.h 2012-09-27 23:07:25 +0000
@@ -123,8 +123,8 @@
/** \brief Creates a Base64Binary Item
* see [http://www.w3.org/TR/xmlschema-2/#base64Binary]
*
- * @param aBinData a pointer to the base6c4 binary data.
- * @param aLength the length of the base64 binary data.
+ * @param aBinData a pointer to the base64 encoded data. The data is copied from aBinData.
+ * @param aLength the length of the base64 encoded data.
* @return The Base64Binary Item.
*/
virtual Item
@@ -133,7 +133,7 @@
/** \brief Creates a Base64Binary Item
* see [http://www.w3.org/TR/xmlschema-2/#base64Binary]
*
- * @param aStream A stream containing the Base64 encoded data.
+ * @param aStream A stream containing the Base64 encoded data. The data is copied from aStream immediately.
* @return the Base64Binary Item.
*/
virtual Item
@@ -142,11 +142,11 @@
/** \brief Creates a Base64Binary Item
* see [http://www.w3.org/TR/xmlschema-2/#base64Binary]
*
- * @param aBinData the data in binary form. The data is copied from aBinData.
- * @param aLength the length of the data
+ * @param aBinData the data in binary form (not encoded). The data is copied from aBinData.
+ * @param aLength the length of the binary data
* @return the Base64Binary Item.
*/
- virtual Item
+ virtual Item
createBase64Binary(const unsigned char* aBinData, size_t aLength) = 0;
/** \brief Creates a streamable Base64Binary Item
@@ -742,7 +742,7 @@
* @param aNames A vector containing the name and value of each pair.
*/
virtual Item createJSONObject(std::vector<std::pair<Item, Item> >& aNames) = 0;
-
+
/**
* Create a JSON Array containing the specified items.
*
=== modified file 'modules/ExternalModules.conf'
--- modules/ExternalModules.conf 2012-09-26 04:47:44 +0000
+++ modules/ExternalModules.conf 2012-09-27 23:07:25 +0000
@@ -26,6 +26,7 @@
# "tag" is the VCS tag to check out (optional - defaults to HEAD;
# currently only works for bzr, since svn tags are just different URLS)
+<<<<<<< TREE
archive bzr lp:zorba/archive-module zorba-2.7
csv bzr lp:zorba/csv-module zorba-2.7
data-cleaning bzr lp:zorba/data-cleaning-module zorba-2.7
@@ -47,3 +48,27 @@
schema-tools bzr lp:zorba/schema-tools-module zorba-2.7
stack bzr lp:zorba/stack-module zorba-2.7
queue bzr lp:zorba/queue-module zorba-2.7
+=======
+archive bzr lp:zorba/archive-module zorba-2.6
+csv bzr lp:zorba/csv-module zorba-2.6
+data-cleaning bzr lp:zorba/data-cleaning-module zorba-2.6
+data-formatting bzr lp:zorba/data-formatting-module zorba-2.6
+excel bzr lp:zorba/excel-module zorba-2.6
+geo bzr lp:zorba/geo-module zorba-2.6
+graphviz bzr lp:zorba/graphviz-module zorba-2.6
+html bzr lp:zorba/html-module zorba-2.6
+http-client bzr lp:zorba/http-client-module zorba-2.6
+image bzr lp:zorba/image-module zorba-2.6
+languages bzr lp:zorba/languages-module zorba-2.6
+oauth bzr lp:zorba/oauth-module zorba-2.6
+process bzr lp:zorba/process-module zorba-2.6
+read-pdf bzr lp:zorba/read-pdf-module
+security bzr lp:zorba/security-module zorba-2.6
+system bzr lp:zorba/system-module zorba-2.6
+xqxq bzr lp:zorba/xqxq-module zorba-2.6
+email bzr lp:zorba/email-module zorba-2.6
+util-jvm bzr lp:zorba/util-jvm-module
+schema-tools bzr lp:zorba/schema-tools-module zorba-2.6
+stack bzr lp:zorba/stack-module zorba-2.6
+queue bzr lp:zorba/queue-module zorba-2.6
+>>>>>>> MERGE-SOURCE
=== modified file 'src/store/naive/atomic_items.h'
--- src/store/naive/atomic_items.h 2012-09-17 00:36:37 +0000
+++ src/store/naive/atomic_items.h 2012-09-27 23:07:25 +0000
@@ -1776,7 +1776,7 @@
friend class AtomicItem;
protected:
- int32_t theValue;
+ xs_int theValue;
protected:
IntItem(xs_int aValue) : theValue(aValue) {}
@@ -1792,7 +1792,7 @@
xs_long getLongValue() const { return static_cast<xs_long>(theValue); }
- int32_t getIntValue() const { return theValue; }
+ xs_int getIntValue() const { return theValue; }
store::SchemaTypeCode getTypeCode() const { return store::XS_INT; }
=== modified file 'test/rbkt/modules/CMakeLists.txt'
--- test/rbkt/modules/CMakeLists.txt 2012-09-17 00:36:37 +0000
+++ test/rbkt/modules/CMakeLists.txt 2012-09-27 23:07:25 +0000
@@ -27,8 +27,9 @@
DECLARE_ZORBA_MODULE(URI "http://zorba-tests.28msec.us/modules/B" VERSION 1.0
FILE "${CMAKE_CURRENT_SOURCE_DIR}/module-B.xq" TEST_ONLY)
+# This one also has a CONFIG_FILES argument
DECLARE_ZORBA_MODULE(URI "http://zorba-tests.28msec.us/modules/ext"
- VERSION 2.0 FILE "ext2.xq" TEST_ONLY)
+ VERSION 2.0 FILE "ext2.xq" TEST_ONLY CONFIG_FILES ext2_config.txt.in)
DECLARE_ZORBA_MODULE(URI "http://zorba-tests.28msec.us/modules/ext"
VERSION 1.0 FILE "ext.xq" TEST_ONLY)
=== added file 'test/rbkt/modules/ext2_config.txt.in'
--- test/rbkt/modules/ext2_config.txt.in 1970-01-01 00:00:00 +0000
+++ test/rbkt/modules/ext2_config.txt.in 2012-09-27 23:07:25 +0000
@@ -0,0 +1,1 @@
+This is a test: "@ZORBA_MODULE_RELATIVE_DIR@" "@ZORBA_MODULE_LIBFILE_WE@"
Follow ups