zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #23554
[Merge] lp:~zorba-coders/zorba/update3.0_info-extraction-module into lp:zorba/info-extraction-module
Juan Zacarias has proposed merging lp:~zorba-coders/zorba/update3.0_info-extraction-module into lp:zorba/info-extraction-module.
Commit message:
Update to 3.0
Requested reviews:
Chris Hillery (ceejatec)
Related bugs:
Bug #1188043 in Zorba: "Update non-core module "info-extraction""
https://bugs.launchpad.net/zorba/+bug/1188043
For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/update3.0_info-extraction-module/+merge/170463
Update to 3.0
--
https://code.launchpad.net/~zorba-coders/zorba/update3.0_info-extraction-module/+merge/170463
Your team Zorba Coders is subscribed to branch lp:zorba/info-extraction-module.
=== modified file 'src/CMakeLists.txt'
--- src/CMakeLists.txt 2012-07-03 20:02:52 +0000
+++ src/CMakeLists.txt 2013-06-19 22:53:34 +0000
@@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/info-extraction" FILE "info-extraction-module.xq")
+DECLARE_ZORBA_MODULE (URI "http://zorba.io/modules/info-extraction" FILE "info-extraction-module.xq")
-DECLARE_ZORBA_SCHEMA (URI "http://www.zorba-xquery.com/modules/info-extraction" FILE "info-extraction-module.xsd")
+DECLARE_ZORBA_SCHEMA (URI "http://zorba.io/modules/info-extraction" FILE "info-extraction-module.xsd")
=== modified file 'src/info-extraction-module.xq'
--- src/info-extraction-module.xq 2013-06-15 19:42:23 +0000
+++ src/info-extraction-module.xq 2013-06-19 22:53:34 +0000
@@ -17,14 +17,14 @@
:)
(:~
- : This library module provides data extraction functions that return a list
- : of entities, relations, categories and concepts present in a given text.
+ : <p>This library module provides data extraction functions that return a list
+ : of entities, relations, categories and concepts present in a given text.</p>
:
: @author Pedro Antunes
: @project Zorba/Data Cleaning/Info Extraction
:)
-module namespace ex = "http://www.zorba-xquery.com/modules/info-extraction";
+module namespace ex = "http://zorba.io/modules/info-extraction";
declare namespace ann = "http://www.zorba-xquery.com/annotations";
@@ -35,9 +35,9 @@
import schema namespace h = "http://expath.org/ns/http-client";
(:~
- : Uses Yahoo's Content Analysis webservice to return a list of entities
- : encountered in the text supplied as input.
- : See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.
+ : <p>Uses Yahoo's Content Analysis webservice to return a list of entities
+ : encountered in the text supplied as input.</p>
+ : <p>See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.</p>
:
: @param $text String to be analyzed
: @return Sequence of recognized entities
@@ -60,9 +60,9 @@
};
(:~
- : Uses Yahoo's Content Analysis webservice to return a list of categories (topics) related
- : to the text supplied as input.
- : See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.
+ : <p>Uses Yahoo's Content Analysis webservice to return a list of categories (topics) related
+ : to the text supplied as input.</p>
+ : <p>See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.</p>
:
: @param $text String to be analyzed
: @return Sequence of recognized categories
@@ -78,9 +78,9 @@
};
(:~
- : Uses Yahoo's Content Analysis webservice to return a list of relations (entities found and related wikipedia links)
- : encountered in the text supplied as input.
- : See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.
+ : <p>Uses Yahoo's Content Analysis webservice to return a list of relations (entities found and related wikipedia links)
+ : encountered in the text supplied as input.</p>
+ : <p>See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.</p>
:
: @param $text String to be analyzed
: @return Sequence of recognized relations
@@ -107,9 +107,9 @@
};
(:~
- : Uses Yahoo's Content Analysis webservice to return a list of concepts (entity found and the corresponding wikipedia link)
- : encountered in the text supplied as input.
- : See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.
+ : <p>Uses Yahoo's Content Analysis webservice to return a list of concepts (entity found and the corresponding wikipedia link)
+ : encountered in the text supplied as input.</p>
+ : <p>See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.</p>
:
: @param $text String to be analyzed
: @return Sequence of recognized concepts
@@ -136,8 +136,8 @@
};
(:~
- : Uses Yahoo's Content Analysis webservice to return the text supplied as input
- : together with entities recognized annotated as xml elements in the text.
+ : <p>Uses Yahoo's Content Analysis webservice to return the text supplied as input
+ : together with entities recognized annotated as xml elements in the text.</p>
:
: @param $text String to be analyzed
: @return Mixed sequence of strings and <ex:entity> elements
@@ -148,9 +148,9 @@
};
(:~
- : Uses Yahoo's Content Analysis webservice to return the text supplied as input
+ : <p>Uses Yahoo's Content Analysis webservice to return the text supplied as input
: together with concepts (entities with corresponding wikipedia link) annotated
- : as xml elements in the text.
+ : as xml elements in the text.</p>
:
: @param $text String to be analyzed
: @return Mixed sequence of strings and <ex:concept> elements
@@ -161,7 +161,7 @@
};
(:~
- : Creates entities inline annotations in a given string
+ : <p>Creates inline entity annotations in a given string.</p>
:
: @param $text String to be analyzed
: @param $entities list of entities found in the given string
@@ -178,7 +178,7 @@
};
(:~
- : Creates concepts inline annotations in a given string
+ : <p>Creates inline concept annotations in a given string.</p>
:
: @param $text String to be analyzed
: @param $concepts list of concepts found in the given string
@@ -189,13 +189,13 @@
if ( count($concepts) = 0 ) then $text
else(substring($text, 0, ($concepts[1]/ex:entity/@start) +1 -$size),
if ( count( $concepts[1]/ex:wikipedia_url ) >= 1 )
- then <ex:concept xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="{$concepts[1]/ex:entity/@start}" end="{$concepts[1]/ex:entity/@end}" url="{$concepts[1]/ex:wikipedia_url[1]/text()}">{$concepts[1]/ex:entity/text()}</ex:concept>
+ then <ex:concept xmlns:ex="http://zorba.io/modules/info-extraction" start="{$concepts[1]/ex:entity/@start}" end="{$concepts[1]/ex:entity/@end}" url="{$concepts[1]/ex:wikipedia_url[1]/text()}">{$concepts[1]/ex:entity/text()}</ex:concept>
else $concepts[1]/ex:entity,
ex:concept-inline-annotation(substring($text, ($concepts[1]/ex:entity/@end) +2 -$size), $concepts[position() >1], ($concepts[1]/ex:entity/@end) +1))
};
(:~
- : Establishes connection with the Yahoo Server
+ : <p>Establishes a connection with the Yahoo server.</p>
:
: @param $text String to be analyzed
: @return XML document returned by the Yahoo Server
=== modified file 'src/info-extraction-module.xsd'
--- src/info-extraction-module.xsd 2012-11-26 16:30:10 +0000
+++ src/info-extraction-module.xsd 2013-06-19 22:53:34 +0000
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
- xmlns="http://www.zorba-xquery.com/modules/info-extraction"
- targetNamespace="http://www.zorba-xquery.com/modules/info-extraction"
+ xmlns="http://zorba.io/modules/info-extraction"
+ targetNamespace="http://zorba.io/modules/info-extraction"
elementFormDefault="qualified">
<xs:element name="wikipedia_url" type="xs:string"/>
=== modified file 'test/Queries/categories.xq'
--- test/Queries/categories.xq 2012-11-26 16:30:10 +0000
+++ test/Queries/categories.xq 2013-06-19 22:53:34 +0000
@@ -1,6 +1,6 @@
-import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+import module namespace ex = 'http://zorba.io/modules/info-extraction';
-import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+import schema namespace schema = 'http://zorba.io/modules/info-extraction';
let $result := ex:categories("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
@@ -10,8 +10,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- The ex:categories function should return the following list of results for the input provided in the example -->
-<ex:category xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">Politics & Government</ex:category>
-<ex:category xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">Budget, Tax & Economy</ex:category>
-<ex:category xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">Government</ex:category>
+<ex:category xmlns:ex="http://zorba.io/modules/info-extraction">Politics & Government</ex:category>
+<ex:category xmlns:ex="http://zorba.io/modules/info-extraction">Budget, Tax & Economy</ex:category>
+<ex:category xmlns:ex="http://zorba.io/modules/info-extraction">Government</ex:category>
:)
=== modified file 'test/Queries/concepts-inline.xq'
--- test/Queries/concepts-inline.xq 2012-11-26 16:30:10 +0000
+++ test/Queries/concepts-inline.xq 2013-06-19 22:53:34 +0000
@@ -1,6 +1,6 @@
-import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+import module namespace ex = 'http://zorba.io/modules/info-extraction';
-import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+import schema namespace schema = 'http://zorba.io/modules/info-extraction';
let $result := ex:concepts-inline("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
=== modified file 'test/Queries/concepts.xq'
--- test/Queries/concepts.xq 2012-11-26 16:30:10 +0000
+++ test/Queries/concepts.xq 2013-06-19 22:53:34 +0000
@@ -1,6 +1,6 @@
-import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+import module namespace ex = 'http://zorba.io/modules/info-extraction';
-import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+import schema namespace schema = 'http://zorba.io/modules/info-extraction';
let $result := ex:concepts("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
@@ -10,11 +10,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- The ex:concepts function should return the following list of results for the input provided in the example -->
-<ex:concept xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">
+<ex:concept xmlns:ex="http://zorba.io/modules/info-extraction">
<ex:entity start="0" end="14">President Obama</ex:entity>
<ex:wikipedia_url>http://en.wikipedia.com/wiki/Barack_Obama</ex:wikipedia_url>
</ex:concept>
-<ex:concept xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">
+<ex:concept xmlns:ex="http://zorba.io/modules/info-extraction">
<ex:entity start="36" end="43">
<ex:type>organization</ex:type>Congress</ex:entity>
<ex:wikipedia_url>http://en.wikipedia.com/wiki/United_States_Congress</ex:wikipedia_url>
=== modified file 'test/Queries/entities-inline.xq'
--- test/Queries/entities-inline.xq 2012-11-26 16:30:10 +0000
+++ test/Queries/entities-inline.xq 2013-06-19 22:53:34 +0000
@@ -1,6 +1,6 @@
-import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+import module namespace ex = 'http://zorba.io/modules/info-extraction';
-import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+import schema namespace schema = 'http://zorba.io/modules/info-extraction';
let $result := ex:entities-inline("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
@@ -10,6 +10,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- The ex:entities-inline function should return the following list of results for the input provided in the example -->
-<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="0" end="14">President Obama</ex:entity> called Wednesday on <ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="36" end="43" type="organization">Congress</ex:entity> to extend a <ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="57" end="65">tax break</ex:entity> for students included in last year's <ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="104" end="128">economic stimulus package</ex:entity>, arguing that the policy provides more <ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="169" end="187">generous assistance</ex:entity>.
+<ex:entity xmlns:ex="http://zorba.io/modules/info-extraction" start="0" end="14">President Obama</ex:entity> called Wednesday on <ex:entity xmlns:ex="http://zorba.io/modules/info-extraction" start="36" end="43" type="organization">Congress</ex:entity> to extend a <ex:entity xmlns:ex="http://zorba.io/modules/info-extraction" start="57" end="65">tax break</ex:entity> for students included in last year's <ex:entity xmlns:ex="http://zorba.io/modules/info-extraction" start="104" end="128">economic stimulus package</ex:entity>, arguing that the policy provides more <ex:entity xmlns:ex="http://zorba.io/modules/info-extraction" start="169" end="187">generous assistance</ex:entity>.
:)
=== modified file 'test/Queries/entities.xq'
--- test/Queries/entities.xq 2012-11-26 16:30:10 +0000
+++ test/Queries/entities.xq 2013-06-19 22:53:34 +0000
@@ -1,6 +1,6 @@
-import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+import module namespace ex = 'http://zorba.io/modules/info-extraction';
-import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+import schema namespace schema = 'http://zorba.io/modules/info-extraction';
let $result := ex:entities("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
@@ -10,11 +10,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- The ex:entities function should return the following list of results for the input provided in the example -->
-<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="0" end="14">President Obama</ex:entity>
-<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="36" end="43">
+<ex:entity xmlns:ex="http://zorba.io/modules/info-extraction" start="0" end="14">President Obama</ex:entity>
+<ex:entity xmlns:ex="http://zorba.io/modules/info-extraction" start="36" end="43">
<ex:type>organization</ex:type>Congress</ex:entity>
-<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="57" end="65">tax break</ex:entity>
-<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="104" end="128">economic stimulus package</ex:entity>
-<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="169" end="187">generous assistance</ex:entity>
+<ex:entity xmlns:ex="http://zorba.io/modules/info-extraction" start="57" end="65">tax break</ex:entity>
+<ex:entity xmlns:ex="http://zorba.io/modules/info-extraction" start="104" end="128">economic stimulus package</ex:entity>
+<ex:entity xmlns:ex="http://zorba.io/modules/info-extraction" start="169" end="187">generous assistance</ex:entity>
:)
=== modified file 'test/Queries/relations.xq'
--- test/Queries/relations.xq 2012-11-26 16:30:10 +0000
+++ test/Queries/relations.xq 2013-06-19 22:53:34 +0000
@@ -1,6 +1,6 @@
-import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+import module namespace ex = 'http://zorba.io/modules/info-extraction';
-import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+import schema namespace schema = 'http://zorba.io/modules/info-extraction';
let $result := ex:relations("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
@@ -10,7 +10,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- The ex:relations function should return the following list of results for the input provided in the example -->
-<ex:relation xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">
+<ex:relation xmlns:ex="http://zorba.io/modules/info-extraction">
<ex:entity start="0" end="14">President Obama</ex:entity>
<ex:wikipedia_url>http://en.wikipedia.com/wiki/Gabrielle_Giffords</ex:wikipedia_url>
<ex:wikipedia_url>http://en.wikipedia.com/wiki/2011_Tucson_shooting</ex:wikipedia_url>
@@ -18,7 +18,7 @@
<ex:wikipedia_url>http://en.wikipedia.com/wiki/White_House</ex:wikipedia_url>
<ex:wikipedia_url>http://en.wikipedia.com/wiki/Recall_%28memory%29</ex:wikipedia_url>
</ex:relation>
-<ex:relation xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">
+<ex:relation xmlns:ex="http://zorba.io/modules/info-extraction">
<ex:entity start="36" end="43">
<ex:type>organization</ex:type>Congress</ex:entity>
<ex:wikipedia_url>http://en.wikipedia.com/wiki/Republican_Party_%28United_States%29</ex:wikipedia_url>
Follow ups