zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #23700
[Merge] lp:~zorba-coders/zorba/info-extraction-yahoo-broken into lp:zorba
Chris Hillery has proposed merging lp:~zorba-coders/zorba/info-extraction-yahoo-broken into lp:zorba.
Requested reviews:
Chris Hillery (ceejatec)
For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/info-extraction-yahoo-broken/+merge/171045
--
https://code.launchpad.net/~zorba-coders/zorba/info-extraction-yahoo-broken/+merge/171045
Your team Zorba Coders is subscribed to branch lp:zorba.
=== added file 'CMakeLists.txt'
--- CMakeLists.txt 1970-01-01 00:00:00 +0000
+++ CMakeLists.txt 2013-06-24 09:52:30 +0000
@@ -0,0 +1,29 @@
+# Copyright 2006-2010 The FLWOR Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+CMAKE_MINIMUM_REQUIRED (VERSION 2.6)
+
+PROJECT (zorba_info-extraction_module)
+
+ENABLE_TESTING ()
+INCLUDE (CTest)
+
+FIND_PACKAGE (Zorba REQUIRED HINTS "${ZORBA_BUILD_DIR}")
+INCLUDE ("${Zorba_USE_FILE}")
+
+ADD_SUBDIRECTORY("src")
+
+ADD_TEST_DIRECTORY("${PROJECT_SOURCE_DIR}/test")
+EXPECTED_FAILURE(zorba_info-extraction_module/relations.xq 1194069)
+DONE_DECLARING_ZORBA_URIS ()
=== renamed file 'CMakeLists.txt' => 'CMakeLists.txt.moved'
=== added directory 'src'
=== renamed directory 'src' => 'src.moved'
=== added file 'src/CMakeLists.txt'
--- src/CMakeLists.txt 1970-01-01 00:00:00 +0000
+++ src/CMakeLists.txt 2013-06-24 09:52:30 +0000
@@ -0,0 +1,17 @@
+# Copyright 2006-2008 The FLWOR Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/info-extraction" FILE "info-extraction-module.xq")
+
+DECLARE_ZORBA_SCHEMA (URI "http://www.zorba-xquery.com/modules/info-extraction" FILE "info-extraction-module.xsd")
=== added file 'src/info-extraction-module.xq'
--- src/info-extraction-module.xq 1970-01-01 00:00:00 +0000
+++ src/info-extraction-module.xq 2013-06-24 09:52:30 +0000
@@ -0,0 +1,217 @@
+xquery version "3.0";
+
+(:
+ : Copyright 2006-2009 The FLWOR Foundation.
+ :
+ : Licensed under the Apache License, Version 2.0 (the "License");
+ : you may not use this file except in compliance with the License.
+ : You may obtain a copy of the License at
+ :
+ : http://www.apache.org/licenses/LICENSE-2.0
+ :
+ : Unless required by applicable law or agreed to in writing, software
+ : distributed under the License is distributed on an "AS IS" BASIS,
+ : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ : See the License for the specific language governing permissions and
+ : limitations under the License.
+ :)
+
+(:~
+ : This library module provides data extraction functions that return a list
+ : of entities, relations, categories and concepts present in a given text.
+ :
+ : @author Pedro Antunes
+ : @project Zorba/Data Cleaning/Info Extraction
+ :)
+
+module namespace ex = "http://www.zorba-xquery.com/modules/info-extraction";
+
+declare namespace ann = "http://www.zorba-xquery.com/annotations";
+
+declare namespace yahoo = "urn:yahoo:cap";
+
+import module namespace http = "http://www.zorba-xquery.com/modules/http-client";
+
+import schema namespace h = "http://expath.org/ns/http-client";
+
+(:~
+ : Uses Yahoo's Content Analysis webservice to return a list of entities
+ : encountered in the text supplied as input.
+ : See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.
+ :
+ : @param $text String to be analyzed
+ : @return Sequence of recognized entities
+ : @example test/Queries/entities.xq
+ :)
+declare %ann:sequential function ex:entities($text as xs:string) as element(ex:entity)*{
+  let $response := ex:server-connection($text)
+  (: Iterating an empty sequence yields (), so a response without entities returns nothing. :)
+  for $entity in $response/query/results/yahoo:entities/yahoo:entity
+  (: yahoo:text carries both the matched string and its start/end offsets. :)
+  let $span := $entity/yahoo:text
+  (: Entities are returned ordered by their start offset in the text. :)
+  order by xs:integer($span/@start)
+  return
+    <ex:entity start="{$span/@start}" end="{$span/@end}">{
+      (: Each type loses a leading "/" or "letters:/" prefix. :)
+      for $type in $entity/yahoo:types/yahoo:type
+      return <ex:type>{ replace($type/text(), '^/|^[a-zA-Z]*:/','') }</ex:type>
+    }{ $span/text() }</ex:entity>
+};
+
+(:~
+ : Uses Yahoo's Content Analysis webservice to return a list of categories (topics) related
+ : to the text supplied as input.
+ : See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.
+ :
+ : @param $text String to be analyzed
+ : @return Sequence of recognized categories
+ : @example test/Queries/categories.xq
+ :)
+declare %ann:sequential function ex:categories($text as xs:string) as element(ex:category)*{
+  let $response := ex:server-connection($text)
+  (: $text is typed xs:string to match ex:entities and ex:server-connection.
+     Iterating an empty sequence yields (), so no explicit emptiness check is needed. :)
+  for $category in $response/query/results/yahoo:yctCategories/yahoo:yctCategory
+  return
+    <ex:category>{ $category/text() }</ex:category>
+};
+
+(:~
+ : Uses Yahoo's Content Analysis webservice to return a list of relations (entities found and related wikipedia links)
+ : encountered in the text supplied as input.
+ : See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.
+ :
+ : @param $text String to be analyzed
+ : @return Sequence of recognized relations
+ : @example test/Queries/relations.xq
+ :)
+declare %ann:sequential function ex:relations($text as xs:string) as element(ex:relation)*{
+  let $response := ex:server-connection($text)
+  (: One ex:relation per related_entities element; a response without any produces (). :)
+  for $relation in $response/query/results/yahoo:entities/yahoo:entity/yahoo:related_entities
+  (: The entity that owns this group of related links. :)
+  let $entity := $relation/..
+  return
+    <ex:relation>{
+      <ex:entity start="{$entity/yahoo:text/@start}" end="{$entity/yahoo:text/@end}">{
+        (: Each type loses a leading "/" or "letters:/" prefix. :)
+        for $type in $entity/yahoo:types/yahoo:type
+        return <ex:type>{ replace($type/text(), '^/|^[a-zA-Z]*:/','') }</ex:type>
+      }{ $entity/yahoo:text/text() }</ex:entity>,
+      (: "," (not "union") keeps the entity first: union orders nodes from different
+         trees in an implementation-dependent way, which can break the schema's sequence. :)
+      for $link in $relation/yahoo:wikipedia/yahoo:wiki_url
+      return <ex:wikipedia_url>{$link/text()}</ex:wikipedia_url>
+    }</ex:relation>
+};
+
+(:~
+ : Uses Yahoo's Content Analysis webservice to return a list of concepts (entity found and the corresponding wikipedia link)
+ : encountered in the text supplied as input.
+ : See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.
+ :
+ : @param $text String to be analyzed
+ : @return Sequence of recognized concepts
+ : @example test/Queries/concepts.xq
+ :)
+declare %ann:sequential function ex:concepts($text as xs:string) as element(ex:concept)*{
+  let $response := ex:server-connection($text)
+  (: One ex:concept per wiki_url element found directly under an entity. :)
+  for $link in $response/query/results/yahoo:entities/yahoo:entity/yahoo:wiki_url
+  let $entity := $link/..
+  (: Concepts are returned ordered by the start offset of their entity in the text. :)
+  order by xs:integer($entity/yahoo:text/@start)
+  return
+    <ex:concept>{
+      <ex:entity start="{$entity/yahoo:text/@start}" end="{$entity/yahoo:text/@end}">{
+        (: Each type loses a leading "/" or "letters:/" prefix. :)
+        for $type in $entity/yahoo:types/yahoo:type
+        return <ex:type>{ replace($type/text(), '^/|^[a-zA-Z]*:/','') }</ex:type>
+      }{ $entity/yahoo:text/text() }</ex:entity>,
+      (: "," (not "union") keeps the entity first: union orders nodes from different
+         trees in an implementation-dependent way, which can break the schema's sequence. :)
+      <ex:wikipedia_url>{$link/text()}</ex:wikipedia_url>
+    }</ex:concept>
+};
+
+(:~
+ : Uses Yahoo's Content Analysis webservice to return the text supplied as input
+ : together with entities recognized annotated as xml elements in the text.
+ :
+ : @param $text String to be analyzed
+ : @return Mixed sequence of strings and <ex:entity> elements
+ : @example test/Queries/entities-inline.xq
+ :)
+declare %ann:sequential function ex:entities-inline($text as xs:string) as item()*{
+  ex:entity-inline-annotation($text, ex:entities($text), 0 (: characters of $text consumed so far :))
+};
+
+(:~
+ : Uses Yahoo's Content Analysis webservice to return the text supplied as input
+ : together with concepts (entities with corresponding wikipedia link) annotated
+ : as xml elements in the text.
+ :
+ : @param $text String to be analyzed
+ : @return Mixed sequence of strings and <ex:concept> elements
+ : @example test/Queries/concepts-inline.xq
+ :)
+declare %ann:sequential function ex:concepts-inline($text as xs:string) as item()*{
+  ex:concept-inline-annotation($text, ex:concepts($text), 0 (: characters of $text consumed so far :))
+};
+
+(:~
+ : Creates entities inline annotations in a given string
+ :
+ : @param $text String to be analyzed
+ : @param $entities list of entities found in the given string
+ : @param $size size of the remaining string
+ : @return Mixed sequence of strings and <ex:entity> elements
+ :)
+declare %private function ex:entity-inline-annotation($text, $entities, $size) as item()*{
+  if ( empty($entities) ) then $text (: nothing left to annotate: emit the remaining text :)
+  else let $head := $entities[1], $consumed := $head/@end + 1 (: characters of the full text used up once $head is emitted :)
+  return (substring($text, 1, $head/@start - $size), (: plain text preceding the entity :)
+    if ( exists($head/ex:type) ) then <ex:entity start="{$head/@start}" end="{$head/@end}" type="{$head/ex:type[1]}">{$head/text()}</ex:entity>
+    else $head,
+    ex:entity-inline-annotation(substring($text, $consumed + 1 - $size), subsequence($entities, 2), $consumed))
+};
+
+(:~
+ : Creates concepts inline annotations in a given string
+ :
+ : @param $text String to be analyzed
+ : @param $concepts list of concepts found in the given string
+ : @param $size size of the remaining string
+ : @return Mixed sequence of strings and <ex:concept> elements
+ :)
+declare %private function ex:concept-inline-annotation($text, $concepts, $size) as item()*{
+  if ( empty($concepts) ) then $text (: no concepts left: emit the remaining text unchanged :)
+  else let $head := $concepts[1], $entity := $head/ex:entity, $consumed := $entity/@end + 1
+  return (substring($text, 1, $entity/@start - $size), (: plain text preceding this concept :)
+    if ( exists($head/ex:wikipedia_url) ) then <ex:concept xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="{$entity/@start}" end="{$entity/@end}" url="{$head/ex:wikipedia_url[1]/text()}">{$entity/text()}</ex:concept>
+    else $entity,
+    ex:concept-inline-annotation(substring($text, $consumed + 1 - $size), subsequence($concepts, 2), $consumed))
+};
+
+(:~
+ : Establishes connection with the Yahoo Server
+ :
+ : @param $text String to be analyzed
+ : @return XML document returned by the Yahoo Server
+ :)
+declare %private %ann:sequential function ex:server-connection($text as xs:string){
+  (: Double quotes in $text become apostrophes so they cannot terminate the YQL string literal early. :)
+  let $yql := concat('select * from contentanalysis.analyze where text="', replace(normalize-space($text), '"', "'"), '"')
+  (: encode-for-uri also escapes reserved characters (&, =, +, %) that iri-to-uri would leave in the form body. :)
+  let $uri := concat("q=", encode-for-uri($yql))
+  let $req :=
+    <h:request method="POST" href="http://query.yahooapis.com/v1/public/yql">
+      <h:header name="Connection" value="keep-alive"/>
+      <h:body media-type="application/x-www-form-urlencoded">{$uri}</h:body>
+    </h:request>
+  let $response := http:send-request($req, (), ())
+  (: Item 1 is the response descriptor, item 2 the body; any status other than 200 yields the empty sequence. :)
+  return if ($response[1]/@status = 200)
+    then $response[2] else ()
+};
=== added file 'src/info-extraction-module.xsd'
--- src/info-extraction-module.xsd 1970-01-01 00:00:00 +0000
+++ src/info-extraction-module.xsd 2013-06-24 09:52:30 +0000
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
+ xmlns="http://www.zorba-xquery.com/modules/info-extraction"
+ targetNamespace="http://www.zorba-xquery.com/modules/info-extraction"
+ elementFormDefault="qualified">
+
+ <xs:element name="wikipedia_url" type="xs:string"/>
+ <xs:element name="category" type="xs:string"/>
+
+ <xs:element name="entity">
+ <xs:complexType mixed="true">
+ <xs:sequence>
+ <xs:element name="type" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
+ </xs:sequence>
+ <xs:attribute name="start" type="xs:integer" use="required"/>
+ <xs:attribute name="end" type="xs:integer" use="required"/>
+ <xs:attribute name="type" type="xs:string"/>
+ </xs:complexType>
+ </xs:element>
+
+ <xs:element name="concept">
+ <xs:complexType mixed="true">
+ <xs:sequence>
+ <xs:element ref="entity" minOccurs="0" maxOccurs="1"/>
+ <xs:element ref="wikipedia_url" minOccurs="0" maxOccurs="1"/>
+ </xs:sequence>
+ <xs:attribute name="start" type="xs:integer"/>
+ <xs:attribute name="end" type="xs:integer"/>
+ <xs:attribute name="url" type="xs:string"/>
+ </xs:complexType>
+ </xs:element>
+
+ <xs:element name="relation">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element ref="entity" minOccurs="1" maxOccurs="1"/>
+ <xs:element ref="wikipedia_url" minOccurs="1" maxOccurs="unbounded"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+</xs:schema>
=== added directory 'test'
=== renamed directory 'test' => 'test.moved'
=== added directory 'test/ExpQueryResults'
=== added file 'test/ExpQueryResults/categories.xml.res'
--- test/ExpQueryResults/categories.xml.res 1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/categories.xml.res 2013-06-24 09:52:30 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+true
\ No newline at end of file
=== added file 'test/ExpQueryResults/concepts-inline.xml.res'
--- test/ExpQueryResults/concepts-inline.xml.res 1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/concepts-inline.xml.res 2013-06-24 09:52:30 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+true
=== added file 'test/ExpQueryResults/concepts.xml.res'
--- test/ExpQueryResults/concepts.xml.res 1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/concepts.xml.res 2013-06-24 09:52:30 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+true
\ No newline at end of file
=== added file 'test/ExpQueryResults/entities-inline.xml.res'
--- test/ExpQueryResults/entities-inline.xml.res 1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/entities-inline.xml.res 2013-06-24 09:52:30 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+true
=== added file 'test/ExpQueryResults/entities.xml.res'
--- test/ExpQueryResults/entities.xml.res 1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/entities.xml.res 2013-06-24 09:52:30 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+true
\ No newline at end of file
=== added file 'test/ExpQueryResults/relations.xml.res'
--- test/ExpQueryResults/relations.xml.res 1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/relations.xml.res 2013-06-24 09:52:30 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+true
\ No newline at end of file
=== added directory 'test/Queries'
=== added file 'test/Queries/categories.xq'
--- test/Queries/categories.xq 1970-01-01 00:00:00 +0000
+++ test/Queries/categories.xq 2013-06-24 09:52:30 +0000
@@ -0,0 +1,17 @@
+import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+let $result := ex:categories("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
+
+return count ( for $c in $result return validate { $c } ) > 0
+
+(:
+
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- The ex:categories function should return the following list of results for the input provided in the example -->
+<ex:category xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">Politics & Government</ex:category>
+<ex:category xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">Budget, Tax & Economy</ex:category>
+<ex:category xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">Government</ex:category>
+
+:)
=== added file 'test/Queries/concepts-inline.xq'
--- test/Queries/concepts-inline.xq 1970-01-01 00:00:00 +0000
+++ test/Queries/concepts-inline.xq 2013-06-24 09:52:30 +0000
@@ -0,0 +1,15 @@
+import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+let $result := ex:concepts-inline("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
+
+return count ( for $c in <aux>{$result}</aux>/ex:concept return validate { $c } ) > 0
+
+(:
+
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- The ex:concepts-inline function should return the following list of results for the input provided in the example -->
+President Obama called Wednesday on <concept start="36" end="43" url="http://en.wikipedia.com/wiki/United_States_Congress">Congress</concept> to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.
+
+:)
=== added file 'test/Queries/concepts.xq'
--- test/Queries/concepts.xq 1970-01-01 00:00:00 +0000
+++ test/Queries/concepts.xq 2013-06-24 09:52:30 +0000
@@ -0,0 +1,23 @@
+import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+let $result := ex:concepts("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
+
+return count ( for $c in $result return validate { $c } ) > 0
+
+(:
+
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- The ex:concepts function should return the following list of results for the input provided in the example -->
+<ex:concept xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">
+ <ex:entity start="0" end="14">President Obama</ex:entity>
+ <ex:wikipedia_url>http://en.wikipedia.com/wiki/Barack_Obama</ex:wikipedia_url>
+</ex:concept>
+<ex:concept xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">
+ <ex:entity start="36" end="43">
+ <ex:type>organization</ex:type>Congress</ex:entity>
+ <ex:wikipedia_url>http://en.wikipedia.com/wiki/United_States_Congress</ex:wikipedia_url>
+</ex:concept>
+
+:)
=== added file 'test/Queries/entities-inline.xq'
--- test/Queries/entities-inline.xq 1970-01-01 00:00:00 +0000
+++ test/Queries/entities-inline.xq 2013-06-24 09:52:30 +0000
@@ -0,0 +1,15 @@
+import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+let $result := ex:entities-inline("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
+
+return count ( for $c in <aux>{$result}</aux>/ex:entity return validate { $c } ) > 0
+
+(:
+
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- The ex:entities-inline function should return the following list of results for the input provided in the example -->
+<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="0" end="14">President Obama</ex:entity> called Wednesday on <ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="36" end="43" type="organization">Congress</ex:entity> to extend a <ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="57" end="65">tax break</ex:entity> for students included in last year's <ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="104" end="128">economic stimulus package</ex:entity>, arguing that the policy provides more <ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="169" end="187">generous assistance</ex:entity>.
+
+:)
=== added file 'test/Queries/entities.xq'
--- test/Queries/entities.xq 1970-01-01 00:00:00 +0000
+++ test/Queries/entities.xq 2013-06-24 09:52:30 +0000
@@ -0,0 +1,20 @@
+import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+let $result := ex:entities("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
+
+return count ( for $e in $result return validate { $e } ) > 0
+
+(:
+
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- The ex:entities function should return the following list of results for the input provided in the example -->
+<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="0" end="14">President Obama</ex:entity>
+<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="36" end="43">
+ <ex:type>organization</ex:type>Congress</ex:entity>
+<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="57" end="65">tax break</ex:entity>
+<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="104" end="128">economic stimulus package</ex:entity>
+<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="169" end="187">generous assistance</ex:entity>
+
+:)
=== added file 'test/Queries/relations.xq'
--- test/Queries/relations.xq 1970-01-01 00:00:00 +0000
+++ test/Queries/relations.xq 2013-06-24 09:52:30 +0000
@@ -0,0 +1,31 @@
+import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+let $result := ex:relations("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
+
+return count ( for $r in $result return validate { $r } ) > 0
+
+(:
+
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- The ex:relations function should return the following list of results for the input provided in the example -->
+<ex:relation xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">
+ <ex:entity start="0" end="14">President Obama</ex:entity>
+ <ex:wikipedia_url>http://en.wikipedia.com/wiki/Gabrielle_Giffords</ex:wikipedia_url>
+ <ex:wikipedia_url>http://en.wikipedia.com/wiki/2011_Tucson_shooting</ex:wikipedia_url>
+ <ex:wikipedia_url>http://en.wikipedia.com/wiki/Fray</ex:wikipedia_url>
+ <ex:wikipedia_url>http://en.wikipedia.com/wiki/White_House</ex:wikipedia_url>
+ <ex:wikipedia_url>http://en.wikipedia.com/wiki/Recall_%28memory%29</ex:wikipedia_url>
+</ex:relation>
+<ex:relation xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">
+ <ex:entity start="36" end="43">
+ <ex:type>organization</ex:type>Congress</ex:entity>
+ <ex:wikipedia_url>http://en.wikipedia.com/wiki/Republican_Party_%28United_States%29</ex:wikipedia_url>
+ <ex:wikipedia_url>http://en.wikipedia.com/wiki/Barack_Obama</ex:wikipedia_url>
+ <ex:wikipedia_url>http://en.wikipedia.com/wiki/Roger_Clemens</ex:wikipedia_url>
+ <ex:wikipedia_url>http://en.wikipedia.com/wiki/Democratic_Party_%28United_States%29</ex:wikipedia_url>
+ <ex:wikipedia_url>http://en.wikipedia.com/wiki/Growth_hormone</ex:wikipedia_url>
+</ex:relation>
+
+:)
Follow ups