zorba-coders team mailing list archive

Thread
Date

[Merge] lp:~zorba-coders/zorba/info-extraction-yahoo-broken into lp:zorba

To: mp+171045@xxxxxxxxxxxxxxxxxx
From: Chris Hillery <chillery+launchpad@xxxxxxxxx>
Date: Mon, 24 Jun 2013 09:53:22 -0000
Reply-to: mp+171045@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

Chris Hillery has proposed merging lp:~zorba-coders/zorba/info-extraction-yahoo-broken into lp:zorba.

Requested reviews:
  Chris Hillery (ceejatec)

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/info-extraction-yahoo-broken/+merge/171045
-- 
https://code.launchpad.net/~zorba-coders/zorba/info-extraction-yahoo-broken/+merge/171045
Your team Zorba Coders is subscribed to branch lp:zorba.

=== added file 'CMakeLists.txt'
--- CMakeLists.txt	1970-01-01 00:00:00 +0000
+++ CMakeLists.txt	2013-06-24 09:52:30 +0000
@@ -0,0 +1,29 @@
+# Copyright 2006-2010 The FLWOR Foundation.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+CMAKE_MINIMUM_REQUIRED (VERSION 2.6)
+
+PROJECT (zorba_info-extraction_module)
+
+ENABLE_TESTING ()
+INCLUDE (CTest)
+# Zorba is mandatory (REQUIRED); ZORBA_BUILD_DIR is only a hint for where to look.
+FIND_PACKAGE (Zorba REQUIRED HINTS "${ZORBA_BUILD_DIR}")
+INCLUDE ("${Zorba_USE_FILE}")  # presumably supplies the Zorba macros used below - confirm
+# The module and schema declarations live in src/CMakeLists.txt.
+ADD_SUBDIRECTORY("src")
+# relations.xq is registered as an expected failure; 1194069 looks like a bug id - confirm.
+ADD_TEST_DIRECTORY("${PROJECT_SOURCE_DIR}/test")
+EXPECTED_FAILURE(zorba_info-extraction_module/relations.xq 1194069)
+DONE_DECLARING_ZORBA_URIS ()

=== renamed file 'CMakeLists.txt' => 'CMakeLists.txt.moved'
=== added directory 'src'
=== renamed directory 'src' => 'src.moved'
=== added file 'src/CMakeLists.txt'
--- src/CMakeLists.txt	1970-01-01 00:00:00 +0000
+++ src/CMakeLists.txt	2013-06-24 09:52:30 +0000
@@ -0,0 +1,17 @@
+# Copyright 2006-2008 The FLWOR Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/info-extraction" FILE "info-extraction-module.xq")
+# The module (above) and its XML Schema (below) are registered under the same URI.
+DECLARE_ZORBA_SCHEMA (URI "http://www.zorba-xquery.com/modules/info-extraction" FILE "info-extraction-module.xsd")

=== added file 'src/info-extraction-module.xq'
--- src/info-extraction-module.xq	1970-01-01 00:00:00 +0000
+++ src/info-extraction-module.xq	2013-06-24 09:52:30 +0000
@@ -0,0 +1,217 @@
+xquery version "3.0";
+
+(:
+ : Copyright 2006-2009 The FLWOR Foundation.
+ :
+ : Licensed under the Apache License, Version 2.0 (the "License");
+ : you may not use this file except in compliance with the License.
+ : You may obtain a copy of the License at
+ :
+ : http://www.apache.org/licenses/LICENSE-2.0
+ :
+ : Unless required by applicable law or agreed to in writing, software
+ : distributed under the License is distributed on an "AS IS" BASIS,
+ : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ : See the License for the specific language governing permissions and
+ : limitations under the License.
+ :)
+ 
+(:~
+ : This library module provides data extraction functions that return a list  
+ : of entities, relations, categories and concepts present in a given text.
+ :
+ : @author Pedro Antunes
+ : @project Zorba/Data Cleaning/Info Extraction
+ :)
+
+module namespace ex = "http://www.zorba-xquery.com/modules/info-extraction";
+(: prefix of the %ann:sequential annotation used on the functions of this module :)
+declare namespace ann = "http://www.zorba-xquery.com/annotations";
+(: prefix used in the path expressions that read the service response :)
+declare namespace yahoo = "urn:yahoo:cap";
+(: provides http:send-request, called by ex:server-connection :)
+import module namespace http = "http://www.zorba-xquery.com/modules/http-client";
+(: namespace of the h:request element built by ex:server-connection :)
+import schema namespace h = "http://expath.org/ns/http-client";
+
+(:~
+ : Asks Yahoo's Content Analysis webservice for the entities it recognizes in
+ : the supplied text and returns them ordered by their start offset. Every
+ : ex:entity carries the reported start and end offsets, one ex:type child per
+ : reported type (without its leading "/" or "letters:/") and the matched text.
+ : See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.
+ :
+ : @param $text String to be analyzed
+ : @return Sequence of recognized entities
+ : @example test/Queries/entities.xq
+ :)
+declare %ann:sequential function ex:entities($text as xs:string) as element(ex:entity)*{
+    let $response := ex:server-connection($text)
+    return
+        (: iterating over an empty sequence yields nothing, so no emptiness tests are needed :)
+        for $hit in $response/query/results/yahoo:entities/yahoo:entity
+        let $span := $hit/yahoo:text
+        order by xs:integer($span/@start)
+        return <ex:entity start="{$span/@start}" end="{$span/@end}">{
+            for $kind in $hit/yahoo:types/yahoo:type
+            return <ex:type>{ replace($kind/text(), '^/|^[a-zA-Z]*:/','') }</ex:type>
+        }
+        { $span/text() }</ex:entity>
+};
+
+(:~
+ : Asks Yahoo's Content Analysis webservice for the categories (topics) it
+ : associates with the supplied text and wraps each one in an ex:category
+ : element, in the order in which the service response lists them.
+ : See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.
+ :
+ : @param $text String to be analyzed
+ : @return Sequence of recognized categories
+ : @example test/Queries/categories.xq
+ :)
+declare %ann:sequential function ex:categories($text) as element(ex:category)*{
+    let $response := ex:server-connection($text)
+    return
+        (: an empty path result simply produces an empty sequence :)
+        for $topic in $response/query/results/yahoo:yctCategories/yahoo:yctCategory
+        return <ex:category>{ $topic/text() }</ex:category>
+};
+
+(:~
+ : Uses Yahoo's Content Analysis webservice to return a list of relations (entities found and related wikipedia links)
+ : encountered in the text supplied as input.
+ : See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.
+ :
+ : @param $text String to be analyzed
+ : @return Sequence of recognized relations; each ex:relation contains the
+ :         ex:entity first, followed by its ex:wikipedia_url elements
+ : @example test/Queries/relations.xq
+ :)
+declare %ann:sequential function ex:relations($text) as element(ex:relation)*{
+    let $response := ex:server-connection($text)
+    let $relations := $response/query/results/yahoo:entities/yahoo:entity/yahoo:related_entities
+    return
+        for $relation in $relations
+        let $entity := $relation/..
+        return <ex:relation>{
+            <ex:entity start="{$entity/yahoo:text/@start}" end="{$entity/yahoo:text/@end}">{
+                for $type in $entity/yahoo:types/yahoo:type
+                return <ex:type>{ replace($type/text(), '^/|^[a-zA-Z]*:/','') }</ex:type>
+            }
+            { $entity/yahoo:text/text() }</ex:entity>,
+            (: "," instead of "union": the constructed elements sit in separate trees, so their
+               relative document order, which "union" sorts by, is implementation-dependent :)
+            (for $link in $relation/yahoo:wikipedia/yahoo:wiki_url
+            return <ex:wikipedia_url>{$link/text()}</ex:wikipedia_url>)
+        }</ex:relation>
+};
+
+(:~
+ : Uses Yahoo's Content Analysis webservice to return a list of concepts (entity found and the corresponding wikipedia link) 
+ : encountered in the text supplied as input.
+ : See http://developer.yahoo.com/search/content/V2/contentAnalysis.html for more information.
+ :
+ : @param $text String to be analyzed
+ : @return Sequence of recognized concepts, ordered by the start offset of their entity;
+ :         each ex:concept contains the ex:entity first, followed by its ex:wikipedia_url
+ : @example test/Queries/concepts.xq
+ :)
+declare %ann:sequential function ex:concepts($text) as element(ex:concept)*{
+    let $response := ex:server-connection($text)
+    let $concepts := $response/query/results/yahoo:entities/yahoo:entity/yahoo:wiki_url
+    return
+        for $link in $concepts
+        let $entity := $link/..
+        order by xs:integer($entity/yahoo:text/@start)
+        return <ex:concept>{
+            <ex:entity start="{$entity/yahoo:text/@start}" end="{$entity/yahoo:text/@end}">{
+                for $type in $entity/yahoo:types/yahoo:type
+                return <ex:type>{ replace($type/text(), '^/|^[a-zA-Z]*:/','') }</ex:type>
+            }
+            { $entity/yahoo:text/text() }</ex:entity>,
+            (: "," instead of "union": the constructed elements sit in separate trees, so their
+               relative document order, which "union" sorts by, is implementation-dependent :)
+            <ex:wikipedia_url>{$link/text()}</ex:wikipedia_url>
+        }</ex:concept>
+};
+
+(:~
+ : Uses Yahoo's Content Analysis webservice (through ex:entities) to return the text
+ : supplied as input with the recognized entities annotated as xml elements in the text.
+ :
+ : @param $text String to be analyzed
+ : @return Mixed sequence of strings and &lt;ex:entity&gt; elements
+ : @example test/Queries/entities-inline.xq
+ :)
+declare %ann:sequential function ex:entities-inline($text) as item()*{
+   ex:entity-inline-annotation($text , ex:entities($text), 0) (: 0: no text consumed yet :)
+};
+
+(:~
+ : Uses Yahoo's Content Analysis webservice (through ex:concepts) to return the
+ : text supplied as input with the recognized concepts (entities with corresponding
+ : wikipedia link) annotated as xml elements in the text.
+ :
+ : @param $text String to be analyzed
+ : @return Mixed sequence of strings and &lt;ex:concept&gt; elements
+ : @example test/Queries/concepts-inline.xq
+ :)
+declare %ann:sequential function ex:concepts-inline($text) as item()*{
+   ex:concept-inline-annotation($text , ex:concepts($text), 0) (: 0: no text consumed yet :)
+};
+
+(:~
+ : Creates entities inline annotations in a given string. The arithmetic below treats @start
+ : and @end as 0-based, inclusive offsets into the original text; substring() is 1-based.
+ : @param $text the part of the original string that has not been emitted yet
+ : @param $entities list of entities found in the given string
+ : @param $size number of characters of the original string already consumed (0 on the first call)
+ : @return Mixed sequence of strings and &lt;ex:entity&gt; elements
+ :)
+declare %private function ex:entity-inline-annotation($text, $entities, $size) as item()*{
+    if ( count($entities) = 0 ) then $text (: no entity left: emit the rest of the text :)
+    else(substring($text, 0, ($entities[1]/@start) +1 -$size), (: the text in front of the first entity :)
+        if ( count( $entities[1]/ex:type) >= 1 ) then
+        <ex:entity start="{$entities[1]/@start}" end="{$entities[1]/@end}" type="{$entities[1]/ex:type[1]}"> {$entities[1]/text()} </ex:entity>
+        else $entities[1],
+        ex:entity-inline-annotation(substring($text, ($entities[1]/@end)+2 -$size), $entities[position() >1], ($entities[1]/@end)+1)) (: recurse on the text after the entity :)
+};
+
+(:~
+ : Creates concepts inline annotations in a given string. The arithmetic below treats @start
+ : and @end as 0-based, inclusive offsets into the original text; substring() is 1-based.
+ : @param $text the part of the original string that has not been emitted yet
+ : @param $concepts list of concepts found in the given string
+ : @param $size number of characters of the original string already consumed (0 on the first call)
+ : @return Mixed sequence of strings and &lt;ex:concept&gt; (or, without a link, &lt;ex:entity&gt;) elements
+ :)
+declare %private function ex:concept-inline-annotation($text, $concepts, $size) as item()*{
+    if ( count($concepts) = 0 ) then $text (: no concept left: emit the rest of the text :)
+    else(substring($text, 0, ($concepts[1]/ex:entity/@start) +1 -$size), (: the text in front of the first concept :)
+        if ( count( $concepts[1]/ex:wikipedia_url ) >= 1 ) 
+        then <ex:concept xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="{$concepts[1]/ex:entity/@start}" end="{$concepts[1]/ex:entity/@end}" url="{$concepts[1]/ex:wikipedia_url[1]/text()}">{$concepts[1]/ex:entity/text()}</ex:concept>
+        else $concepts[1]/ex:entity,
+        ex:concept-inline-annotation(substring($text, ($concepts[1]/ex:entity/@end) +2 -$size), $concepts[position() >1], ($concepts[1]/ex:entity/@end) +1)) (: recurse on the text after the concept :)
+};
+
+(:~
+ : Establishes connection with the Yahoo Server
+ :
+ : @param $text String to be analyzed
+ : @return XML document returned by the Yahoo Server, or the empty sequence if the HTTP status is not 200
+ :)
+declare %private %ann:sequential function ex:server-connection($text as xs:string){
+   (: encode-for-uri, unlike iri-to-uri, also escapes characters such as "&amp;", "+" and "%",
+      which would otherwise corrupt the application/x-www-form-urlencoded request body :)
+   let $body := concat("q=", encode-for-uri(concat("select * from contentanalysis.analyze where text=",
+      "&quot;", replace(normalize-space($text), "&quot;", "&apos;"), "&quot;")))
+   let $req := 
+      <h:request method="POST" href="http://query.yahooapis.com/v1/public/yql">
+         <h:header name="Connection" value="keep-alive"/>
+         <h:body media-type="application/x-www-form-urlencoded">{$body}</h:body>
+      </h:request>
+    let $response := http:send-request($req, (), ())
+    return if ($response[1]/@status = 200)
+    	then $response[2]
+    	else ()
+};

=== added file 'src/info-extraction-module.xsd'
--- src/info-extraction-module.xsd	1970-01-01 00:00:00 +0000
+++ src/info-extraction-module.xsd	2013-06-24 09:52:30 +0000
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
+	xmlns="http://www.zorba-xquery.com/modules/info-extraction"
+	targetNamespace="http://www.zorba-xquery.com/modules/info-extraction"
+	elementFormDefault="qualified">
+	<!-- Plain string elements: a wikipedia link and a category. -->
+	<xs:element name="wikipedia_url" type="xs:string"/>
+	<xs:element name="category" type="xs:string"/>
+	<!-- entity: mixed content with any number of "type" children; start and end are required. -->
+	<xs:element name="entity">
+		<xs:complexType mixed="true">
+			<xs:sequence>
+				<xs:element name="type" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
+			</xs:sequence>
+			<xs:attribute name="start" type="xs:integer" use="required"/>
+			<xs:attribute name="end" type="xs:integer" use="required"/>
+			<xs:attribute name="type" type="xs:string"/>
+		</xs:complexType>
+	</xs:element>
+	<!-- concept: mixed content; the entity and wikipedia_url children and all attributes are optional. -->
+	<xs:element name="concept">
+		<xs:complexType mixed="true">
+			<xs:sequence>
+				<xs:element ref="entity" minOccurs="0" maxOccurs="1"/>
+				<xs:element ref="wikipedia_url" minOccurs="0" maxOccurs="1"/>
+			</xs:sequence>
+			<xs:attribute name="start" type="xs:integer"/>
+			<xs:attribute name="end" type="xs:integer"/>
+			<xs:attribute name="url" type="xs:string"/>
+		</xs:complexType>
+	</xs:element>
+	<!-- relation: exactly one entity followed by one or more wikipedia_url elements. -->
+	<xs:element name="relation">
+		<xs:complexType>
+			<xs:sequence>
+				<xs:element ref="entity" minOccurs="1" maxOccurs="1"/>
+				<xs:element ref="wikipedia_url" minOccurs="1" maxOccurs="unbounded"/>
+			</xs:sequence>
+		</xs:complexType>
+	</xs:element>
+	
+</xs:schema>

=== added directory 'test'
=== renamed directory 'test' => 'test.moved'
=== added directory 'test/ExpQueryResults'
=== added file 'test/ExpQueryResults/categories.xml.res'
--- test/ExpQueryResults/categories.xml.res	1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/categories.xml.res	2013-06-24 09:52:30 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+true
\ No newline at end of file

=== added file 'test/ExpQueryResults/concepts-inline.xml.res'
--- test/ExpQueryResults/concepts-inline.xml.res	1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/concepts-inline.xml.res	2013-06-24 09:52:30 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+true

=== added file 'test/ExpQueryResults/concepts.xml.res'
--- test/ExpQueryResults/concepts.xml.res	1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/concepts.xml.res	2013-06-24 09:52:30 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+true
\ No newline at end of file

=== added file 'test/ExpQueryResults/entities-inline.xml.res'
--- test/ExpQueryResults/entities-inline.xml.res	1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/entities-inline.xml.res	2013-06-24 09:52:30 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+true

=== added file 'test/ExpQueryResults/entities.xml.res'
--- test/ExpQueryResults/entities.xml.res	1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/entities.xml.res	2013-06-24 09:52:30 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+true
\ No newline at end of file

=== added file 'test/ExpQueryResults/relations.xml.res'
--- test/ExpQueryResults/relations.xml.res	1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/relations.xml.res	2013-06-24 09:52:30 +0000
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+true
\ No newline at end of file

=== added directory 'test/Queries'
=== added file 'test/Queries/categories.xq'
--- test/Queries/categories.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/categories.xq	2013-06-24 09:52:30 +0000
@@ -0,0 +1,17 @@
+import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+let $result := ex:categories("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
+
+return count ( for $c in $result return validate { $c } ) > 0
+    
+(:
+The query yields true when at least one category is returned; validate raises an error for any result that does not conform to the imported schema.
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- The ex:categories function should return the following list of results for the input provided in the example -->
+<ex:category xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">Politics &amp; Government</ex:category>
+<ex:category xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">Budget, Tax &amp; Economy</ex:category>
+<ex:category xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">Government</ex:category>
+
+:)

=== added file 'test/Queries/concepts-inline.xq'
--- test/Queries/concepts-inline.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/concepts-inline.xq	2013-06-24 09:52:30 +0000
@@ -0,0 +1,15 @@
+import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+let $result := ex:concepts-inline("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
+
+return count ( for $c in <aux>{$result}</aux>/ex:concept return validate { $c } ) > 0
+
+(:
+The mixed result is wrapped in an aux element so that its ex:concept children can be selected; the query yields true when at least one is present, and validate raises an error for any that does not conform to the imported schema.
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- The ex:concepts-inline function should return the following list of results for the input provided in the example -->
+President Obama called Wednesday on <concept start="36" end="43" url="http://en.wikipedia.com/wiki/United_States_Congress">Congress</concept> to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.
+
+:)

=== added file 'test/Queries/concepts.xq'
--- test/Queries/concepts.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/concepts.xq	2013-06-24 09:52:30 +0000
@@ -0,0 +1,23 @@
+import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+let $result := ex:concepts("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
+
+return count ( for $c in $result return validate { $c } ) > 0
+    
+(:
+The query yields true when at least one concept is returned; validate raises an error for any result that does not conform to the imported schema.
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- The ex:concepts function should return the following list of results for the input provided in the example -->
+<ex:concept xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">
+  <ex:entity start="0" end="14">President Obama</ex:entity>
+  <ex:wikipedia_url>http://en.wikipedia.com/wiki/Barack_Obama</ex:wikipedia_url>
+</ex:concept>
+<ex:concept xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">
+  <ex:entity start="36" end="43">
+    <ex:type>organization</ex:type>Congress</ex:entity>
+  <ex:wikipedia_url>http://en.wikipedia.com/wiki/United_States_Congress</ex:wikipedia_url>
+</ex:concept>
+
+:)

=== added file 'test/Queries/entities-inline.xq'
--- test/Queries/entities-inline.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/entities-inline.xq	2013-06-24 09:52:30 +0000
@@ -0,0 +1,15 @@
+import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+let $result := ex:entities-inline("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
+
+return count ( for $c in <aux>{$result}</aux>/ex:entity return validate { $c } ) > 0
+
+(:
+The mixed result is wrapped in an aux element so that its ex:entity children can be selected; the query yields true when at least one is present, and validate raises an error for any that does not conform to the imported schema.
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- The ex:entities-inline function should return the following list of results for the input provided in the example -->
+<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="0" end="14">President Obama</ex:entity> called Wednesday on <ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="36" end="43" type="organization">Congress</ex:entity> to extend a <ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="57" end="65">tax break</ex:entity> for students included in last year's <ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="104" end="128">economic stimulus package</ex:entity>, arguing that the policy provides more <ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="169" end="187">generous assistance</ex:entity>.
+
+:)

=== added file 'test/Queries/entities.xq'
--- test/Queries/entities.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/entities.xq	2013-06-24 09:52:30 +0000
@@ -0,0 +1,20 @@
+import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+let $result := ex:entities("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
+
+return count ( for $e in $result return validate { $e } ) > 0
+    
+(: 
+The query yields true when at least one entity is returned; validate raises an error for any result that does not conform to the imported schema.
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- The ex:entities function should return the following list of results for the input provided in the example -->
+<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="0" end="14">President Obama</ex:entity>
+<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="36" end="43">
+  <ex:type>organization</ex:type>Congress</ex:entity>
+<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="57" end="65">tax break</ex:entity>
+<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="104" end="128">economic stimulus package</ex:entity>
+<ex:entity xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction" start="169" end="187">generous assistance</ex:entity>
+
+:)

=== added file 'test/Queries/relations.xq'
--- test/Queries/relations.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/relations.xq	2013-06-24 09:52:30 +0000
@@ -0,0 +1,31 @@
+import module namespace ex = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+import schema namespace schema = 'http://www.zorba-xquery.com/modules/info-extraction';
+
+let $result := ex:relations("President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance.")
+
+return count ( for $r in $result return validate { $r } ) > 0
+    
+(:
+The query yields true when at least one relation is returned; validate raises an error for any result that does not conform to the imported schema. The top-level CMakeLists.txt declares this test as an expected failure.
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- The ex:relations function should return the following list of results for the input provided in the example -->
+<ex:relation xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">
+  <ex:entity start="0" end="14">President Obama</ex:entity>
+  <ex:wikipedia_url>http://en.wikipedia.com/wiki/Gabrielle_Giffords</ex:wikipedia_url>
+  <ex:wikipedia_url>http://en.wikipedia.com/wiki/2011_Tucson_shooting</ex:wikipedia_url>
+  <ex:wikipedia_url>http://en.wikipedia.com/wiki/Fray</ex:wikipedia_url>
+  <ex:wikipedia_url>http://en.wikipedia.com/wiki/White_House</ex:wikipedia_url>
+  <ex:wikipedia_url>http://en.wikipedia.com/wiki/Recall_%28memory%29</ex:wikipedia_url>
+</ex:relation>
+<ex:relation xmlns:ex="http://www.zorba-xquery.com/modules/info-extraction">
+  <ex:entity start="36" end="43">
+    <ex:type>organization</ex:type>Congress</ex:entity>
+  <ex:wikipedia_url>http://en.wikipedia.com/wiki/Republican_Party_%28United_States%29</ex:wikipedia_url>
+  <ex:wikipedia_url>http://en.wikipedia.com/wiki/Barack_Obama</ex:wikipedia_url>
+  <ex:wikipedia_url>http://en.wikipedia.com/wiki/Roger_Clemens</ex:wikipedia_url>
+  <ex:wikipedia_url>http://en.wikipedia.com/wiki/Democratic_Party_%28United_States%29</ex:wikipedia_url>
+  <ex:wikipedia_url>http://en.wikipedia.com/wiki/Growth_hormone</ex:wikipedia_url>
+</ex:relation>
+
+:)

Follow ups

Re: [Merge] lp:~zorba-coders/zorba/info-extraction-yahoo-broken into lp:zorba
From: Chris Hillery, 2013-06-24