zeitgeist team mailing list archive
-
zeitgeist team
-
Mailing list archive
-
Message #00977
[Merge] lp:~kamstrup/zeitgeist/query-expansion into lp:zeitgeist
Mikkel Kamstrup Erlandsen has proposed merging lp:~kamstrup/zeitgeist/query-expansion into lp:zeitgeist.
Requested reviews:
Zeitgeist Framework Team (zeitgeist)
Huzzah! Smackeroo! I have query expansion fully working now, with all unit tests passing, both on the SQL level and on our template matching level.
So what does "query expansion" mean? Consider a query for subjects with interpretation nfo:Media. Right now that would only match stuff that has been explicitly identified as nfo:Media (which is not much, since we can usually identify whether stuff is Audio, Image, or Video data).
With query expansion we'll also match any children of nfo:Media, i.e. also nfo:Image, nfo:Audio, and nfo:Video, as well as recursively matching children of these, like nfo:RasterImage and nfo:VectorImage.
The way it's implemented is really simple. We simply expand the tree of children and compile a big OR query with everything.
--
https://code.launchpad.net/~kamstrup/zeitgeist/query-expansion/+merge/25000
Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~kamstrup/zeitgeist/query-expansion into lp:zeitgeist.
=== modified file '_zeitgeist/engine/main.py'
--- _zeitgeist/engine/main.py 2010-05-01 22:18:55 +0000
+++ _zeitgeist/engine/main.py 2010-05-10 14:47:20 +0000
@@ -32,7 +32,7 @@
from collections import defaultdict
from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \
- ResultType, get_timestamp_for_now, Interpretation
+ ResultType, get_timestamp_for_now, Interpretation, Symbol
from _zeitgeist.engine.datamodel import Event, Subject
from _zeitgeist.engine.extension import ExtensionsCollection, load_class
from _zeitgeist.engine import constants
@@ -163,16 +163,51 @@
for (event_template, subject_template) in self._build_templates(templates):
subwhere = WhereClause(WhereClause.AND)
try:
- for key in ("interpretation", "manifestation", "actor"):
- value = getattr(event_template, key)
- if value:
- subwhere.add("%s = ?" % key,
- getattr(self, "_" + key).id(value))
- for key in ("interpretation", "manifestation", "mimetype"):
- value = getattr(subject_template, key)
- if value:
- subwhere.add("subj_%s = ?" % key,
- getattr(self, "_" + key).id(value))
+ # Expand event interpretation children
+ event_interp_where = WhereClause(WhereClause.OR)
+ for child_interp in (Symbol.find_child_uris_extended(event_template.interpretation)):
+ if child_interp:
+ event_interp_where.add("interpretation = ?",
+ self._interpretation.id(child_interp))
+ if event_interp_where:
+ subwhere.extend(event_interp_where)
+
+ # Expand event manifestation children
+ event_manif_where = WhereClause(WhereClause.OR)
+ for child_manif in (Symbol.find_child_uris_extended(event_template.manifestation)):
+ if child_manif:
+ event_manif_where.add("manifestation = ?",
+ self._manifestation.id(child_manif))
+ if event_manif_where:
+ subwhere.extend(event_manif_where)
+
+ # Expand subject interpretation children
+ su_interp_where = WhereClause(WhereClause.OR)
+ for child_interp in (Symbol.find_child_uris_extended(subject_template.interpretation)):
+ if child_interp:
+ su_interp_where.add("subj_interpretation = ?",
+ self._interpretation.id(child_interp))
+ if su_interp_where:
+ subwhere.extend(su_interp_where)
+
+ # Expand subject manifestation children
+ su_manif_where = WhereClause(WhereClause.OR)
+ for child_manif in (Symbol.find_child_uris_extended(subject_template.manifestation)):
+ if child_manif:
+ su_manif_where.add("subj_manifestation = ?",
+ self._manifestation.id(child_manif))
+ if su_manif_where:
+ subwhere.extend(su_manif_where)
+
+ # FIXME: Expand mime children as well.
+ # Right now we only do exact matching for mimetypes
+ if subject_template.mimetype:
+ subwhere.add("subj_mimetype = ?",
+ self._mimetype.id(subject_tempalte.mimetype))
+
+ if event_template.actor:
+ subwhere.add("actor = ?",
+ self._actor.id(event_template.actor))
except KeyError:
# Value not in DB
where_or.register_no_result()
@@ -183,6 +218,7 @@
subwhere.add("subj_%s = ?" % key, value)
where_or.extend(subwhere)
+ print "SQL: ", where_or.sql, where_or.arguments
return where_or
def _build_sql_event_filter(self, time_range, templates, storage_state):
=== modified file 'test/datamodel-test.py'
--- test/datamodel-test.py 2010-04-26 19:42:07 +0000
+++ test/datamodel-test.py 2010-05-10 14:47:20 +0000
@@ -51,6 +51,47 @@
self.assertTrue(f.display_name != None)
self.assertTrue(f.doc != None)
+class RelationshipTest (unittest.TestCase):
+ """
+ Tests for parent/child relationships in the loaded ontologies
+ """
+
+ def testDirectParents (self):
+ """
+ Tests relationship tracking for immediate parents
+ """
+ self.assertTrue(Interpretation.AUDIO.is_a(Interpretation.MEDIA))
+
+ def testSecondLevelParents (self):
+ """
+ Tests relationship tracking for second level parents
+ """
+ self.assertTrue(Interpretation.VECTOR_IMAGE.is_a(Interpretation.MEDIA))
+ self.assertTrue(Interpretation.VECTOR_IMAGE.is_a(Interpretation.IMAGE))
+
+ def testRootParents (self):
+ """
+ Tests relationship tracking for root nodes, ie Interpretation
+ and Manifestation
+ """
+ self.assertTrue(Interpretation.VECTOR_IMAGE.is_a(Interpretation))
+ self.assertTrue(Manifestation.FILE_DATA_OBJECT.is_a(Manifestation))
+ self.assertTrue(Manifestation.USER_ACTIVITY.is_a(Manifestation))
+
+ def testReflecsive (self):
+ """
+ Assert that a symbol is a child of itself
+ """
+ self.assertTrue(Manifestation.USER_ACTIVITY.is_a(Manifestation.USER_ACTIVITY))
+
+ def testFindExtendedChildren (self):
+ self.assertEquals(["foo://bar"], Symbol.find_child_uris_extended("foo://bar"))
+ self.assertEquals(["http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Icon",
+ "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#VectorImage",
+ "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Cursor",
+ "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#RasterImage",
+ "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Image"],
+ Symbol.find_child_uris_extended("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Image"))
class EventTest (unittest.TestCase):
def setUp(self):
@@ -116,6 +157,17 @@
e.manifestation="ILLEGAL SNAFU"
self.assertFalse(e.matches_template(template))
+ def testTemplateParentMatching(self):
+ template = Event.new_for_values(
+ manifestation=Manifestation.EVENT_MANIFESTATION,
+ subject_interpretation=Interpretation)
+
+ e = Event.new_for_values(
+ manifestation=Manifestation.USER_ACTIVITY,
+ subject_interpretation=Interpretation.TEXT_DOCUMENT,
+ subject_text="Foo")
+ self.assertTrue(e.matches_template(template))
+
def testTemplateFiltering(self):
template = Event.new_for_values(interpretation="stfu:OpenEvent")
events = parse_events("test/data/five_events.js")
=== added file 'test/test-sql.py'
--- test/test-sql.py 1970-01-01 00:00:00 +0000
+++ test/test-sql.py 2010-05-10 14:47:20 +0000
@@ -0,0 +1,51 @@
+#! /usr/bin/python
+# -.- coding: utf-8 -.-
+
+# Zeitgeist
+#
+# Copyright © 2010 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@xxxxxxxxx>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import sys, os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+import unittest
+from _zeitgeist.engine.sql import *
+
+class SQLTest (unittest.TestCase):
+
+ def testFlat (self):
+ where = WhereClause(WhereClause.AND)
+ where.add ("foo = %s", 10)
+ where.add ("bar = %s", 27)
+ self.assertEquals(where.sql % tuple(where.arguments),
+ "(foo = 10 AND bar = 27)")
+
+ def testNested (self):
+ where = WhereClause(WhereClause.AND)
+ where.add ("foo = %s", 10)
+
+ subwhere = WhereClause(WhereClause.OR)
+ subwhere.add ("subfoo = %s", 68)
+ subwhere.add ("subbar = %s", 69)
+ where.extend(subwhere)
+ where.add ("bar = %s", 11)
+
+ self.assertEquals(where.sql % tuple(where.arguments),
+ "(foo = 10 AND (subfoo = 68 OR subbar = 69) AND bar = 11)")
+
+if __name__ == "__main__":
+ unittest.main()
=== modified file 'zeitgeist/datamodel.py'
--- zeitgeist/datamodel.py 2010-04-29 08:28:44 +0000
+++ zeitgeist/datamodel.py 2010-05-10 14:47:20 +0000
@@ -185,6 +185,22 @@
dikt[self.name] = self
for child in self._children.itervalues():
child._visit(dikt)
+
+ @staticmethod
+ def find_child_uris_extended (uri):
+ """
+ Creates a list of all known child URIs of `uri`, including
+ `uri` itself in the list. Hence the "extended". If `uri`
+ is unknown a list containing only `uri` is returned.
+ """
+ try:
+ symbol = _SYMBOLS_BY_URI[uri]
+ children = [child.uri for child in symbol.get_all_children()]
+ children.append(uri)
+ return children
+ except KeyError, e:
+ return [uri]
+
@property
def uri(self):
@@ -236,7 +252,51 @@
Returns a list of immediate parent symbols
"""
return frozenset(self._parents.itervalues())
-
+
+ def is_a (self, parent):
+ """
+ Returns True if this symbol is a child of `parent`.
+ """
+ if not isinstance (parent, Symbol):
+ try:
+ parent = _SYMBOLS_BY_URI[parent]
+ except KeyError, e:
+ # Parent is not a known URI
+ print 11111111111, self.uri, parent
+ return self.uri == parent
+
+ # Invariant: parent is a Symbol
+ if self.uri == parent.uri : return True
+
+ parent._ensure_all_children()
+
+ # FIXME: We should really check that child.uri is in there,
+ # but that is not fast with the current code layout
+ return self.name in parent._all_children
+
+ @staticmethod
+ def uri_is_a (child, parent):
+ """
+ Returns True if `child` is a child of `parent`. Both `child`
+ and `parent` arguments must be any combination of
+ :class:`Symbol` and/or string.
+ """
+ if isinstance (child, basestring):
+ try:
+ child = _SYMBOLS_BY_URI[child]
+ except KeyError, e:
+ # Child is not a know URI
+ if isinstance (parent, basestring):
+ return child == parent
+ elif isinstance (parent, Symbol):
+ return child == parent.uri
+ else:
+ return False
+
+ if not isinstance (child, Symbol):
+ raise ValueError("Child argument must be a Symbol or string. Got %s" % type(child))
+
+ return child.is_a(parent)
class TimeRange(list):
"""
@@ -463,11 +523,13 @@
"""
Return True if this Subject matches *subject_template*. Empty
fields in the template are treated as wildcards.
+ Interpretations and manifestations are also matched if they are
+ children of the types specified in `subject_template`.
See also :meth:`Event.matches_template`
"""
for m in Subject.Fields:
- if subject_template[m] and subject_template[m] != self[m] :
+ if subject_template[m] and not Symbol.uri_is_a (self[m], subject_template[m]):
return False
return True
@@ -693,7 +755,9 @@
"""
Return True if this event matches *event_template*. The
matching is done where unset fields in the template is
- interpreted as wild cards. If the template has more than one
+ interpreted as wild cards. Interpretations and manifestations
+ are also matched if they are children of the types specified
+ in `event_template`. If the template has more than one
subject, this event matches if at least one of the subjects
on this event matches any single one of the subjects on the
template.
@@ -707,7 +771,7 @@
tdata = event_template[0]
for m in Event.Fields:
if m == Event.Timestamp : continue
- if tdata[m] and tdata[m] != data[m] : return False
+ if tdata[m] and not Symbol.uri_is_a (data[m], tdata[m]) : return False
# If template has no subjects we have a match
if len(event_template[1]) == 0 : return True
Follow ups