zeitgeist team mailing list archive
-
zeitgeist team
-
Mailing list archive
-
Message #00989
Re: [Merge] lp:~kamstrup/zeitgeist/query-expansion into lp:zeitgeist
Hey Mikkel,
thanks you for your works, it is working fine for me.
Feel free to merge this branch into lp:zeitgeist once you thought about my three comments ;)
Markus
> === modified file '_zeitgeist/engine/main.py'
> --- _zeitgeist/engine/main.py 2010-05-03 16:32:00 +0000
> +++ _zeitgeist/engine/main.py 2010-05-12 19:32:33 +0000
> @@ -32,7 +32,7 @@
> from collections import defaultdict
>
> from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \
> - ResultType, get_timestamp_for_now, Interpretation
> + ResultType, get_timestamp_for_now, Interpretation, Symbol
> from _zeitgeist.engine.datamodel import Event, Subject
> from _zeitgeist.engine.extension import ExtensionsCollection, load_class
> from _zeitgeist.engine import constants
> @@ -163,16 +163,51 @@
> for (event_template, subject_template) in self._build_templates(templates):
> subwhere = WhereClause(WhereClause.AND)
> try:
> - for key in ("interpretation", "manifestation", "actor"):
> - value = getattr(event_template, key)
> - if value:
> - subwhere.add("%s = ?" % key,
> - getattr(self, "_" + key).id(value))
> - for key in ("interpretation", "manifestation", "mimetype"):
> - value = getattr(subject_template, key)
> - if value:
> - subwhere.add("subj_%s = ?" % key,
> - getattr(self, "_" + key).id(value))
> + # Expand event interpretation children
> + event_interp_where = WhereClause(WhereClause.OR)
> + for child_interp in
> (Symbol.find_child_uris_extended(event_template.interpretation)):
> + if child_interp:
> + event_interp_where.add("interpretation = ?",
> + self._interpretation.id(child_interp))
> + if event_interp_where:
> + subwhere.extend(event_interp_where)
> +
> + # Expand event manifestation children
> + event_manif_where = WhereClause(WhereClause.OR)
> + for child_manif in
> (Symbol.find_child_uris_extended(event_template.manifestation)):
> + if child_manif:
> + event_manif_where.add("manifestation = ?",
> + self._manifestation.id(child_manif))
> + if event_manif_where:
> + subwhere.extend(event_manif_where)
> +
> + # Expand subject interpretation children
> + su_interp_where = WhereClause(WhereClause.OR)
> + for child_interp in
> (Symbol.find_child_uris_extended(subject_template.interpretation)):
> + if child_interp:
> + su_interp_where.add("subj_interpretation = ?",
> + self._interpretation.id(child_interp))
> + if su_interp_where:
> + subwhere.extend(su_interp_where)
> +
> + # Expand subject manifestation children
> + su_manif_where = WhereClause(WhereClause.OR)
> + for child_manif in
> (Symbol.find_child_uris_extended(subject_template.manifestation)):
> + if child_manif:
> + su_manif_where.add("subj_manifestation = ?",
> + self._manifestation.id(child_manif))
> + if su_manif_where:
> + subwhere.extend(su_manif_where)
> +
> + # FIXME: Expand mime children as well.
> + # Right now we only do exact matching for mimetypes
> + if subject_template.mimetype:
> + subwhere.add("subj_mimetype = ?",
> + self._mimetype.id(subject_tempalte.mimetype))
> +
> + if event_template.actor:
> + subwhere.add("actor = ?",
> + self._actor.id(event_template.actor))
> except KeyError:
> # Value not in DB
> where_or.register_no_result()
> @@ -183,6 +218,7 @@
> subwhere.add("subj_%s = ?" % key, value)
> where_or.extend(subwhere)
>
> + print "SQL: ", where_or.sql, where_or.arguments
Do you still need this output, if so, I think this should be converted into
logging.debug()
> return where_or
>
> def _build_sql_event_filter(self, time_range, templates, storage_state):
>
> === modified file 'test/datamodel-test.py'
> --- test/datamodel-test.py 2010-04-28 19:14:21 +0000
> +++ test/datamodel-test.py 2010-05-12 19:32:33 +0000
> @@ -51,6 +51,47 @@
> self.assertTrue(f.display_name != None)
> self.assertTrue(f.doc != None)
>
> +class RelationshipTest (unittest.TestCase):
> + """
> + Tests for parent/child relationships in the loaded ontologies
> + """
> +
> + def testDirectParents (self):
> + """
> + Tests relationship tracking for immediate parents
> + """
> + self.assertTrue(Interpretation.AUDIO.is_a(Interpretation.MEDIA))
> +
> + def testSecondLevelParents (self):
> + """
> + Tests relationship tracking for second level parents
> + """
> + self.assertTrue(Interpretation.VECTOR_IMAGE.is_a(Interpretation.MEDIA))
> + self.assertTrue(Interpretation.VECTOR_IMAGE.is_a(Interpretation.IMAGE))
> +
> + def testRootParents (self):
> + """
> + Tests relationship tracking for root nodes, ie Interpretation
> + and Manifestation
> + """
> + self.assertTrue(Interpretation.VECTOR_IMAGE.is_a(Interpretation))
> + self.assertTrue(Manifestation.FILE_DATA_OBJECT.is_a(Manifestation))
> + self.assertTrue(Manifestation.USER_ACTIVITY.is_a(Manifestation))
> +
> + def testReflecsive (self):
> + """
> + Assert that a symbol is a child of itself
> + """
> + self.assertTrue(Manifestation.USER_ACTIVITY.is_a(Manifestation.USER_ACTIVITY))
> +
> + def testFindExtendedChildren (self):
> + self.assertEquals(["foo://bar"],
> Symbol.find_child_uris_extended("foo://bar"))
> + self.assertEquals(["http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Icon",
> +
> "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#VectorImage",
> +
> "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Cursor",
> +
> "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#RasterImage",
> +
> "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Image"],
> +
> Symbol.find_child_uris_extended("http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Image"))
>
> class EventTest (unittest.TestCase):
> def setUp(self):
> @@ -116,6 +157,17 @@
> e.manifestation="ILLEGAL SNAFU"
> self.assertFalse(e.matches_template(template))
>
> + def testTemplateParentMatching(self):
> + template = Event.new_for_values(
> + manifestation=Manifestation.EVENT_MANIFESTATION,
> + subject_interpretation=Interpretation)
> +
> + e = Event.new_for_values(
> + manifestation=Manifestation.USER_ACTIVITY,
> + subject_interpretation=Interpretation.TEXT_DOCUMENT,
> + subject_text="Foo")
> + self.assertTrue(e.matches_template(template))
> +
> def testTemplateFiltering(self):
> template = Event.new_for_values(interpretation="stfu:OpenEvent")
> events = parse_events("test/data/five_events.js")
>
> === added file 'test/test-sql.py'
> --- test/test-sql.py 1970-01-01 00:00:00 +0000
> +++ test/test-sql.py 2010-05-12 19:32:33 +0000
> @@ -0,0 +1,51 @@
> +#! /usr/bin/python
> +# -.- coding: utf-8 -.-
> +
> +# Zeitgeist
> +#
> +# Copyright © 2010 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@xxxxxxxxx>
> +#
> +# This program is free software: you can redistribute it and/or modify
> +# it under the terms of the GNU Lesser General Public License as published by
> +# the Free Software Foundation, either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU Lesser General Public License for more details.
> +#
> +# You should have received a copy of the GNU Lesser General Public License
> +# along with this program. If not, see <http://www.gnu.org/licenses/>.
> +#
> +
> +import sys, os
> +sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
> +
> +import unittest
> +from _zeitgeist.engine.sql import *
> +
> +class SQLTest (unittest.TestCase):
> +
> + def testFlat (self):
> + where = WhereClause(WhereClause.AND)
> + where.add ("foo = %s", 10)
> + where.add ("bar = %s", 27)
> + self.assertEquals(where.sql % tuple(where.arguments),
> + "(foo = 10 AND bar = 27)")
> +
> + def testNested (self):
> + where = WhereClause(WhereClause.AND)
> + where.add ("foo = %s", 10)
> +
> + subwhere = WhereClause(WhereClause.OR)
> + subwhere.add ("subfoo = %s", 68)
> + subwhere.add ("subbar = %s", 69)
> + where.extend(subwhere)
> + where.add ("bar = %s", 11)
> +
> + self.assertEquals(where.sql % tuple(where.arguments),
> + "(foo = 10 AND (subfoo = 68 OR subbar = 69) AND bar = 11)")
> +
> +if __name__ == "__main__":
> + unittest.main()
>
> === modified file 'zeitgeist/datamodel.py'
> --- zeitgeist/datamodel.py 2010-05-03 16:32:00 +0000
> +++ zeitgeist/datamodel.py 2010-05-12 19:32:33 +0000
> @@ -185,6 +185,22 @@
> dikt[self.name] = self
> for child in self._children.itervalues():
> child._visit(dikt)
> +
> + @staticmethod
> + def find_child_uris_extended (uri):
> + """
> + Creates a list of all known child URIs of `uri`, including
> + `uri` itself in the list. Hence the "extended". If `uri`
> + is unknown a list containing only `uri` is returned.
> + """
> + try:
> + symbol = _SYMBOLS_BY_URI[uri]
> + children = [child.uri for child in symbol.get_all_children()]
> + children.append(uri)
> + return children
> + except KeyError, e:
> + return [uri]
> +
For me it would make much more sense if this method returns the actual symbols
instead of their uri. We should use the symbols as 'long' as possible to not
loose information. SQLite for example should be able to handle our symbols, as
they seem to be just strings.
> @property
> def uri(self):
> @@ -236,7 +252,51 @@
> Returns a list of immediate parent symbols
> """
> return frozenset(self._parents.itervalues())
> -
> +
> + def is_a (self, parent):
> + """
> + Returns True if this symbol is a child of `parent`.
> + """
> + if not isinstance (parent, Symbol):
> + try:
> + parent = _SYMBOLS_BY_URI[parent]
> + except KeyError, e:
> + # Parent is not a known URI
> + print 11111111111, self.uri, parent
> + return self.uri == parent
> +
> + # Invariant: parent is a Symbol
> + if self.uri == parent.uri : return True
> +
> + parent._ensure_all_children()
> +
> + # FIXME: We should really check that child.uri is in there,
> + # but that is not fast with the current code layout
> + return self.name in parent._all_children
> +
> + @staticmethod
> + def uri_is_a (child, parent):
> + """
> + Returns True if `child` is a child of `parent`. Both `child`
> + and `parent` arguments must be any combination of
> + :class:`Symbol` and/or string.
> + """
> + if isinstance (child, basestring):
> + try:
> + child = _SYMBOLS_BY_URI[child]
> + except KeyError, e:
> + # Child is not a know URI
> + if isinstance (parent, basestring):
> + return child == parent
> + elif isinstance (parent, Symbol):
> + return child == parent.uri
> + else:
> + return False
> +
> + if not isinstance (child, Symbol):
> + raise ValueError("Child argument must be a Symbol or string. Got
> %s" % type(child))
> +
> + return child.is_a(parent)
I don't like the method names 'uri_is_a' and 'is_a', what about 'is_child_of'
or something event more meaningful?
> class TimeRange(list):
> """
> @@ -463,11 +523,13 @@
> """
> Return True if this Subject matches *subject_template*. Empty
> fields in the template are treated as wildcards.
> + Interpretations and manifestations are also matched if they are
> + children of the types specified in `subject_template`.
>
> See also :meth:`Event.matches_template`
> """
> for m in Subject.Fields:
> - if subject_template[m] and subject_template[m] != self[m] :
> + if subject_template[m] and not Symbol.uri_is_a (self[m],
> subject_template[m]):
> return False
> return True
>
> @@ -693,7 +755,9 @@
> """
> Return True if this event matches *event_template*. The
> matching is done where unset fields in the template is
> - interpreted as wild cards. If the template has more than one
> + interpreted as wild cards. Interpretations and manifestations
> + are also matched if they are children of the types specified
> + in `event_template`. If the template has more than one
> subject, this event matches if at least one of the subjects
> on this event matches any single one of the subjects on the
> template.
> @@ -707,7 +771,7 @@
> tdata = event_template[0]
> for m in Event.Fields:
> if m == Event.Timestamp : continue
> - if tdata[m] and tdata[m] != data[m] : return False
> + if tdata[m] and not Symbol.uri_is_a (data[m], tdata[m]) : return False
>
> # If template has no subjects we have a match
> if len(event_template[1]) == 0 : return True
>
--
https://code.launchpad.net/~kamstrup/zeitgeist/query-expansion/+merge/25000
Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~kamstrup/zeitgeist/query-expansion into lp:zeitgeist.
References