zeitgeist team mailing list archive
-
zeitgeist team
-
Mailing list archive
-
Message #01004
[Merge] lp:~thekorn/zeitgeist/negation_support into lp:zeitgeist
Markus Korn has proposed merging lp:~thekorn/zeitgeist/negation_support into lp:zeitgeist.
Requested reviews:
Zeitgeist Framework Team (zeitgeist)
Related bugs:
#485966 Using filters (text match / exclusion / etc)
https://bugs.launchpad.net/bugs/485966
This branch adds negation support as described in [0].
Some fields in a template can now be prefixed with the negation operator ("!"), which results in a NOT condition in the search. Negation support is implemented at the SQL level as well as at the datamodel level.
Once this branch has landed, I will work on the wildcards part of bug 485966.
[0] https://bugs.edge.launchpad.net/zeitgeist/+bug/485966/comments/13
--
https://code.launchpad.net/~thekorn/zeitgeist/negation_support/+merge/25299
Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~thekorn/zeitgeist/negation_support into lp:zeitgeist.
=== modified file '_zeitgeist/engine/main.py'
--- _zeitgeist/engine/main.py 2010-05-13 22:33:21 +0000
+++ _zeitgeist/engine/main.py 2010-05-14 08:09:25 +0000
@@ -32,8 +32,8 @@
from collections import defaultdict
from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \
- ResultType, get_timestamp_for_now, Interpretation, Symbol
-from _zeitgeist.engine.datamodel import Event, Subject
+ ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR
+from _zeitgeist.engine.datamodel import Event, Subject
from _zeitgeist.engine.extension import ExtensionsCollection, load_class
from _zeitgeist.engine import constants
from _zeitgeist.engine.sql import get_default_cursor, unset_cursor, \
@@ -44,6 +44,20 @@
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("zeitgeist.engine")
+def parse_negation(kind, field, value, parse_negation=True):
+ """checks if value starts with the negation operator,
+ if value starts with the negation operator but the field does
+ not support negation a ValueError is raised.
+ This function returns a (value_without_negation, negation)-tuple
+ """
+ negation = False
+ if parse_negation and value.startswith(NEGATION_OPERATOR):
+ negation = True
+ value = value[len(NEGATION_OPERATOR):]
+ if negation and field not in kind.SUPPORTS_NEGATION:
+ raise ValueError("This field does not support negation")
+ return value, negation
+
class ZeitgeistEngine:
def __init__ (self):
@@ -167,36 +181,40 @@
subwhere.add("id = ?", event_template.id)
try:
+ value, negation = parse_negation(Event, Event.Interpretation, event_template.interpretation)
# Expand event interpretation children
- event_interp_where = WhereClause(WhereClause.OR)
- for child_interp in (Symbol.find_child_uris_extended(event_template.interpretation)):
+ event_interp_where = WhereClause(WhereClause.OR, negation)
+ for child_interp in (Symbol.find_child_uris_extended(value)):
if child_interp:
event_interp_where.add("interpretation = ?",
self._interpretation[child_interp])
if event_interp_where:
subwhere.extend(event_interp_where)
+ value, negation = parse_negation(Event, Event.Manifestation, event_template.manifestation)
# Expand event manifestation children
- event_manif_where = WhereClause(WhereClause.OR)
- for child_manif in (Symbol.find_child_uris_extended(event_template.manifestation)):
+ event_manif_where = WhereClause(WhereClause.OR, negation)
+ for child_manif in (Symbol.find_child_uris_extended(value)):
if child_manif:
event_manif_where.add("manifestation = ?",
self._manifestation[child_manif])
if event_manif_where:
subwhere.extend(event_manif_where)
+ value, negation = parse_negation(Subject, Subject.Interpretation, subject_template.interpretation)
# Expand subject interpretation children
- su_interp_where = WhereClause(WhereClause.OR)
- for child_interp in (Symbol.find_child_uris_extended(subject_template.interpretation)):
+ su_interp_where = WhereClause(WhereClause.OR, negation)
+ for child_interp in (Symbol.find_child_uris_extended(value)):
if child_interp:
su_interp_where.add("subj_interpretation = ?",
self._interpretation[child_interp])
if su_interp_where:
subwhere.extend(su_interp_where)
+ value, negation = parse_negation(Subject, Subject.Manifestation, subject_template.manifestation)
# Expand subject manifestation children
- su_manif_where = WhereClause(WhereClause.OR)
- for child_manif in (Symbol.find_child_uris_extended(subject_template.manifestation)):
+ su_manif_where = WhereClause(WhereClause.OR, negation)
+ for child_manif in (Symbol.find_child_uris_extended(value)):
if child_manif:
su_manif_where.add("subj_manifestation = ?",
self._manifestation[child_manif])
@@ -205,23 +223,37 @@
# FIXME: Expand mime children as well.
# Right now we only do exact matching for mimetypes
- if subject_template.mimetype:
- subwhere.add("subj_mimetype = ?",
- self._mimetype[subject_tempalte.mimetype])
+ # thekorn: this will be fixed when wildcards are supported
+ value, negation = parse_negation(Subject, Subject.Mimetype, subject_template.mimetype)
+ if value:
+ subwhere.add("subj_mimetype %s= ?" %(NEGATION_OPERATOR if negation else ""),
+ self._mimetype[value])
- if event_template.actor:
- subwhere.add("actor = ?",
- self._actor[event_template.actor])
+ value, negation = parse_negation(Event, Event.Actor, event_template.actor)
+ if value:
+ subwhere.add("actor %s= ?" %(NEGATION_OPERATOR if negation else ""),
+ self._actor[value])
except KeyError, e:
# Value not in DB
log.debug("Unknown entity in query: %s" % e)
where_or.register_no_result()
continue
+
for key in ("uri", "origin", "text"):
value = getattr(subject_template, key)
if value:
- subwhere.add("subj_%s = ?" % key, value)
-
+ try:
+ value, negation = parse_negation(Subject, getattr(Subject, key.title()), value)
+ except ValueError:
+ if key == "text":
+ # we do not support negation of the text field,
+ # the text field starts with the NEGATION_OPERATOR
+ # so we handle this string as the content instead
+ # of an operator
+ negation = False
+ else:
+ raise
+ subwhere.add("subj_%s %s= ?" %(key, NEGATION_OPERATOR if negation else ""), value)
where_or.extend(subwhere)
return where_or
=== modified file '_zeitgeist/engine/sql.py'
--- _zeitgeist/engine/sql.py 2010-04-13 12:56:06 +0000
+++ _zeitgeist/engine/sql.py 2010-05-14 08:09:25 +0000
@@ -346,12 +346,14 @@
AND = " AND "
OR = " OR "
+ NOT = "NOT "
- def __init__(self, relation):
+ def __init__(self, relation, negation=False):
self._conditions = []
self.arguments = []
self._relation = relation
self._no_result_member = False
+ self._negation = negation
def __len__(self):
return len(self._conditions)
@@ -375,7 +377,8 @@
@property
def sql(self):
if self: # Do not return "()" if there are no conditions
- return "(" + self._relation.join(self._conditions) + ")"
+ negation = self.NOT if self._negation else ""
+ return "%s(%s)" %(negation, self._relation.join(self._conditions))
def register_no_result(self):
self._no_result_member = True
=== modified file 'test/datamodel-test.py'
--- test/datamodel-test.py 2010-05-12 20:09:53 +0000
+++ test/datamodel-test.py 2010-05-14 08:09:25 +0000
@@ -179,6 +179,124 @@
self.assertTrue(ev.in_time_range(TimeRange(0, 20)))
self.assertFalse(ev.in_time_range(TimeRange(0, 5)))
self.assertFalse(ev.in_time_range(TimeRange(15, 20)))
+
+ def testNegationTemplateMatching(self):
+ event = Event.new_for_values(
+ subject_interpretation=Interpretation.AUDIO
+ )
+
+ template = Event.new_for_values(
+ subject_interpretation="!%s" %Interpretation.AUDIO
+ )
+ self.assertFalse(event.matches_template(template))
+
+ template = Event.new_for_values(
+ subject_interpretation="!%s" %Interpretation.MEDIA
+ )
+ self.assertFalse(event.matches_template(template))
+
+ template = Event.new_for_values(
+ subject_interpretation="!%s" %Interpretation.DOCUMENT
+ )
+ self.assertTrue(event.matches_template(template))
+
+ template = Event.new_for_values(
+ subject_interpretation="!somerandomtext"
+ )
+ self.assertTrue(event.matches_template(template))
+
+ event = Event.new_for_values(
+ subject_interpretation=Interpretation.MEDIA
+ )
+
+ template = Event.new_for_values(
+ subject_interpretation="!%s" %Interpretation.AUDIO
+ )
+ self.assertTrue(event.matches_template(template))
+
+ def testNegationFields(self):
+ events = parse_events("test/data/five_events.js")
+
+ template = Event.new_for_values(
+ interpretation = "!stfu:OpenEvent"
+ )
+ filtered_events = filter(template.matches_event, events)
+ self.assertEquals(3, len(filtered_events))
+
+ template = Event.new_for_values(
+ manifestation = "!stfu:YourActivity"
+ )
+ filtered_events = filter(template.matches_event, events)
+ self.assertEquals(4, len(filtered_events))
+
+ template = Event.new_for_values(
+ actor = "!firefox"
+ )
+ filtered_events = filter(template.matches_event, events)
+ self.assertEquals(2, len(filtered_events))
+
+ template = Event.new_for_values(
+ subject_uri = "!file:///tmp/foo.txt"
+ )
+ filtered_events = filter(template.matches_event, events)
+ self.assertEquals(3, len(filtered_events))
+
+ template = Event.new_for_values(
+ subject_interpretation = "!stfu:Document"
+ )
+ filtered_events = filter(template.matches_event, events)
+ self.assertEquals(4, len(filtered_events))
+
+ template = Event.new_for_values(
+ subject_manifestation = "!stfu:File"
+ )
+ filtered_events = filter(template.matches_event, events)
+ self.assertEquals(0, len(filtered_events))
+
+ template = Event.new_for_values(
+ subject_origin = "!file:///tmp"
+ )
+ filtered_events = filter(template.matches_event, events)
+ self.assertEquals(0, len(filtered_events))
+
+ template = Event.new_for_values(
+ subject_mimetype = "!text/plain"
+ )
+ filtered_events = filter(template.matches_event, events)
+ self.assertEquals(0, len(filtered_events))
+
+ # the next two fields do not support negation, '!' is treated as
+ # content
+
+ template = Event.new_for_values(
+ subject_text = "!boo"
+ )
+ filtered_events = filter(template.matches_event, events)
+ self.assertEquals(0, len(filtered_events))
+
+ template = Event.new_for_values(
+ subject_storage = "!boo"
+ )
+ filtered_events = filter(template.matches_event, events)
+ self.assertEquals(0, len(filtered_events))
+
+ def testNegationCombination(self):
+ events = parse_events("test/data/five_events.js")
+
+ template = Event.new_for_values(
+ interpretation = "!stfu:OpenEvent",
+ actor = "!firefox"
+ )
+ filtered_events = filter(template.matches_event, events)
+ self.assertEquals(2, len(filtered_events))
+
+ template = Event.new_for_values(
+ interpretation = "!stfu:OpenEvent",
+ manifestation = "!stfu:YourActivity"
+ )
+ filtered_events = filter(template.matches_event, events)
+ self.assertEquals(3, len(filtered_events))
+
class TimeRangeTest (unittest.TestCase):
=== modified file 'test/engine-test.py'
--- test/engine-test.py 2010-05-14 07:53:47 +0000
+++ test/engine-test.py 2010-05-14 08:09:25 +0000
@@ -641,6 +641,114 @@
self.assertEquals(1, len(ids))
self.assertEquals(_ids, ids)
+ def testNegation(self):
+ import_events("test/data/five_events.js", self.engine)
+
+ template = Event.new_for_values(
+ interpretation = "!stfu:OpenEvent"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(3, len(ids))
+
+ template = Event.new_for_values(
+ manifestation = "!stfu:YourActivity"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(4, len(ids))
+
+ template = Event.new_for_values(
+ actor = "!firefox"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(2, len(ids))
+
+ template = Event.new_for_values(
+ subject_uri = "!file:///tmp/foo.txt"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(3, len(ids))
+
+ template = Event.new_for_values(
+ subject_interpretation = "!stfu:Document"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(4, len(ids))
+
+ template = Event.new_for_values(
+ subject_manifestation = "!stfu:File"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(0, len(ids))
+
+ template = Event.new_for_values(
+ subject_origin = "!file:///tmp"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(0, len(ids))
+
+ template = Event.new_for_values(
+ subject_mimetype = "!text/plain"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(0, len(ids))
+
+ # the next two fields do not support negation, '!' is treated as
+ # content
+
+ template = Event.new_for_values(
+ subject_text = "!boo"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(0, len(ids))
+
+ # searching by subject_storage is not working
+ #~ template = Event.new_for_values(
+ #~ subject_storage = "!boo"
+ #~ )
+ #~ ids = self.engine.find_eventids(TimeRange.always(),
+ #~ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ #~ )
+ #~ self.assertEquals(0, len(ids))
+
+ def testNegationCombination(self):
+ import_events("test/data/five_events.js", self.engine)
+
+ template = Event.new_for_values(
+ interpretation = "!stfu:OpenEvent",
+ actor = "!firefox"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(2, len(ids))
+
+ template = Event.new_for_values(
+ interpretation = "!stfu:OpenEvent",
+ manifestation = "!stfu:YourActivity"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(3, len(ids))
+
def testBug580364(self):
events = [
Event.new_for_values(timestamp=1000, subject_storage="sometext"),
@@ -653,7 +761,6 @@
[template], StorageState.Any, 10, ResultType.MostRecentEvents)
self.assertEquals(0, len(ids_out))
-
if __name__ == "__main__":
unittest.main()
=== modified file 'test/test-sql.py'
--- test/test-sql.py 2010-05-05 21:37:51 +0000
+++ test/test-sql.py 2010-05-14 08:09:25 +0000
@@ -46,6 +46,27 @@
self.assertEquals(where.sql % tuple(where.arguments),
"(foo = 10 AND (subfoo = 68 OR subbar = 69) AND bar = 11)")
+
+ def testFlatNegation(self):
+ where = WhereClause(WhereClause.OR, negation=True)
+ where.add("foo = %s", 7)
+ where.add("bar = %s", 77)
+ self.assertEquals(where.sql %tuple(where.arguments),
+ "NOT (foo = 7 OR bar = 77)")
+
+ def testNestedNegation(self):
+ where = WhereClause(WhereClause.AND)
+ where.add ("foo = %s", 10)
+
+ subwhere = WhereClause(WhereClause.OR, negation=True)
+ subwhere.add ("subfoo = %s", 68)
+ subwhere.add ("subbar = %s", 69)
+ where.extend(subwhere)
+ where.add ("bar = %s", 11)
+
+ self.assertEquals(where.sql % tuple(where.arguments),
+ "(foo = 10 AND NOT (subfoo = 68 OR subbar = 69) AND bar = 11)")
+
if __name__ == "__main__":
unittest.main()
=== modified file 'zeitgeist/datamodel.py'
--- zeitgeist/datamodel.py 2010-05-13 08:23:07 +0000
+++ zeitgeist/datamodel.py 2010-05-14 08:09:25 +0000
@@ -36,8 +36,13 @@
'Event',
'Subject',
'NULL_EVENT',
+ 'NEGATION_OPERATOR',
]
+NEGATION_OPERATOR = "!"
+
+EQUAL = lambda x,y: x == y
+
# next() function is python >= 2.6
try:
next = next
@@ -262,7 +267,7 @@
parent = _SYMBOLS_BY_URI[parent]
except KeyError, e:
# Parent is not a known URI
- print 11111111111, self.uri, parent
+ #print 11111111111, self.uri, parent #debug output
return self.uri == parent
# Invariant: parent is a Symbol
@@ -427,6 +432,8 @@
Mimetype,
Text,
Storage) = range(7)
+
+ SUPPORTS_NEGATION = (Uri, Interpretation, Manifestation, Origin, Mimetype)
def __init__(self, data=None):
super(Subject, self).__init__([""]*len(Subject.Fields))
@@ -534,12 +541,21 @@
continue
if m in (Subject.Interpretation, Subject.Manifestation):
# symbols are treated differently
- if not Symbol.uri_is_child_of (self[m], subject_template[m]):
- return False
+ comp = Symbol.uri_is_child_of
else:
- if subject_template[m] != self[m]:
- return False
+ comp = EQUAL
+ if not self._check_field_match(m, subject_template[m], comp):
+ return False
return True
+
+ def _check_field_match(self, field_id, expression, comp):
+ """ Checks if an expression matches a field given by its `field_id`
+ using a `comp` comparison function """
+ if field_id in self.SUPPORTS_NEGATION \
+ and expression.startswith(NEGATION_OPERATOR):
+ return not self._check_field_match(field_id, expression[len(NEGATION_OPERATOR):], comp)
+ else:
+ return comp(self[field_id], expression)
class Event(list):
"""
@@ -561,6 +577,8 @@
Interpretation,
Manifestation,
Actor) = range(5)
+
+ SUPPORTS_NEGATION = (Interpretation, Manifestation, Actor)
def __init__(self, struct = None):
"""
@@ -784,11 +802,11 @@
continue
if m in (Event.Manifestation, Event.Interpretation):
# special check for symbols
- if not Symbol.uri_is_child_of(data[m], tdata[m]):
- return False
+ comp = Symbol.uri_is_child_of
else:
- if data[m] != tdata[m]:
- return False
+ comp = EQUAL
+ if not self._check_field_match(m, tdata[m], comp):
+ return False
# If template has no subjects we have a match
if len(event_template[1]) == 0 : return True
@@ -802,6 +820,15 @@
# Template has subjects, but we never found a match
return False
+
+ def _check_field_match(self, field_id, expression, comp):
+ """ Checks if an expression matches a field given by its `field_id`
+ using a `comp` comparison function """
+ if field_id in self.SUPPORTS_NEGATION \
+ and expression.startswith(NEGATION_OPERATOR):
+ return not self._check_field_match(field_id, expression[len(NEGATION_OPERATOR):], comp)
+ else:
+ return comp(self[0][field_id], expression)
def matches_event (self, event):
"""
Follow ups