← Back to team overview

zeitgeist team mailing list archive

[Merge] lp:~thekorn/zeitgeist/negation_support into lp:zeitgeist

 

Markus Korn has proposed merging lp:~thekorn/zeitgeist/negation_support into lp:zeitgeist.

Requested reviews:
  Zeitgeist Framework Team (zeitgeist)
Related bugs:
  #485966 Using filters (text match / exclusion / etc)
  https://bugs.launchpad.net/bugs/485966


This branch adds negation support as described in [0].
Some fields in a template can now be prefixed with the negation operator ("!"), which results in a NOT condition in the search. Negation support is implemented at the SQL level as well as at the datamodel level.
Once this branch has landed, I will work on the wildcards part of bug 485966.


[0] https://bugs.edge.launchpad.net/zeitgeist/+bug/485966/comments/13
-- 
https://code.launchpad.net/~thekorn/zeitgeist/negation_support/+merge/25299
Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~thekorn/zeitgeist/negation_support into lp:zeitgeist.
=== modified file '_zeitgeist/engine/main.py'
--- _zeitgeist/engine/main.py	2010-05-13 22:33:21 +0000
+++ _zeitgeist/engine/main.py	2010-05-14 08:09:25 +0000
@@ -32,8 +32,8 @@
 from collections import defaultdict
 
 from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \
-	ResultType, get_timestamp_for_now, Interpretation, Symbol
-from _zeitgeist.engine.datamodel import Event, Subject	
+	ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR
+from _zeitgeist.engine.datamodel import Event, Subject
 from _zeitgeist.engine.extension import ExtensionsCollection, load_class
 from _zeitgeist.engine import constants
 from _zeitgeist.engine.sql import get_default_cursor, unset_cursor, \
@@ -44,6 +44,20 @@
 logging.basicConfig(level=logging.DEBUG)
 log = logging.getLogger("zeitgeist.engine")
 
+def parse_negation(kind, field, value, parse_negation=True):
+	"""checks if value starts with the negation operator,
+	if value starts with the negation operator but the field does
+	not support negation a ValueError is raised.
+	This function returns a (value_without_negation, negation)-tuple
+	"""
+	negation = False
+	if parse_negation and value.startswith(NEGATION_OPERATOR):
+		negation = True
+		value = value[len(NEGATION_OPERATOR):]
+	if negation and field not in kind.SUPPORTS_NEGATION:
+		raise ValueError("This field does not support negation")
+	return value, negation
+
 class ZeitgeistEngine:
 	
 	def __init__ (self):
@@ -167,36 +181,40 @@
 				subwhere.add("id = ?", event_template.id)
 			
 			try:
+				value, negation = parse_negation(Event, Event.Interpretation, event_template.interpretation)
 				# Expand event interpretation children
-				event_interp_where = WhereClause(WhereClause.OR)
-				for child_interp in (Symbol.find_child_uris_extended(event_template.interpretation)):
+				event_interp_where = WhereClause(WhereClause.OR, negation)
+				for child_interp in (Symbol.find_child_uris_extended(value)):
 					if child_interp:
 						event_interp_where.add("interpretation = ?",
 						                       self._interpretation[child_interp])
 				if event_interp_where:
 					subwhere.extend(event_interp_where)
 				
+				value, negation = parse_negation(Event, Event.Manifestation, event_template.manifestation)
 				# Expand event manifestation children
-				event_manif_where = WhereClause(WhereClause.OR)
-				for child_manif in (Symbol.find_child_uris_extended(event_template.manifestation)):
+				event_manif_where = WhereClause(WhereClause.OR, negation)
+				for child_manif in (Symbol.find_child_uris_extended(value)):
 					if child_manif:
 						event_manif_where.add("manifestation = ?",
 						                      self._manifestation[child_manif])
 				if event_manif_where:
 					subwhere.extend(event_manif_where)
 				
+				value, negation = parse_negation(Subject, Subject.Interpretation, subject_template.interpretation)
 				# Expand subject interpretation children
-				su_interp_where = WhereClause(WhereClause.OR)
-				for child_interp in (Symbol.find_child_uris_extended(subject_template.interpretation)):
+				su_interp_where = WhereClause(WhereClause.OR, negation)
+				for child_interp in (Symbol.find_child_uris_extended(value)):
 					if child_interp:
 						su_interp_where.add("subj_interpretation = ?",
 						                    self._interpretation[child_interp])
 				if su_interp_where:
 					subwhere.extend(su_interp_where)
 				
+				value, negation = parse_negation(Subject, Subject.Manifestation, subject_template.manifestation)
 				# Expand subject manifestation children
-				su_manif_where = WhereClause(WhereClause.OR)
-				for child_manif in (Symbol.find_child_uris_extended(subject_template.manifestation)):
+				su_manif_where = WhereClause(WhereClause.OR, negation)
+				for child_manif in (Symbol.find_child_uris_extended(value)):
 					if child_manif:
 						su_manif_where.add("subj_manifestation = ?",
 						                   self._manifestation[child_manif])
@@ -205,23 +223,37 @@
 				
 				# FIXME: Expand mime children as well.
 				# Right now we only do exact matching for mimetypes
-				if subject_template.mimetype:
-					subwhere.add("subj_mimetype = ?",
-					             self._mimetype[subject_tempalte.mimetype])
+				# thekorn: this will be fixed when wildcards are supported
+				value, negation = parse_negation(Subject, Subject.Mimetype, subject_template.mimetype)
+				if value:
+					subwhere.add("subj_mimetype %s= ?" %(NEGATION_OPERATOR if negation else ""),
+					             self._mimetype[value])
 				
-				if event_template.actor:
-					subwhere.add("actor = ?",
-					             self._actor[event_template.actor])
+				value, negation = parse_negation(Event, Event.Actor, event_template.actor)
+				if value:
+					subwhere.add("actor %s= ?" %(NEGATION_OPERATOR if negation else ""),
+					             self._actor[value])
 			except KeyError, e:
 				# Value not in DB
 				log.debug("Unknown entity in query: %s" % e)
 				where_or.register_no_result()
 				continue
+				
 			for key in ("uri", "origin", "text"):
 				value = getattr(subject_template, key)
 				if value:
-					subwhere.add("subj_%s = ?" % key, value)
-			
+					try:
+						value, negation = parse_negation(Subject, getattr(Subject, key.title()), value)
+					except ValueError:
+						if key == "text":
+							# we do not support negation of the text field,
+							# the text field starts with the NEGATION_OPERATOR
+							# so we handle this string as the content instead
+							# of an operator
+							negation = False
+						else:
+							raise
+					subwhere.add("subj_%s %s= ?" %(key, NEGATION_OPERATOR if negation else ""), value)
 			where_or.extend(subwhere)
 		
 		return where_or

=== modified file '_zeitgeist/engine/sql.py'
--- _zeitgeist/engine/sql.py	2010-04-13 12:56:06 +0000
+++ _zeitgeist/engine/sql.py	2010-05-14 08:09:25 +0000
@@ -346,12 +346,14 @@
 	
 	AND = " AND "
 	OR = " OR "
+	NOT = "NOT "
 	
-	def __init__(self, relation):
+	def __init__(self, relation, negation=False):
 		self._conditions = []
 		self.arguments = []
 		self._relation = relation
 		self._no_result_member = False
+		self._negation = negation
 	
 	def __len__(self):
 		return len(self._conditions)
@@ -375,7 +377,8 @@
 	@property
 	def sql(self):
 		if self: # Do not return "()" if there are no conditions
-			return "(" + self._relation.join(self._conditions) + ")"
+			negation = self.NOT if self._negation else ""
+			return "%s(%s)" %(negation, self._relation.join(self._conditions))
 	
 	def register_no_result(self):
 		self._no_result_member = True

=== modified file 'test/datamodel-test.py'
--- test/datamodel-test.py	2010-05-12 20:09:53 +0000
+++ test/datamodel-test.py	2010-05-14 08:09:25 +0000
@@ -179,6 +179,124 @@
 		self.assertTrue(ev.in_time_range(TimeRange(0, 20)))
 		self.assertFalse(ev.in_time_range(TimeRange(0, 5)))
 		self.assertFalse(ev.in_time_range(TimeRange(15, 20)))
+		
+	def testNegationTemplateMatching(self):
+		event = Event.new_for_values(
+			subject_interpretation=Interpretation.AUDIO
+		)
+		
+		template = Event.new_for_values(
+			subject_interpretation="!%s" %Interpretation.AUDIO
+		)
+		self.assertFalse(event.matches_template(template))
+		
+		template = Event.new_for_values(
+			subject_interpretation="!%s" %Interpretation.MEDIA
+		)
+		self.assertFalse(event.matches_template(template))
+		
+		template = Event.new_for_values(
+			subject_interpretation="!%s" %Interpretation.DOCUMENT
+		)
+		self.assertTrue(event.matches_template(template))
+		
+		template = Event.new_for_values(
+			subject_interpretation="!somerandomtext"
+		)
+		self.assertTrue(event.matches_template(template))
+		
+		event = Event.new_for_values(
+			subject_interpretation=Interpretation.MEDIA
+		)
+		
+		template = Event.new_for_values(
+			subject_interpretation="!%s" %Interpretation.AUDIO
+		)
+		self.assertTrue(event.matches_template(template))
+		
+	def testNegationFields(self):
+		events = parse_events("test/data/five_events.js")
+		
+		template = Event.new_for_values(
+			interpretation = "!stfu:OpenEvent"
+		)
+		filtered_events = filter(template.matches_event, events)
+		self.assertEquals(3, len(filtered_events))
+		
+		template = Event.new_for_values(
+			manifestation = "!stfu:YourActivity"
+		)
+		filtered_events = filter(template.matches_event, events)
+		self.assertEquals(4, len(filtered_events))
+		
+		template = Event.new_for_values(
+			actor = "!firefox"
+		)
+		filtered_events = filter(template.matches_event, events)
+		self.assertEquals(2, len(filtered_events))
+		
+		template = Event.new_for_values(
+			subject_uri = "!file:///tmp/foo.txt"
+		)
+		filtered_events = filter(template.matches_event, events)
+		self.assertEquals(3, len(filtered_events))
+		
+		template = Event.new_for_values(
+			subject_interpretation = "!stfu:Document"
+		)
+		filtered_events = filter(template.matches_event, events)
+		self.assertEquals(4, len(filtered_events))
+		
+		template = Event.new_for_values(
+			subject_manifestation = "!stfu:File"
+		)
+		filtered_events = filter(template.matches_event, events)
+		self.assertEquals(0, len(filtered_events))
+		
+		template = Event.new_for_values(
+			subject_origin = "!file:///tmp"
+		)
+		filtered_events = filter(template.matches_event, events)
+		self.assertEquals(0, len(filtered_events))
+		
+		template = Event.new_for_values(
+			subject_mimetype = "!text/plain"
+		)
+		filtered_events = filter(template.matches_event, events)
+		self.assertEquals(0, len(filtered_events))
+		
+		# the next two fields do not support negation, '!' is treated as
+		# content
+		
+		template = Event.new_for_values(
+			subject_text = "!boo"
+		)
+		filtered_events = filter(template.matches_event, events)
+		self.assertEquals(0, len(filtered_events))
+		
+		template = Event.new_for_values(
+			subject_storage = "!boo"
+		)
+		filtered_events = filter(template.matches_event, events)
+		self.assertEquals(0, len(filtered_events))
+		
+	def testNegationCombination(self):
+		events = parse_events("test/data/five_events.js")
+		
+		template = Event.new_for_values(
+			interpretation = "!stfu:OpenEvent",
+			actor = "!firefox"
+		)
+		filtered_events = filter(template.matches_event, events)
+		self.assertEquals(2, len(filtered_events))
+		
+		template = Event.new_for_values(
+			interpretation = "!stfu:OpenEvent",
+			manifestation = "!stfu:YourActivity"
+		)
+		filtered_events = filter(template.matches_event, events)
+		self.assertEquals(3, len(filtered_events))
+
 
 class TimeRangeTest (unittest.TestCase):
 

=== modified file 'test/engine-test.py'
--- test/engine-test.py	2010-05-14 07:53:47 +0000
+++ test/engine-test.py	2010-05-14 08:09:25 +0000
@@ -641,6 +641,114 @@
 		self.assertEquals(1, len(ids))
 		self.assertEquals(_ids, ids)
 		
+	def testNegation(self):
+		import_events("test/data/five_events.js", self.engine)
+
+		template = Event.new_for_values(
+			interpretation = "!stfu:OpenEvent"
+		)
+		ids = self.engine.find_eventids(TimeRange.always(),
+			[template,], StorageState.Any, 10, ResultType.MostRecentEvents
+		)
+		self.assertEquals(3, len(ids))
+		
+		template = Event.new_for_values(
+			manifestation = "!stfu:YourActivity"
+		)
+		ids = self.engine.find_eventids(TimeRange.always(),
+			[template,], StorageState.Any, 10, ResultType.MostRecentEvents
+		)
+		self.assertEquals(4, len(ids))
+		
+		template = Event.new_for_values(
+			actor = "!firefox"
+		)
+		ids = self.engine.find_eventids(TimeRange.always(),
+			[template,], StorageState.Any, 10, ResultType.MostRecentEvents
+		)
+		self.assertEquals(2, len(ids))
+		
+		template = Event.new_for_values(
+			subject_uri = "!file:///tmp/foo.txt"
+		)
+		ids = self.engine.find_eventids(TimeRange.always(),
+			[template,], StorageState.Any, 10, ResultType.MostRecentEvents
+		)
+		self.assertEquals(3, len(ids))
+		
+		template = Event.new_for_values(
+			subject_interpretation = "!stfu:Document"
+		)
+		ids = self.engine.find_eventids(TimeRange.always(),
+			[template,], StorageState.Any, 10, ResultType.MostRecentEvents
+		)
+		self.assertEquals(4, len(ids))
+		
+		template = Event.new_for_values(
+			subject_manifestation = "!stfu:File"
+		)
+		ids = self.engine.find_eventids(TimeRange.always(),
+			[template,], StorageState.Any, 10, ResultType.MostRecentEvents
+		)
+		self.assertEquals(0, len(ids))
+		
+		template = Event.new_for_values(
+			subject_origin = "!file:///tmp"
+		)
+		ids = self.engine.find_eventids(TimeRange.always(),
+			[template,], StorageState.Any, 10, ResultType.MostRecentEvents
+		)
+		self.assertEquals(0, len(ids))
+		
+		template = Event.new_for_values(
+			subject_mimetype = "!text/plain"
+		)
+		ids = self.engine.find_eventids(TimeRange.always(),
+			[template,], StorageState.Any, 10, ResultType.MostRecentEvents
+		)
+		self.assertEquals(0, len(ids))
+		
+		# the next two fields do not support negation, '!' is treated as
+		# content
+		
+		template = Event.new_for_values(
+			subject_text = "!boo"
+		)
+		ids = self.engine.find_eventids(TimeRange.always(),
+			[template,], StorageState.Any, 10, ResultType.MostRecentEvents
+		)
+		self.assertEquals(0, len(ids))
+		
+		# searching by subject_storage is not working
+		#~ template = Event.new_for_values(
+			#~ subject_storage = "!boo"
+		#~ )
+		#~ ids = self.engine.find_eventids(TimeRange.always(),
+			#~ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+		#~ )
+		#~ self.assertEquals(0, len(ids))
+		
+	def testNegationCombination(self):
+		import_events("test/data/five_events.js", self.engine)
+		
+		template = Event.new_for_values(
+			interpretation = "!stfu:OpenEvent",
+			actor = "!firefox"
+		)
+		ids = self.engine.find_eventids(TimeRange.always(),
+			[template,], StorageState.Any, 10, ResultType.MostRecentEvents
+		)
+		self.assertEquals(2, len(ids))
+		
+		template = Event.new_for_values(
+			interpretation = "!stfu:OpenEvent",
+			manifestation = "!stfu:YourActivity"
+		)
+		ids = self.engine.find_eventids(TimeRange.always(),
+			[template,], StorageState.Any, 10, ResultType.MostRecentEvents
+		)
+		self.assertEquals(3, len(ids))
+		
 	def testBug580364(self):
 		events = [
 			Event.new_for_values(timestamp=1000, subject_storage="sometext"),
@@ -653,7 +761,6 @@
 			[template], StorageState.Any, 10, ResultType.MostRecentEvents)
 		
 		self.assertEquals(0, len(ids_out))
-		
 
 if __name__ == "__main__":
 	unittest.main()

=== modified file 'test/test-sql.py'
--- test/test-sql.py	2010-05-05 21:37:51 +0000
+++ test/test-sql.py	2010-05-14 08:09:25 +0000
@@ -46,6 +46,27 @@
 		
 		self.assertEquals(where.sql % tuple(where.arguments),
 		                  "(foo = 10 AND (subfoo = 68 OR subbar = 69) AND bar = 11)")
+		                  
+	def testFlatNegation(self):
+		where = WhereClause(WhereClause.OR, negation=True)
+		where.add("foo = %s", 7)
+		where.add("bar = %s", 77)
+		self.assertEquals(where.sql %tuple(where.arguments),
+			"NOT (foo = 7 OR bar = 77)")
+			
+	def testNestedNegation(self):
+		where = WhereClause(WhereClause.AND)
+		where.add ("foo = %s", 10)
+		
+		subwhere = WhereClause(WhereClause.OR, negation=True)
+		subwhere.add ("subfoo = %s", 68)
+		subwhere.add ("subbar = %s", 69)
+		where.extend(subwhere)
+		where.add ("bar = %s", 11)
+		
+		self.assertEquals(where.sql % tuple(where.arguments),
+		                  "(foo = 10 AND NOT (subfoo = 68 OR subbar = 69) AND bar = 11)")
+		
 
 if __name__ == "__main__":
 	unittest.main()

=== modified file 'zeitgeist/datamodel.py'
--- zeitgeist/datamodel.py	2010-05-13 08:23:07 +0000
+++ zeitgeist/datamodel.py	2010-05-14 08:09:25 +0000
@@ -36,8 +36,13 @@
 	'Event',
 	'Subject',
 	'NULL_EVENT',
+	'NEGATION_OPERATOR',
 ]
 
+NEGATION_OPERATOR = "!"
+
+EQUAL = lambda x,y: x == y
+
 # next() function is python >= 2.6
 try:
 	next = next
@@ -262,7 +267,7 @@
 				parent = _SYMBOLS_BY_URI[parent]
 			except KeyError, e:
 				# Parent is not a known URI
-				print 11111111111, self.uri, parent
+				#print 11111111111, self.uri, parent #debug output
 				return self.uri == parent
 		
 		# Invariant: parent is a Symbol
@@ -427,6 +432,8 @@
 		Mimetype,
 		Text,
 		Storage) = range(7)
+		
+	SUPPORTS_NEGATION = (Uri, Interpretation, Manifestation, Origin, Mimetype)
 	
 	def __init__(self, data=None):
 		super(Subject, self).__init__([""]*len(Subject.Fields))
@@ -534,12 +541,21 @@
 				continue
 			if m in (Subject.Interpretation, Subject.Manifestation):
 				# symbols are treated differently
-				if not Symbol.uri_is_child_of (self[m], subject_template[m]):
-					return False
+				comp = Symbol.uri_is_child_of
 			else:
-				if subject_template[m] != self[m]:
-					return False
+				comp = EQUAL
+			if not self._check_field_match(m, subject_template[m], comp):
+				return False
 		return True
+		
+	def _check_field_match(self, field_id, expression, comp):
+		""" Checks if an expression matches a field given by its `field_id`
+		using a `comp` comparison function """
+		if field_id in self.SUPPORTS_NEGATION \
+				and expression.startswith(NEGATION_OPERATOR):
+			return not self._check_field_match(field_id, expression[len(NEGATION_OPERATOR):], comp)
+		else:
+			return comp(self[field_id], expression)
 
 class Event(list):
 	"""
@@ -561,6 +577,8 @@
 		Interpretation,
 		Manifestation,
 		Actor) = range(5)
+		
+	SUPPORTS_NEGATION = (Interpretation, Manifestation, Actor)
 	
 	def __init__(self, struct = None):
 		"""
@@ -784,11 +802,11 @@
 				continue
 			if m in (Event.Manifestation, Event.Interpretation):
 				# special check for symbols
-				if not Symbol.uri_is_child_of(data[m], tdata[m]):
-					return False
+				comp = Symbol.uri_is_child_of
 			else:
-				if data[m] != tdata[m]:
-					return False
+				comp = EQUAL
+			if not self._check_field_match(m, tdata[m], comp):
+				return False
 		
 		# If template has no subjects we have a match
 		if len(event_template[1]) == 0 : return True
@@ -802,6 +820,15 @@
 		
 		# Template has subjects, but we never found a match
 		return False
+		
+	def _check_field_match(self, field_id, expression, comp):
+		""" Checks if an expression matches a field given by its `field_id`
+		using a `comp` comparison function """
+		if field_id in self.SUPPORTS_NEGATION \
+				and expression.startswith(NEGATION_OPERATOR):
+			return not self._check_field_match(field_id, expression[len(NEGATION_OPERATOR):], comp)
+		else:
+			return comp(self[0][field_id], expression)
 	
 	def matches_event (self, event):
 		"""


Follow ups