zeitgeist team mailing list archive
-
zeitgeist team
-
Mailing list archive
-
Message #01015
[Merge] lp:~thekorn/zeitgeist/wildcard_support into lp:zeitgeist
Markus Korn has proposed merging lp:~thekorn/zeitgeist/wildcard_support into lp:zeitgeist.
Requested reviews:
Mikkel Kamstrup Erlandsen (kamstrup)
Zeitgeist Framework Team (zeitgeist)
This branch adds wildcard support to some template fields, and completes the fix for bug 485966.
--
https://code.launchpad.net/~thekorn/zeitgeist/wildcard_support/+merge/25345
Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~thekorn/zeitgeist/wildcard_support into lp:zeitgeist.
=== modified file '_zeitgeist/engine/main.py'
--- _zeitgeist/engine/main.py 2010-05-14 11:54:52 +0000
+++ _zeitgeist/engine/main.py 2010-05-14 17:20:42 +0000
@@ -32,7 +32,7 @@
from collections import defaultdict
from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \
- ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR
+ ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR, WILDCARD
from _zeitgeist.engine.datamodel import Event, Subject
from _zeitgeist.engine.extension import ExtensionsCollection, load_class
from _zeitgeist.engine import constants
@@ -44,6 +44,12 @@
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("zeitgeist.engine")
+class NegationNotSupported(ValueError):
+ pass
+
+class WildcardNotSupported(ValueError):
+ pass
+
def parse_negation(kind, field, value, parse_negation=True):
"""checks if value starts with the negation operator,
if value starts with the negation operator but the field does
@@ -55,8 +61,42 @@
negation = True
value = value[len(NEGATION_OPERATOR):]
if negation and field not in kind.SUPPORTS_NEGATION:
- raise ValueError("This field does not support negation")
+ raise NegationNotSupported("This field does not support negation")
return value, negation
+
+def parse_wildcard(kind, field, value):
+ """checks if value ends with a wildcard,
+ if value ends with a wildcard but the field does not support wildcards
+ a ValueError is raised.
+ This function returns a (value_without_wildcard, wildcard)-tuple
+ """
+ wildcard = False
+ if value.endswith(WILDCARD):
+ wildcard = True
+ value = value[:-len(WILDCARD)]
+ if wildcard and field not in kind.SUPPORTS_WILDCARDS:
+ raise WildcardNotSupported("This field does not support wildcards")
+ return value, wildcard
+
+def parse_operators(kind, field, value):
+ """runs both (parse_negation and parse_wildcard) parser functions
+ on query values, and handles the special case of Subject.Text correctly.
+ returns a (value_without_negation_and_wildcard, negation, wildcard)-tuple
+ """
+ try:
+ value, negation = parse_negation(kind, field, value)
+ except ValueError:
+ if kind is Subject and field == Subject.Text:
+ # we do not support negation of the text field,
+ # the text field starts with the NEGATION_OPERATOR
+ # so we handle this string as the content instead
+ # of an operator
+ negation = False
+ else:
+ raise
+ value, wildcard = parse_wildcard(kind, field, value)
+ return value, negation, wildcard
+
class ZeitgeistEngine:
@@ -186,58 +226,57 @@
subwhere.add("id = ?", event_template.id)
try:
- value, negation = parse_negation(Event, Event.Interpretation, event_template.interpretation)
+ value, negation, wildcard = parse_operators(Event, Event.Interpretation, event_template.interpretation)
# Expand event interpretation children
event_interp_where = WhereClause(WhereClause.OR, negation)
for child_interp in (Symbol.find_child_uris_extended(value)):
if child_interp:
- event_interp_where.add("interpretation = ?",
- self._interpretation[child_interp])
+ event_interp_where.add_text_condition("interpretation",
+ child_interp, like=wildcard, cache=self._interpretation)
if event_interp_where:
subwhere.extend(event_interp_where)
- value, negation = parse_negation(Event, Event.Manifestation, event_template.manifestation)
+ value, negation, wildcard = parse_operators(Event, Event.Manifestation, event_template.manifestation)
# Expand event manifestation children
event_manif_where = WhereClause(WhereClause.OR, negation)
for child_manif in (Symbol.find_child_uris_extended(value)):
if child_manif:
- event_manif_where.add("manifestation = ?",
- self._manifestation[child_manif])
+ event_manif_where.add_text_condition("manifestation",
+ child_manif, like=wildcard, cache=self._manifestation)
if event_manif_where:
subwhere.extend(event_manif_where)
- value, negation = parse_negation(Subject, Subject.Interpretation, subject_template.interpretation)
+ value, negation, wildcard = parse_operators(Subject, Subject.Interpretation, subject_template.interpretation)
# Expand subject interpretation children
su_interp_where = WhereClause(WhereClause.OR, negation)
for child_interp in (Symbol.find_child_uris_extended(value)):
if child_interp:
- su_interp_where.add("subj_interpretation = ?",
- self._interpretation[child_interp])
+ su_interp_where.add_text_condition("subj_interpretation",
+ child_interp, like=wildcard, cache=self._interpretation)
if su_interp_where:
subwhere.extend(su_interp_where)
- value, negation = parse_negation(Subject, Subject.Manifestation, subject_template.manifestation)
+ value, negation, wildcard = parse_operators(Subject, Subject.Manifestation, subject_template.manifestation)
# Expand subject manifestation children
su_manif_where = WhereClause(WhereClause.OR, negation)
for child_manif in (Symbol.find_child_uris_extended(value)):
if child_manif:
- su_manif_where.add("subj_manifestation = ?",
- self._manifestation[child_manif])
+ su_manif_where.add_text_condition("subj_manifestation",
+ child_manif, like=wildcard, cache=self._manifestation)
if su_manif_where:
subwhere.extend(su_manif_where)
# FIXME: Expand mime children as well.
# Right now we only do exact matching for mimetypes
# thekorn: this will be fixed when wildcards are supported
- value, negation = parse_negation(Subject, Subject.Mimetype, subject_template.mimetype)
+ value, negation, wildcard = parse_operators(Subject, Subject.Mimetype, subject_template.mimetype)
if value:
- subwhere.add("subj_mimetype %s= ?" %(NEGATION_OPERATOR if negation else ""),
- self._mimetype[value])
+ subwhere.add_text_condition("subj_mimetype",
+ value, wildcard, negation, cache=self._mimetype)
- value, negation = parse_negation(Event, Event.Actor, event_template.actor)
+ value, negation, wildcard = parse_operators(Event, Event.Actor, event_template.actor)
if value:
- subwhere.add("actor %s= ?" %(NEGATION_OPERATOR if negation else ""),
- self._actor[value])
+ subwhere.add_text_condition("actor", value, wildcard, negation, cache=self._actor)
except KeyError, e:
# Value not in DB
log.debug("Unknown entity in query: %s" % e)
@@ -247,18 +286,8 @@
for key in ("uri", "origin", "text"):
value = getattr(subject_template, key)
if value:
- try:
- value, negation = parse_negation(Subject, getattr(Subject, key.title()), value)
- except ValueError:
- if key == "text":
- # we do not support negation of the text field,
- # the text field starts with the NEGATION_OPERATOR
- # so we handle this string as the content instead
- # of an operator
- negation = False
- else:
- raise
- subwhere.add("subj_%s %s= ?" %(key, NEGATION_OPERATOR if negation else ""), value)
+ value, negation, wildcard = parse_operators(Subject, getattr(Subject, key.title()), value)
+ subwhere.add_text_condition("subj_%s" %key, value, wildcard, negation)
where_or.extend(subwhere)
return where_or
=== modified file '_zeitgeist/engine/sql.py'
--- _zeitgeist/engine/sql.py 2010-05-13 11:46:31 +0000
+++ _zeitgeist/engine/sql.py 2010-05-14 17:20:42 +0000
@@ -28,6 +28,12 @@
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("zeitgeist.sql")
+TABLE_MAP = {
+ "subj_mimetype": "mimetype",
+ "subj_origin": "uri",
+ "subj_uri": "uri",
+}
+
class UnicodeCursor(sqlite3.Cursor):
@staticmethod
@@ -366,6 +372,31 @@
self.arguments.append(arguments)
else:
self.arguments.extend(arguments)
+
+ def add_text_condition(self, column, value, like=False, negation=False, cache=None):
+ if like:
+ # thekorn: unfortunately the data in event_view is a bit inconsistent
+ # e.g.:
+ # subj_uri and subj_origin are presented as string-values
+ # actor and subj_mimetype are ids
+ # (LP: #580601)
+ if column in ("subj_uri", "subj_origin"):
+ value_type = "value"
+ elif column in ("actor", "subj_mimetype"):
+ value_type = "id"
+ else:
+ raise AssertionError("We don't know how to handle this type of data")
+ # thekorn: this is a first (unoptimized) version
+ # see http://www.sqlite.org/optoverview.html '4.0 The LIKE optimization'
+ # for how this will look in the future
+ sql = "%s %sIN (SELECT %s FROM %s WHERE value GLOB ?)" \
+ %(column, self.NOT if negation else "", value_type, TABLE_MAP.get(column, column))
+ value += "*"
+ else:
+ sql = "%s %s= ?" %(column, "!" if negation else "")
+ if cache is not None:
+ value = cache[value]
+ self.add(sql, value)
def extend(self, where):
self.add(where.sql, where.arguments)
=== modified file 'test/datamodel-test.py'
--- test/datamodel-test.py 2010-05-14 11:54:52 +0000
+++ test/datamodel-test.py 2010-05-14 17:20:42 +0000
@@ -304,6 +304,42 @@
event = Event.new_for_values(timestamp=1000, subject_storage="sometext")
template = Event.new_for_values(subject_storage="xxxx")
self.assertRaises(ValueError, template.matches_event, event)
+
+ def testWildcardTemplateMatching(self):
+ event = Event.new_for_values(actor="boo bar")
+
+ template = Event.new_for_values(actor="boo*")
+ self.assertTrue(event.matches_template(template))
+
+ # wildcards are not supported in interpretation,
+ # so they are handled as content
+ event = Event.new_for_values(interpretation="boo bar")
+
+ template = Event.new_for_values(interpretation="boo*")
+ self.assertFalse(event.matches_template(template))
+
+ event = Event.new_for_values(subject_uri="boo bar")
+
+ template = Event.new_for_values(subject_uri="boo*")
+ self.assertTrue(event.matches_template(template))
+
+ event = Event.new_for_values(subject_origin="boo bar")
+
+ template = Event.new_for_values(subject_origin="boo*")
+ self.assertTrue(event.matches_template(template))
+
+ event = Event.new_for_values(subject_mimetype="boo bar")
+
+ template = Event.new_for_values(subject_mimetype="boo*")
+ self.assertTrue(event.matches_template(template))
+
+ def testNegationWildcardTemplateMatching(self):
+ event = Event.new_for_values(actor="boo bar")
+
+ template = Event.new_for_values(actor="!boo*")
+ self.assertFalse(event.matches_template(template))
+ template = Event.new_for_values(actor="!test*")
+ self.assertTrue(event.matches_template(template))
class TimeRangeTest (unittest.TestCase):
=== modified file 'test/engine-test.py'
--- test/engine-test.py 2010-05-14 11:54:52 +0000
+++ test/engine-test.py 2010-05-14 17:20:42 +0000
@@ -763,6 +763,51 @@
TimeRange.always(), [template], StorageState.Any, 10,
ResultType.MostRecentEvents
)
+
+ def testWildcard(self):
+ import_events("test/data/five_events.js", self.engine)
+
+ template = Event.new_for_values(
+ actor = "ge*"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(2, len(ids))
+
+ template = Event.new_for_values(
+ actor = "!ge*"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(3, len(ids))
+
+ template = Event.new_for_values(
+ subject_mimetype = "text/*"
+ )
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(5, len(ids))
+
+ template = Event.new_for_values(
+ subject_uri = "http://*"
+ )
+
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(1, len(ids))
+
+ template = Event.new_for_values(
+ subject_origin = "file://*"
+ )
+
+ ids = self.engine.find_eventids(TimeRange.always(),
+ [template,], StorageState.Any, 10, ResultType.MostRecentEvents
+ )
+ self.assertEquals(5, len(ids))
if __name__ == "__main__":
unittest.main()
=== modified file 'test/test-sql.py'
--- test/test-sql.py 2010-05-13 13:12:12 +0000
+++ test/test-sql.py 2010-05-14 17:20:42 +0000
@@ -66,6 +66,27 @@
self.assertEquals(where.sql % tuple(where.arguments),
"(foo = 10 AND NOT (subfoo = 68 OR subbar = 69) AND bar = 11)")
+
+ def testAddTextCondition(self):
+ where = WhereClause(WhereClause.AND)
+ where.add_text_condition("boo", "bar")
+ self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments),
+ "(boo = bar)")
+
+ where = WhereClause(WhereClause.AND)
+ where.add_text_condition("boo", "bar", negation=True)
+ self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments),
+ "(boo != bar)")
+
+ where = WhereClause(WhereClause.AND)
+ where.add_text_condition("boo", "bar", like=True)
+ self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments),
+ "(boo IN (SELECT id FROM boo WHERE value GLOB bar*))")
+
+ where = WhereClause(WhereClause.AND)
+ where.add_text_condition("boo", "bar", like=True, negation=True)
+ self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments),
+ "(boo NOT IN (SELECT id FROM boo WHERE value GLOB bar*))")
if __name__ == "__main__":
=== modified file 'zeitgeist/datamodel.py'
--- zeitgeist/datamodel.py 2010-05-14 11:54:52 +0000
+++ zeitgeist/datamodel.py 2010-05-14 17:20:42 +0000
@@ -40,10 +40,15 @@
]
NEGATION_OPERATOR = "!"
+WILDCARD = "*"
def EQUAL(x, y):
"""checks if both given arguments are equal"""
return x == y
+
+def STARTSWITH(x, y):
+ """checks if 'x' startswith 'y'"""
+ return x.startswith(y)
# next() function is python >= 2.6
try:
@@ -436,6 +441,7 @@
Storage) = range(7)
SUPPORTS_NEGATION = (Uri, Interpretation, Manifestation, Origin, Mimetype)
+ SUPPORTS_WILDCARDS = (Uri, Origin, Mimetype)
def __init__(self, data=None):
super(Subject, self).__init__([""]*len(Subject.Fields))
@@ -560,6 +566,10 @@
if field_id in self.SUPPORTS_NEGATION \
and expression.startswith(NEGATION_OPERATOR):
return not self._check_field_match(field_id, expression[len(NEGATION_OPERATOR):], comp)
+ elif field_id in self.SUPPORTS_WILDCARDS \
+ and expression.endswith(WILDCARD):
+ assert comp == EQUAL, "wildcards only work for pure text fields"
+ return self._check_field_match(field_id, expression[:-len(WILDCARD)], STARTSWITH)
else:
return comp(self[field_id], expression)
@@ -585,6 +595,7 @@
Actor) = range(5)
SUPPORTS_NEGATION = (Interpretation, Manifestation, Actor)
+ SUPPORTS_WILDCARDS = (Actor,)
def __init__(self, struct = None):
"""
@@ -833,6 +844,10 @@
if field_id in self.SUPPORTS_NEGATION \
and expression.startswith(NEGATION_OPERATOR):
return not self._check_field_match(field_id, expression[len(NEGATION_OPERATOR):], comp)
+ elif field_id in self.SUPPORTS_WILDCARDS \
+ and expression.endswith(WILDCARD):
+ assert comp == EQUAL, "wildcards only work for pure text fields"
+ return self._check_field_match(field_id, expression[:-len(WILDCARD)], STARTSWITH)
else:
return comp(self[0][field_id], expression)
Follow ups