← Back to team overview

zeitgeist team mailing list archive

[Merge] lp:~thekorn/zeitgeist/db_schema_3 into lp:zeitgeist

 

Markus Korn has proposed merging lp:~thekorn/zeitgeist/db_schema_3 into lp:zeitgeist.

Requested reviews:
  Zeitgeist Framework Team (zeitgeist)
Related bugs:
  #673394 Queries for subj_uri and subj_origin are using no index
  https://bugs.launchpad.net/bugs/673394
  #673452 Using the subj_origin column of event_view is slower than it should be
  https://bugs.launchpad.net/bugs/673452


This branch introduces a new db schema (version 3) and bundles a few performance related fixes, [0] compares the performance of this branch lp:zeitgeist (each query is run ~10 times, the plot shows the average).
Changes in detail:
 * renamed event_view.subj_origin to event_view.subj_origin_uri and event_view.subj_origin_id to event_view.subj_origin, this makes "subj_origin" point to an id in the uri table, and "subj_origin_uri" the resolved value. Also we are not using the redundant "SELECT id FROM uri WHERE uri.id=event.subj_origin" query anymore. (LP: #673452)
 * removed event_view.subj_uri_id and added event.subj_id, both values are in fact the same, but we again sting on another "SELECT" query in the event_view.
 * Optimization in FindEvent queries: we are not ordering and grouping by resolved values anymore, we are doing it on integer ids, which is much faster (LP: #673394)

[0] https://dl.dropbox.com/u/174479/compare_queries.svg
-- 
https://code.launchpad.net/~thekorn/zeitgeist/db_schema_3/+merge/40877
Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~thekorn/zeitgeist/db_schema_3 into lp:zeitgeist.
=== modified file '_zeitgeist/engine/__init__.py'
--- _zeitgeist/engine/__init__.py	2010-10-19 13:54:12 +0000
+++ _zeitgeist/engine/__init__.py	2010-11-15 15:11:50 +0000
@@ -55,7 +55,7 @@
 	
 	# Required version of DB schema
 	CORE_SCHEMA="core"
-	CORE_SCHEMA_VERSION = 2
+	CORE_SCHEMA_VERSION = 3
 	
 	USER_EXTENSION_PATH = os.path.join(DATA_PATH, "extensions")
 

=== modified file '_zeitgeist/engine/main.py'
--- _zeitgeist/engine/main.py	2010-11-12 17:39:28 +0000
+++ _zeitgeist/engine/main.py	2010-11-15 15:11:50 +0000
@@ -148,8 +148,9 @@
 	
 	def _get_subject_from_row(self, row):
 		subject = Subject()
-		for field in ("uri", "origin", "text", "storage"):
+		for field in ("uri", "text", "storage"):
 			setattr(subject, field, row["subj_" + field])
+		setattr(subject, "origin", row["subj_origin_uri"])
 		for field in ("interpretation", "manifestation", "mimetype"):
 			setattr(subject, field,
 				getattr(self, "_" + field).value(row["subj_" + field]))
@@ -353,10 +354,13 @@
 		
 		sql += (" ORDER BY timestamp DESC",
 			" ORDER BY timestamp ASC",
-			" GROUP BY subj_uri ORDER BY timestamp DESC",
-			" GROUP BY subj_uri ORDER BY timestamp ASC",
-			" GROUP BY subj_uri ORDER BY COUNT(subj_uri) DESC, timestamp DESC",
-			" GROUP BY subj_uri ORDER BY COUNT(subj_uri) ASC, timestamp ASC",
+			# thekorn: please note, event.subj_id == uri.id, as in
+			# the subj_id points directly to an entry in the uri table,
+			# so we are in fact grouping by subj_uris here
+			" GROUP BY subj_id ORDER BY timestamp DESC",
+			" GROUP BY subj_id ORDER BY timestamp ASC",
+			" GROUP BY subj_id ORDER BY COUNT(subj_id) DESC, timestamp DESC",
+			" GROUP BY subj_id ORDER BY COUNT(subj_id) ASC, timestamp ASC",
 			" GROUP BY actor ORDER BY COUNT(actor) DESC, timestamp DESC", 
 			" GROUP BY actor ORDER BY COUNT(actor) ASC, timestamp ASC",
 			" GROUP BY actor ORDER BY timestamp DESC",

=== modified file '_zeitgeist/engine/sql.py'
--- _zeitgeist/engine/sql.py	2010-10-25 20:26:03 +0000
+++ _zeitgeist/engine/sql.py	2010-11-15 15:11:50 +0000
@@ -369,14 +369,12 @@
 					AS payload,
 				(SELECT value FROM uri WHERE uri.id=event.subj_id)
 					AS subj_uri,
-				(SELECT id FROM uri WHERE uri.id=event.subj_id)
-					AS subj_uri_id,
+				event.subj_id, -- #this directly points to an uri
 				event.subj_interpretation,
 				event.subj_manifestation,
+				event.subj_origin,
 				(SELECT value FROM uri WHERE uri.id=event.subj_origin)
-					AS subj_origin,
-				(SELECT id FROM uri WHERE uri.id=event.subj_origin)
-					AS subj_origin_id,
+					AS subj_origin_uri,
 				event.subj_mimetype,
 				(SELECT value FROM text WHERE text.id = event.subj_text)
 					AS subj_text,
@@ -528,14 +526,17 @@
 	def add_text_condition(self, column, value, like=False, negation=False, cache=None):
 		if like:
 			assert column in ("subj_uri", "subj_origin", "actor", "subj_mimetype"), \
-				"prefix search on the %r column is not supported by zeitgeist"
-			if column in ("subj_uri", "subj_origin"):
-				view_column = "%s_id" %column
+				"prefix search on the %r column is not supported by zeitgeist" %column
+			if column == "subj_uri":
+				# subj_id directly points to the id of an uri entry
+				view_column = "subj_id"
 			else:
 				view_column = column
 			optimized_glob, value = self.optimize_glob("id", TABLE_MAP.get(column, column), value)
 			sql = "%s %sIN (%s)" %(view_column, self.NOT if negation else "", optimized_glob)
 		else:
+			if column == "subj_origin":
+				column = "subj_origin_uri"
 			sql = "%s %s= ?" %(column, "!" if negation else "")
 			if cache is not None:
 				value = cache[value]

=== added file '_zeitgeist/engine/upgrades/core_2_3.py'
--- _zeitgeist/engine/upgrades/core_2_3.py	1970-01-01 00:00:00 +0000
+++ _zeitgeist/engine/upgrades/core_2_3.py	2010-11-15 15:11:50 +0000
@@ -0,0 +1,11 @@
+# upgrading from db version 2 to 1
+# this requires no update to the actual data in the database
+# it is only a schema change of event_view. This change is done
+# in sql.create_db()
+
+# the schema change is renaming 'subj_uri_id' column to 'subj_id', as
+# both values are the same. Also 'subj_origin' gets renamed to
+# 'subj_origin_uri' and 'subj_origin_id' to 'subj_origin'.
+
+def run(cursor):
+    pass

=== modified file 'test/sql-test.py'
--- test/sql-test.py	2010-09-22 17:15:03 +0000
+++ test/sql-test.py	2010-11-15 15:11:50 +0000
@@ -97,12 +97,12 @@
 		where = WhereClause(WhereClause.AND)
 		where.add_text_condition("subj_uri", "bar", like=True)
 		self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments),
-			"(subj_uri_id IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))")
+			"(subj_id IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))")
 			
 		where = WhereClause(WhereClause.AND)
 		where.add_text_condition("subj_origin", "bar", like=True)
 		self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments),
-			"(subj_origin_id IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))")
+			"(subj_origin IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))")
 			
 		where = WhereClause(WhereClause.AND)
 		where.add_text_condition("actor", "bar", like=True, negation=True)