zeitgeist team mailing list archive
-
zeitgeist team
-
Mailing list archive
-
Message #02426
[Merge] lp:~thekorn/zeitgeist/db_schema_3 into lp:zeitgeist
Markus Korn has proposed merging lp:~thekorn/zeitgeist/db_schema_3 into lp:zeitgeist.
Requested reviews:
Zeitgeist Framework Team (zeitgeist)
Related bugs:
#673394 Queries for subj_uri and subj_origin are using no index
https://bugs.launchpad.net/bugs/673394
#673452 Using the subj_origin column of event_view is slower than it should be
https://bugs.launchpad.net/bugs/673452
This branch introduces a new db schema (version 3) and bundles a few performance related fixes, [0] compares the performance of this branch lp:zeitgeist (each query is run ~10 times, the plot shows the average).
Changes in detail:
* renamed event_view.subj_origin to event_view.subj_origin_uri and event_view.subj_origin_id to event_view.subj_origin, this makes "subj_origin" point to an id in the uri table, and "subj_origin_uri" the resolved value. Also we are not using the redundant "SELECT id FROM uri WHERE uri.id=event.subj_origin" query anymore. (LP: #673452)
* removed event_view.subj_uri_id and added event.subj_id, both values are in fact the same, but we again sting on another "SELECT" query in the event_view.
* Optimization in FindEvent queries: we are not ordering and grouping by resolved values anymore, we are doing it on integer ids, which is much faster (LP: #673394)
[0] https://dl.dropbox.com/u/174479/compare_queries.svg
--
https://code.launchpad.net/~thekorn/zeitgeist/db_schema_3/+merge/40877
Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~thekorn/zeitgeist/db_schema_3 into lp:zeitgeist.
=== modified file '_zeitgeist/engine/__init__.py'
--- _zeitgeist/engine/__init__.py 2010-10-19 13:54:12 +0000
+++ _zeitgeist/engine/__init__.py 2010-11-15 15:11:50 +0000
@@ -55,7 +55,7 @@
# Required version of DB schema
CORE_SCHEMA="core"
- CORE_SCHEMA_VERSION = 2
+ CORE_SCHEMA_VERSION = 3
USER_EXTENSION_PATH = os.path.join(DATA_PATH, "extensions")
=== modified file '_zeitgeist/engine/main.py'
--- _zeitgeist/engine/main.py 2010-11-12 17:39:28 +0000
+++ _zeitgeist/engine/main.py 2010-11-15 15:11:50 +0000
@@ -148,8 +148,9 @@
def _get_subject_from_row(self, row):
subject = Subject()
- for field in ("uri", "origin", "text", "storage"):
+ for field in ("uri", "text", "storage"):
setattr(subject, field, row["subj_" + field])
+ setattr(subject, "origin", row["subj_origin_uri"])
for field in ("interpretation", "manifestation", "mimetype"):
setattr(subject, field,
getattr(self, "_" + field).value(row["subj_" + field]))
@@ -353,10 +354,13 @@
sql += (" ORDER BY timestamp DESC",
" ORDER BY timestamp ASC",
- " GROUP BY subj_uri ORDER BY timestamp DESC",
- " GROUP BY subj_uri ORDER BY timestamp ASC",
- " GROUP BY subj_uri ORDER BY COUNT(subj_uri) DESC, timestamp DESC",
- " GROUP BY subj_uri ORDER BY COUNT(subj_uri) ASC, timestamp ASC",
+ # thekorn: please note, event.subj_id == uri.id, as in
+ # the subj_id points directly to an entry in the uri table,
+ # so we are in fact grouping by subj_uris here
+ " GROUP BY subj_id ORDER BY timestamp DESC",
+ " GROUP BY subj_id ORDER BY timestamp ASC",
+ " GROUP BY subj_id ORDER BY COUNT(subj_id) DESC, timestamp DESC",
+ " GROUP BY subj_id ORDER BY COUNT(subj_id) ASC, timestamp ASC",
" GROUP BY actor ORDER BY COUNT(actor) DESC, timestamp DESC",
" GROUP BY actor ORDER BY COUNT(actor) ASC, timestamp ASC",
" GROUP BY actor ORDER BY timestamp DESC",
=== modified file '_zeitgeist/engine/sql.py'
--- _zeitgeist/engine/sql.py 2010-10-25 20:26:03 +0000
+++ _zeitgeist/engine/sql.py 2010-11-15 15:11:50 +0000
@@ -369,14 +369,12 @@
AS payload,
(SELECT value FROM uri WHERE uri.id=event.subj_id)
AS subj_uri,
- (SELECT id FROM uri WHERE uri.id=event.subj_id)
- AS subj_uri_id,
+ event.subj_id, -- #this directly points to an uri
event.subj_interpretation,
event.subj_manifestation,
+ event.subj_origin,
(SELECT value FROM uri WHERE uri.id=event.subj_origin)
- AS subj_origin,
- (SELECT id FROM uri WHERE uri.id=event.subj_origin)
- AS subj_origin_id,
+ AS subj_origin_uri,
event.subj_mimetype,
(SELECT value FROM text WHERE text.id = event.subj_text)
AS subj_text,
@@ -528,14 +526,17 @@
def add_text_condition(self, column, value, like=False, negation=False, cache=None):
if like:
assert column in ("subj_uri", "subj_origin", "actor", "subj_mimetype"), \
- "prefix search on the %r column is not supported by zeitgeist"
- if column in ("subj_uri", "subj_origin"):
- view_column = "%s_id" %column
+ "prefix search on the %r column is not supported by zeitgeist" %column
+ if column == "subj_uri":
+ # subj_id directly points to the id of an uri entry
+ view_column = "subj_id"
else:
view_column = column
optimized_glob, value = self.optimize_glob("id", TABLE_MAP.get(column, column), value)
sql = "%s %sIN (%s)" %(view_column, self.NOT if negation else "", optimized_glob)
else:
+ if column == "subj_origin":
+ column = "subj_origin_uri"
sql = "%s %s= ?" %(column, "!" if negation else "")
if cache is not None:
value = cache[value]
=== added file '_zeitgeist/engine/upgrades/core_2_3.py'
--- _zeitgeist/engine/upgrades/core_2_3.py 1970-01-01 00:00:00 +0000
+++ _zeitgeist/engine/upgrades/core_2_3.py 2010-11-15 15:11:50 +0000
@@ -0,0 +1,11 @@
+# upgrading from db version 2 to 1
+# this requires no update to the actual data in the database
+# it is only a schema change of event_view. This change is done
+# in sql.create_db()
+
+# the schema change is renaming 'subj_uri_id' column to 'subj_id', as
+# both values are the same. Also 'subj_origin' gets renamed to
+# 'subj_origin_uri' and 'subj_origin_id' to 'subj_origin'.
+
+def run(cursor):
+ pass
=== modified file 'test/sql-test.py'
--- test/sql-test.py 2010-09-22 17:15:03 +0000
+++ test/sql-test.py 2010-11-15 15:11:50 +0000
@@ -97,12 +97,12 @@
where = WhereClause(WhereClause.AND)
where.add_text_condition("subj_uri", "bar", like=True)
self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments),
- "(subj_uri_id IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))")
+ "(subj_id IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))")
where = WhereClause(WhereClause.AND)
where.add_text_condition("subj_origin", "bar", like=True)
self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments),
- "(subj_origin_id IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))")
+ "(subj_origin IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))")
where = WhereClause(WhereClause.AND)
where.add_text_condition("actor", "bar", like=True, negation=True)