[Merge] lp:~seif/zeitgeist/memory into lp:zeitgeist
Seif Lotfy has proposed merging lp:~seif/zeitgeist/memory into lp:zeitgeist.
Requested reviews:
Zeitgeist Framework Team (zeitgeist)
For more details, see:
https://code.launchpad.net/~seif/zeitgeist/memory/+merge/63848
Reduce memory consumption by:
1) using generators
2) disabling the SQLite cache (no real performance decline)
3) using arrays instead of lists for storing ids
(4) using tuples instead of lists where possible)
The result is less memory consumption; a rough sketch of the array idea follows below. I think more can be done if we start using __slots__... but this is a clean hack without messing up the API/ABI.
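For illustration, here is a minimal sketch of point 3. This is not from the patch itself, and the exact byte counts vary by platform and Python build:

    import sys
    from array import array

    ids = range(100000)  # stand-in for a batch of event ids

    as_list = list(ids)         # one pointer per element, plus an int object each
    as_array = array("i", ids)  # ids packed contiguously as raw C ints

    # getsizeof only measures the container; the list additionally keeps
    # all the int objects alive, so the real gap is bigger than shown.
    print(sys.getsizeof(as_list))
    print(sys.getsizeof(as_array))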
--
https://code.launchpad.net/~seif/zeitgeist/memory/+merge/63848
Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~seif/zeitgeist/memory into lp:zeitgeist.
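On the __slots__ idea mentioned above, a rough sketch of what it could look like (a hypothetical class, not the real Event/Subject implementation):

    class SlottedSubject(object):
        # __slots__ replaces the per-instance __dict__ with fixed storage,
        # saving memory per object. The trade-off is that arbitrary new
        # attributes can no longer be attached, which is why adopting it
        # would touch the API/ABI.
        __slots__ = ("uri", "interpretation", "manifestation")

        def __init__(self, uri, interpretation, manifestation):
            self.uri = uri
            self.interpretation = interpretation
            self.manifestation = manifestation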
=== modified file '_zeitgeist/engine/datamodel.py'
--- _zeitgeist/engine/datamodel.py 2011-01-17 15:54:47 +0000
+++ _zeitgeist/engine/datamodel.py 2011-06-08 11:26:25 +0000
@@ -78,4 +78,4 @@
}.iteritems():
for prop in props:
datasource[prop] = plaintype(datasource[prop])
- return list(datasource)
+ return tuple(datasource)
=== modified file '_zeitgeist/engine/main.py'
--- _zeitgeist/engine/main.py 2011-06-04 14:49:19 +0000
+++ _zeitgeist/engine/main.py 2011-06-08 11:26:25 +0000
@@ -29,6 +29,7 @@
import os
import logging
from collections import defaultdict
+from array import array
from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \
ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR, WILDCARD
@@ -199,8 +200,8 @@
return []
# Split ids into cached and uncached
- uncached_ids = []
- cached_ids = []
+ uncached_ids = array("i")
+ cached_ids = array("i")
# If the ids batch is greater than MAX_CACHE_BATCH_SIZE, ignore the cache
use_cache = True
@@ -238,19 +239,19 @@
sorted_events[n] = event
# Get uncached events
- rows = tuple(row for row in self._cursor.execute("""
- SELECT * FROM event_view
- WHERE id IN (%s)
- """ % ",".join("%d" % id for id in uncached_ids)))
+ rows = self._cursor.execute(""" SELECT * FROM event_view WHERE id IN (%s)
+ """ % ",".join("%d" % id for id in uncached_ids))
- log.debug("Got %d raw events in %fs" % (len(rows), time.time()-t))
+ time_get_uncached = time.time() - t
t = time.time()
t_get_event = 0
t_get_subject = 0
t_apply_get_hooks = 0
+ row_counter = 0
for row in rows:
+ row_counter += 1
# Assumption: all rows of a same event for its different
# subjects are in consecutive order.
t_get_event -= time.time()
@@ -286,6 +287,7 @@
# at a decent level
+ log.debug("Got %d raw events in %fs" % (row_counter, time_get_uncached))
log.debug("Got %d events in %fs" % (len(sorted_events), time.time()-t))
log.debug(" Where time spent in _get_event_from_row in %fs" % (t_get_event))
log.debug(" Where time spent in _get_subject_from_row in %fs" % (t_get_subject))
@@ -561,13 +563,12 @@
if max_events > 0:
sql += " LIMIT %d" % max_events
-
- result = tuple(r[0] for r in self._cursor.execute(sql, where.arguments))
+ result = array("i", self._cursor.execute(sql, where.arguments).fetch(0))
if return_mode == 0:
log.debug("Found %d event IDs in %fs" % (len(result), time.time()- t))
elif return_mode == 1:
- log.debug("Found %d events IDs in %fs" % (len(result), time.time()- t))
+ log.debug("Found %d events in %fs" % (len(result), time.time()- t))
result = self.get_events(ids=result, sender=sender)
else:
raise Exception("%d" % return_mode)
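The main.py hunks above amount to one pattern: iterate the cursor lazily instead of materializing every row up front, and pack the ids into a typed array. A self-contained sketch of that pattern (the table and data here are made up):

    import sqlite3
    from array import array

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE event_view (id INTEGER)")
    conn.executemany("INSERT INTO event_view VALUES (?)",
                     [(i,) for i in range(1, 6)])

    cursor = conn.execute("SELECT id FROM event_view")

    # The old code kept a tuple of Python int objects; here the rows are
    # streamed off the cursor one at a time and stored as raw C ints.
    result = array("i", (row[0] for row in cursor))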
=== modified file '_zeitgeist/engine/remote.py'
--- _zeitgeist/engine/remote.py 2011-06-02 20:15:11 +0000
+++ _zeitgeist/engine/remote.py 2011-06-08 11:26:25 +0000
@@ -77,7 +77,7 @@
for event in events:
if event is not None:
event._make_dbus_sendable()
- return [NULL_EVENT if event is None else event for event in events]
+ return tuple(NULL_EVENT if event is None else event for event in events)
# Reading stuff
=== modified file '_zeitgeist/engine/sql.py'
--- _zeitgeist/engine/sql.py 2011-05-18 20:48:13 +0000
+++ _zeitgeist/engine/sql.py 2011-06-08 11:26:25 +0000
@@ -75,6 +75,14 @@
explain_query(super(UnicodeCursor, self), statement, parameters)
return super(UnicodeCursor, self).execute(statement, parameters)
+ def fetch(self, index=-1):
+ if index >= 0:
+ for row in self:
+ yield row[index]
+ else:
+ for row in self:
+ yield row
+
def _get_schema_version (cursor, schema_name):
"""
Returns the schema version for schema_name or returns 0 in case
@@ -206,6 +214,8 @@
# we decided to set locking_mode to EXCLUSIVE, from now on only
# one connection to the database is allowed.
# To revert this setting set locking_mode to NORMAL.
cursor.execute("PRAGMA locking_mode = EXCLUSIVE")
+ # Seif: Disable cache since we already kinda support our own cache (LRUCache)
+ cursor.execute("PRAGMA cache_size = 0")
# thekorn: as part of the workaround for (LP: #598666) we need to
# create the '_fix_cache' TEMP table on every start,
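On the cache_size pragma: SQLite's page cache is per-connection (the default at the time was around 2000 pages), so the setting has to be issued on every new connection. A minimal sketch, assuming a throwaway in-memory database:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    cursor = conn.cursor()
    # Drop SQLite's own page cache; reads now go through the pager/OS
    # directly, trading some re-read cost for a smaller resident
    # footprint. Zeitgeist already keeps its own LRUCache on top, so
    # the double caching was redundant.
    cursor.execute("PRAGMA cache_size = 0")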
=== modified file 'test/engine-test.py'
--- test/engine-test.py 2011-05-07 12:00:54 +0000
+++ test/engine-test.py 2011-06-08 11:26:25 +0000
@@ -446,7 +446,7 @@
event = Event.new_for_values(subjects=[subj1, subj2])
orig_ids = self.engine.insert_events([event])
result_ids = self.engine.find_eventids(TimeRange.always(), [Event()], StorageState.Any, 0, 1)
- self.assertEquals(orig_ids, result_ids)
+ self.assertEquals(orig_ids, list(result_ids))
def testFindEventsEventTemplate(self):
import_events("test/data/five_events.js", self.engine)
@@ -603,7 +603,7 @@
[tmpl], StorageState.Any, 10, ResultType.MostRecentEvents)
self.assertEquals(1, len(ids))
- self.assertEquals(_ids, ids)
+ self.assertEquals(_ids, list(ids))
def testNegation(self):
import_events("test/data/five_events.js", self.engine)
@@ -1035,7 +1035,7 @@
reverse=True
)
]
- self.assertEquals(ids, sorted_event_ids)
+ self.assertEquals(list(ids), sorted_event_ids)
def testResultTypesLeastRecentEvents(self):
import_events("test/data/five_events.js", self.engine)
@@ -1049,7 +1049,7 @@
event.id for event in sorted(events,
cmp=lambda x, y: cmp(int(x.timestamp), int(y.timestamp)))
]
- self.assertEquals(ids, sorted_event_ids)
+ self.assertEquals(list(ids), sorted_event_ids)
def testResultTypesMostPopularActor(self):
import_events("test/data/twenty_events.js", self.engine)
@@ -1185,20 +1185,20 @@
# Get the least recent actors
ids = self.engine.find_eventids(TimeRange.always(),
[], StorageState.Any, 0, ResultType.OldestActor)
- self.assertEquals(ids, [1, 3, 4])
+ self.assertEquals(list(ids), [1, 3, 4])
# Get the least recent actors for "home/boo"
template = Event.new_for_values(subject_uri="home/boo")
ids = self.engine.find_eventids(TimeRange.always(),
[template], StorageState.Any, 0, ResultType.OldestActor)
- self.assertEquals(ids, [2])
+ self.assertEquals(list(ids), [2])
# Let's also try the same with MostRecentActor... Although there
# should be no problem here.
template = Event.new_for_values(subject_uri="home/boo")
ids = self.engine.find_eventids(TimeRange.always(),
[template], StorageState.Any, 0, ResultType.OldestActor)
- self.assertEquals(ids, [2])
+ self.assertEquals(list(ids), [2])
def testResultTypesOldestActor(self):
import_events("test/data/twenty_events.js", self.engine)