
zeitgeist team mailing list archive

[Merge] lp:~seif/zeitgeist/memory into lp:zeitgeist

 

Seif Lotfy has proposed merging lp:~seif/zeitgeist/memory into lp:zeitgeist.

Requested reviews:
  Zeitgeist Framework Team (zeitgeist)

For more details, see:
https://code.launchpad.net/~seif/zeitgeist/memory/+merge/63848

Reduce memory consumption by:
1) Using generators
2) Disabling the SQLite page cache (no real performance decline)
3) Using arrays for storing ids instead of lists
4) Using tuples instead of lists where possible

The result is lower memory consumption. I think more can be saved if we start using __slots__, but this is a clean change that doesn't mess up the API/ABI.
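To illustrate what 1) and 3) buy us, here is a minimal sketch (not part of the branch; sizes are indicative for 64-bit CPython):

import sys
from array import array

# 3) A list of ints stores a pointer per element plus a full int object;
#    array("i") packs the same ids as raw C ints.
ids_list = list(range(100000))
ids_array = array("i", range(100000))
print(sys.getsizeof(ids_list))   # ~800 KB of pointers (int objects not included)
print(sys.getsizeof(ids_array))  # ~400 KB, values stored inline

# 1) A generator yields rows one at a time instead of materializing them all,
#    which is what the fetch() helper in the diff does for cursor results.
def first_column(rows):
	for row in rows:
		yield row[0]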


-- 
https://code.launchpad.net/~seif/zeitgeist/memory/+merge/63848
Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~seif/zeitgeist/memory into lp:zeitgeist.
=== modified file '_zeitgeist/engine/datamodel.py'
--- _zeitgeist/engine/datamodel.py	2011-01-17 15:54:47 +0000
+++ _zeitgeist/engine/datamodel.py	2011-06-08 11:26:25 +0000
@@ -78,4 +78,4 @@
 			}.iteritems():
 			for prop in props:
 				datasource[prop] = plaintype(datasource[prop])
-		return list(datasource)
+		return tuple(datasource)

=== modified file '_zeitgeist/engine/main.py'
--- _zeitgeist/engine/main.py	2011-06-04 14:49:19 +0000
+++ _zeitgeist/engine/main.py	2011-06-08 11:26:25 +0000
@@ -29,6 +29,7 @@
 import os
 import logging
 from collections import defaultdict
+from array import array
 
 from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \
 	ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR, WILDCARD
@@ -199,8 +200,8 @@
 			return []
 		
 		# Split ids into cached and uncached
-		uncached_ids = []
-		cached_ids = []
+		uncached_ids = array("i")
+		cached_ids = array("i")
 		
 		# If ids batch greater than MAX_CACHE_BATCH_SIZE ids ignore cache
 		use_cache = True
@@ -238,19 +239,19 @@
 						sorted_events[n] = event
 		
 		# Get uncached events
-		rows = tuple(row for row in self._cursor.execute("""
-			SELECT * FROM event_view
-			WHERE id IN (%s)
-			""" % ",".join("%d" % id for id in uncached_ids)))
+		rows = self._cursor.execute("""	SELECT * FROM event_view WHERE id IN (%s)
+			""" % ",".join("%d" % id for id in uncached_ids))
 		
-		log.debug("Got %d raw events in %fs" % (len(rows), time.time()-t))
+		time_get_uncached = time.time() - t
 		t = time.time()
 		
 		t_get_event = 0
 		t_get_subject = 0
 		t_apply_get_hooks = 0
 		
+		row_counter = 0
 		for row in rows:
+			row_counter += 1
 			# Assumption: all rows of a same event for its different
 			# subjects are in consecutive order.
 			t_get_event -= time.time()
@@ -286,6 +287,7 @@
 					# at a decent level
 					
 
+		log.debug("Got %d raw events in %fs" % (row_counter, time_get_uncached))
 		log.debug("Got %d events in %fs" % (len(sorted_events), time.time()-t))
 		log.debug("    Where time spent in _get_event_from_row in %fs" % (t_get_event))
 		log.debug("    Where time spent in _get_subject_from_row in %fs" % (t_get_subject))
@@ -561,13 +563,12 @@
 		
 		if max_events > 0:
 			sql += " LIMIT %d" % max_events
-		
-		result = tuple(r[0] for r in self._cursor.execute(sql, where.arguments))
+		result = array("i", self._cursor.execute(sql, where.arguments).fetch(0))
 		
 		if return_mode == 0:
 			log.debug("Found %d event IDs in %fs" % (len(result), time.time()- t))
 		elif return_mode == 1:
-			log.debug("Found %d events IDs in %fs" % (len(result), time.time()- t))
+			log.debug("Found %d events in %fs" % (len(result), time.time()- t))
 			result = self.get_events(ids=result, sender=sender)	
 		else:
 			raise Exception("%d" % return_mode)

=== modified file '_zeitgeist/engine/remote.py'
--- _zeitgeist/engine/remote.py	2011-06-02 20:15:11 +0000
+++ _zeitgeist/engine/remote.py	2011-06-08 11:26:25 +0000
@@ -77,7 +77,7 @@
 		for event in events:
 			if event is not None:
 				event._make_dbus_sendable()
-		return [NULL_EVENT if event is None else event for event in events]
+		return tuple(NULL_EVENT if event is None else event for event in events)
 	
 	# Reading stuff
 	

=== modified file '_zeitgeist/engine/sql.py'
--- _zeitgeist/engine/sql.py	2011-05-18 20:48:13 +0000
+++ _zeitgeist/engine/sql.py	2011-06-08 11:26:25 +0000
@@ -75,6 +75,14 @@
 			explain_query(super(UnicodeCursor, self), statement, parameters)
 		return super(UnicodeCursor, self).execute(statement, parameters)
 
+	def fetch(self, index=-1):
+		if index >= 0:
+			for row in self:
+				yield row[index]
+		else:
+			for row in self:
+				yield row
+
 def _get_schema_version (cursor, schema_name):
 	"""
 	Returns the schema version for schema_name or returns 0 in case
@@ -206,6 +214,8 @@
 	# we decided to set locking_mode to EXCLUSIVE, from now on only
 	# one connection to the database is allowed to revert this setting set locking_mode to NORMAL.
 	cursor.execute("PRAGMA locking_mode = EXCLUSIVE")
+	# Seif: Disable cache since we already kinda support our own cache (LRUCache)
+	cursor.execute("PRAGMA cache_size = 0")
 	
 	# thekorn: as part of the workaround for (LP: #598666) we need to
 	# create the '_fix_cache' TEMP table on every start,

=== modified file 'test/engine-test.py'
--- test/engine-test.py	2011-05-07 12:00:54 +0000
+++ test/engine-test.py	2011-06-08 11:26:25 +0000
@@ -446,7 +446,7 @@
 		event = Event.new_for_values(subjects=[subj1, subj2])
 		orig_ids = self.engine.insert_events([event])
 		result_ids = self.engine.find_eventids(TimeRange.always(), [Event()], StorageState.Any, 0, 1)
-		self.assertEquals(orig_ids, result_ids)
+		self.assertEquals(orig_ids, list(result_ids))
 
 	def testFindEventsEventTemplate(self):
 		import_events("test/data/five_events.js", self.engine)
@@ -603,7 +603,7 @@
 			[tmpl], StorageState.Any, 10, ResultType.MostRecentEvents)
 		
 		self.assertEquals(1, len(ids))
-		self.assertEquals(_ids, ids)
+		self.assertEquals(_ids, list(ids))
 		
 	def testNegation(self):
 		import_events("test/data/five_events.js", self.engine)
@@ -1035,7 +1035,7 @@
 				reverse=True
 			)
 		]
-		self.assertEquals(ids, sorted_event_ids)
+		self.assertEquals(list(ids), sorted_event_ids)
 		
 	def testResultTypesLeastRecentEvents(self):
 		import_events("test/data/five_events.js", self.engine)
@@ -1049,7 +1049,7 @@
 			event.id for event in sorted(events,
 				cmp=lambda x, y: cmp(int(x.timestamp), int(y.timestamp)))
 		]
-		self.assertEquals(ids, sorted_event_ids)
+		self.assertEquals(list(ids), sorted_event_ids)
 	
 	def testResultTypesMostPopularActor(self):
 		import_events("test/data/twenty_events.js", self.engine)
@@ -1185,20 +1185,20 @@
 		# Get the least recent actors
 		ids = self.engine.find_eventids(TimeRange.always(),
 			[], StorageState.Any, 0, ResultType.OldestActor)
-		self.assertEquals(ids, [1, 3, 4])
+		self.assertEquals(list(ids), [1, 3, 4])
 		
 		# Get the least recent actors for "home/boo"
 		template = Event.new_for_values(subject_uri="home/boo")
 		ids = self.engine.find_eventids(TimeRange.always(),
 			[template], StorageState.Any, 0, ResultType.OldestActor)
-		self.assertEquals(ids, [2])
+		self.assertEquals(list(ids), [2])
 		
 		# Let's also try the same with MostRecentActor... Although there
 		# should be no problem here.
 		template = Event.new_for_values(subject_uri="home/boo")
 		ids = self.engine.find_eventids(TimeRange.always(),
 			[template], StorageState.Any, 0, ResultType.OldestActor)
-		self.assertEquals(ids, [2])
+		self.assertEquals(list(ids), [2])
 	
 	def testResultTypesOldestActor(self):
 		import_events("test/data/twenty_events.js", self.engine)

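For reviewers who want to poke at the new UnicodeCursor.fetch() generator in isolation, here is a standalone sketch (plain sqlite3 and a made-up table, not part of the branch):

import sqlite3
from array import array

class FetchCursor(sqlite3.Cursor):
	# Same idea as the fetch() method added to UnicodeCursor above:
	# lazily yield one column (or whole rows) instead of building a tuple.
	def fetch(self, index=-1):
		if index >= 0:
			for row in self:
				yield row[index]
		else:
			for row in self:
				yield row

conn = sqlite3.connect(":memory:")
cur = conn.cursor(FetchCursor)
cur.execute("CREATE TABLE event (id INTEGER)")
cur.executemany("INSERT INTO event VALUES (?)", [(i,) for i in range(5)])

# Build the id array in one pass, with no intermediate list or tuple.
ids = array("i", cur.execute("SELECT id FROM event").fetch(0))
print(list(ids))  # [0, 1, 2, 3, 4]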
