launchpad-reviewers team mailing list archive

Thread
Date

[Merge] lp:~lifeless/launchpad/bug-784988 into lp:launchpad

To: mp+61697@xxxxxxxxxxxxxxxxxx
From: Robert Collins <robertc@xxxxxxxxxxxxxxxxx>
Date: Fri, 20 May 2011 05:41:36 -0000
Reply-to: mp+61697@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

Robert Collins has proposed merging lp:~lifeless/launchpad/bug-784988 into lp:launchpad.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #784988 in Launchpad itself: "Sprint:+temp-meeting-export timeouts"
  https://bugs.launchpad.net/launchpad/+bug/784988

For more details, see:
https://code.launchpad.net/~lifeless/launchpad/bug-784988/+merge/61697

temp-sprint-export was late evaluating all the subscribers to all the events - a large amount of data, and timing out regularly. I've changed that to eager load and avoid some storm double-dereferencing a little.

I added a new bulk load helper to avoid Yet Another sql expression, I think its reasonably tasteful but be your own judge.
-- 
https://code.launchpad.net/~lifeless/launchpad/bug-784988/+merge/61697
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~lifeless/launchpad/bug-784988 into lp:launchpad.

=== modified file 'lib/lp/blueprints/browser/sprint.py'
--- lib/lp/blueprints/browser/sprint.py	2011-03-18 00:26:10 +0000
+++ lib/lp/blueprints/browser/sprint.py	2011-05-20 05:41:33 +0000
@@ -22,6 +22,7 @@
     'SprintView',
     ]
 
+from collections import defaultdict
 import csv
 from StringIO import StringIO
 
@@ -67,11 +68,15 @@
     ISprint,
     ISprintSet,
     )
+from lp.blueprints.model.specificationsubscription import (
+    SpecificationSubscription,
+    )
 from lp.registry.browser.branding import BrandingChangeView
 from lp.registry.browser.menu import (
     IRegistryCollectionNavigationMenu,
     RegistryCollectionActionMenuBase,
     )
+from lp.services.database.bulk import load_referencing
 from lp.services.propertycache import cachedproperty
 
 
@@ -368,7 +373,7 @@
         self.status_message = None
         self.process_form()
         self.attendee_ids = set(
-            attendance.attendee.id for attendance in self.context.attendances)
+            attendance.attendeeID for attendance in self.context.attendances)
 
 
     @cachedproperty
@@ -448,16 +453,14 @@
                 displayname=attendance.attendee.displayname,
                 start=attendance.time_starts.strftime('%Y-%m-%dT%H:%M:%SZ'),
                 end=attendance.time_ends.strftime('%Y-%m-%dT%H:%M:%SZ')))
-            attendee_set.add(attendance.attendee)
+            attendee_set.add(attendance.attendeeID)
 
-        self.specifications = []
-        for speclink in self.context.specificationLinks(
+        model_specs = []
+        for spec in self.context.specifications(
             filter=[SpecificationFilter.ACCEPTED]):
-            spec = speclink.specification
 
             # skip sprints with no priority or less than low:
-            if (spec.priority is None or
-                spec.priority < SpecificationPriority.UNDEFINED):
+            if spec.priority < SpecificationPriority.UNDEFINED:
                 continue
 
             if (spec.definition_status not in
@@ -465,24 +468,35 @@
                  SpecificationDefinitionStatus.DISCUSSION,
                  SpecificationDefinitionStatus.DRAFT]):
                 continue
+            model_specs.append(spec)
 
+        people = defaultdict(dict)
+        # Attendees per specification
+        for subscription in load_referencing(SpecificationSubscription,
+                model_specs, 'specificationID'):
+            if subscription.personID not in attendee_set:
+                continue
+            people[subscription.specificationID][subscription.personID] = \
+                subscription.essential
+        # Spec specials - drafter/assignee. Don't need approver for performance
+        # as specifications() above eager loaded the people, and approvers
+        # don't count as a 'required person'.
+        for spec in model_specs:
             # get the list of attendees that will attend the sprint
-            is_required = dict((sub.person, sub.essential)
-                               for sub in spec.subscriptions)
-            interested = set(is_required.keys()).intersection(attendee_set)
-            if spec.assignee is not None:
-                interested.add(spec.assignee)
-                is_required[spec.assignee] = True
-            if spec.drafter is not None:
-                interested.add(spec.drafter)
-                is_required[spec.drafter] = True
-            interested = [dict(name=person.name,
-                               required=is_required[person])
-                          for person in interested]
-
-            self.specifications.append(dict(
-                spec=spec,
-                interested=interested))
+            spec_people = people[spec.id]
+            if spec.assigneeID is not None:
+                spec_people[spec.assigneeID] = True
+                attendee_set.add(spec.assigneeID)
+            if spec.drafterID is not None:
+                spec_people[spec.drafterID] = True
+                attendee_set.add(spec.drafterID)
+        people_by_id = dict((person.id, person) for person in
+            getUtility(IPersonSet).getPrecachedPersonsFromIDs(attendee_set))
+        self.specifications = [
+            dict(spec=spec, interested=[
+                    dict(name=people_by_id[person_id].name, required=required)
+                    for (person_id, required) in people[spec.id].items()]
+                ) for spec in model_specs]
 
     def render(self):
         self.request.response.setHeader('content-type',

=== modified file 'lib/lp/blueprints/model/specification.py'
--- lib/lp/blueprints/model/specification.py	2011-03-16 08:41:17 +0000
+++ lib/lp/blueprints/model/specification.py	2011-05-20 05:41:33 +0000
@@ -182,7 +182,7 @@
     date_started = UtcDateTimeCol(notNull=False, default=None)
 
     # useful joins
-    subscriptions = SQLMultipleJoin('SpecificationSubscription',
+    _subscriptions = SQLMultipleJoin('SpecificationSubscription',
         joinColumn='specification', orderBy='id')
     subscribers = SQLRelatedJoin('Person',
         joinColumn='specification', otherColumn='person',

=== modified file 'lib/lp/services/database/bulk.py'
--- lib/lp/services/database/bulk.py	2011-04-01 14:46:21 +0000
+++ lib/lp/services/database/bulk.py	2011-05-20 05:41:33 +0000
@@ -6,6 +6,7 @@
 __metaclass__ = type
 __all__ = [
     'load',
+    'load_referencing',
     'load_related',
     'reload',
     ]
@@ -13,7 +14,9 @@
 
 from collections import defaultdict
 from functools import partial
+from operator import attrgetter
 
+from storm.expr import Or
 from storm.info import get_cls_info
 from storm.store import Store
 from zope.security.proxy import removeSecurityProxy
@@ -64,14 +67,23 @@
         list(query)
 
 
-def load(object_type, primary_keys, store=None):
-    """Load a large number of objects efficiently."""
+def _primary_key(object_type):
+    """Get a primary key our helpers can use.
+    
+    :raises AssertionError if the key is missing or unusable.
+    """
     primary_key = get_cls_info(object_type).primary_key
     if len(primary_key) != 1:
         raise AssertionError(
             "Compound primary keys are not supported: %s." %
             object_type.__name__)
-    primary_key_column = primary_key[0]
+    return primary_key[0]
+
+
+def load(object_type, primary_keys, store=None):
+    """Load a large number of objects efficiently."""
+    primary_key = _primary_key(object_type)
+    primary_key_column = primary_key
     primary_keys = set(primary_keys)
     primary_keys.discard(None)
     if not primary_keys:
@@ -82,6 +94,39 @@
     return list(store.find(object_type, condition))
 
 
+def load_referencing(object_type, owning_objects, reference_keys):
+    """Load objects of object_type that reference owning_objects.
+    
+    Note that complex types like Person are best loaded through dedicated
+    helpers that can eager load other related things (e.g. validity for
+    Person).
+
+    :param object_type: The object type to load - e.g. BranchSubscription.
+    :param owning_objects: The objects which are referenced. E.g. [Branch()]
+        At this point, all the objects should be of the same type, but that
+        constraint could be lifted in future.
+    :param reference_keys: A list of attributes that should be used to select
+        object_type keys. e.g. ['branchID']
+    :return: A list of object_type where any of reference_keys refered to the
+        primary key of any of owning_objects.
+    """
+    store = IStore(object_type)
+    if type(owning_objects) not in (list, tuple):
+        owning_objects = tuple(owning_objects)
+    if not owning_objects:
+        return []
+    exemplar = owning_objects[0]
+    primary_key = _primary_key(get_type(exemplar))
+    attribute = primary_key.name
+    ids = set(map(attrgetter(attribute), owning_objects))
+    conditions = []
+    # Note to future self doing perf tuning: may want to make ids a WITH
+    # clause.
+    for column in map(partial(getattr, object_type), reference_keys):
+        conditions.append(column.is_in(ids))
+    return list(store.find(object_type, Or(conditions)))
+
+
 def load_related(object_type, owning_objects, foreign_keys):
     """Load objects of object_type referred to by owning_objects.
 

=== modified file 'lib/lp/services/database/tests/test_bulk.py'
--- lib/lp/services/database/tests/test_bulk.py	2011-03-29 00:11:57 +0000
+++ lib/lp/services/database/tests/test_bulk.py	2011-05-20 05:41:33 +0000
@@ -21,6 +21,7 @@
     )
 from canonical.testing.layers import DatabaseFunctionalLayer
 from lp.bugs.model.bug import BugAffectsPerson
+from lp.code.model.branchsubscription import BranchSubscription
 from lp.registry.model.person import Person
 from lp.services.database import bulk
 from lp.soyuz.model.component import Component
@@ -202,4 +203,14 @@
         self.assertEqual(expected,
             set(bulk.load_related(Person, owning_objects, ['ownerID'])))
 
-
+    def test_load_referencing(self):
+        owned_objects = [
+            self.factory.makeBranch(),
+            self.factory.makeBranch(),
+            ]
+        expected = set(list(owned_objects[0].subscriptions) + 
+            list(owned_objects[1].subscriptions))
+        self.assertNotEqual(0, len(expected))
+        self.assertEqual(expected,
+            set(bulk.load_referencing(BranchSubscription, owned_objects,
+                ['branchID'])))