← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~wallyworld/launchpad/ppa-packages-timeout-1071581 into lp:launchpad

 

Ian Booth has proposed merging lp:~wallyworld/launchpad/ppa-packages-timeout-1071581 into lp:launchpad.

Commit message:
Improve the query used to find person related software

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1071581 in Launchpad itself: "+ppa-packages timeout"
  https://bugs.launchpad.net/launchpad/+bug/1071581

For more details, see:
https://code.launchpad.net/~wallyworld/launchpad/ppa-packages-timeout-1071581/+merge/132236

== Pre Implementation ==

Discussed with wgrant.

== Implementation ==

So Postgres cannot efficiently execute the required DISTINCT ON query needed to only find the latest published packages. So we need to dumb down the query and perform the grouping in Python. The functionality is split into 2 parts:
1. Perform a query with the required WHERE conditions, iterate over the result in batches, and gather the required SourcePackageRelease ids.
2. Set up a result set using SourcePackageRelease.id IN (...) and return this one. Thus client batching and pre iteration and eager loading of only the batched records' information works as before.

New indexes on SourcePackageRelease are required - these can be added live, so are included in this mp. The first 2 are for the queries to find the spr ids to include, the last is for the query which loads the records.

== Tests ==

Internal change - use existing tests.

== Lint ==

Checking for conflicts and issues in changed files.

Linting changed files:
  database/schema/patch-2209-38-0.sql
  lib/lp/registry/browser/person.py
  lib/lp/registry/model/person.py
-- 
https://code.launchpad.net/~wallyworld/launchpad/ppa-packages-timeout-1071581/+merge/132236
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wallyworld/launchpad/ppa-packages-timeout-1071581 into lp:launchpad.
=== added file 'database/schema/patch-2209-38-0.sql'
--- database/schema/patch-2209-38-0.sql	1970-01-01 00:00:00 +0000
+++ database/schema/patch-2209-38-0.sql	2012-10-31 02:48:18 +0000
@@ -0,0 +1,12 @@
+-- Copyright 2012 Canonical Ltd.  This software is licensed under the
+-- GNU Affero General Public License version 3 (see the file LICENSE).
+
+SET client_min_messages=ERROR;
+
+CREATE INDEX creator__dateuploaded__id__idx ON sourcepackagerelease USING btree (creator, dateuploaded DESC, id) WHERE (dateuploaded IS NOT NULL);
+
+CREATE INDEX maintainer__dateuploaded__id__idx ON sourcepackagerelease USING btree (maintainer, dateuploaded DESC, id) WHERE (dateuploaded IS NOT NULL);
+
+CREATE INDEX dateuploaded__id__idx ON sourcepackagerelease USING btree (dateuploaded DESC, id) WHERE (dateuploaded IS NOT NULL);
+
+INSERT INTO LaunchpadDatabaseRevision VALUES (2209, 38, 0);

=== modified file 'lib/lp/registry/browser/person.py'
--- lib/lp/registry/browser/person.py	2012-10-26 07:15:49 +0000
+++ lib/lp/registry/browser/person.py	2012-10-31 02:48:18 +0000
@@ -138,12 +138,8 @@
     LaunchpadRadioWidgetWithDescription,
     )
 from lp.bugs.interfaces.bugsupervisor import IHasBugSupervisor
-from lp.bugs.interfaces.bugtask import (
-    BugTaskStatus,
-    IBugTaskSet,
-    )
+from lp.bugs.interfaces.bugtask import BugTaskStatus
 from lp.bugs.interfaces.bugtasksearch import BugTaskSearchParams
-from lp.bugs.model.bugtask import BugTaskSet
 from lp.buildmaster.enums import BuildStatus
 from lp.code.browser.sourcepackagerecipelisting import HasRecipesMenuMixin
 from lp.code.errors import InvalidNamespace
@@ -3470,14 +3466,8 @@
         is_driver, is_bugsupervisor.
         """
         projects = []
-        user = getUtility(ILaunchBag).user
         max_projects = self.max_results_to_display
         pillarnames = self._related_projects[:max_projects]
-        products = [pillarname.pillar for pillarname in pillarnames
-                    if IProduct.providedBy(pillarname.pillar)]
-        bugtask_set = getUtility(IBugTaskSet)
-        product_bugtask_counts = bugtask_set.getOpenBugTasksPerProduct(
-            user, products)
         for pillarname in pillarnames:
             pillar = pillarname.pillar
             project = {}
@@ -3599,7 +3589,8 @@
         Results are filtered according to the permission of the requesting
         user to see private archives.
         """
-        packages = self.context.getLatestUploadedPPAPackages()
+        packages = self.context.getLatestUploadedPPAPackages(
+            self.max_results_to_display)
         results, header_message = self._getDecoratedPackagesSummary(packages)
         self.ppa_packages_header_message = header_message
         return self.filterPPAPackageList(results)
@@ -3607,7 +3598,8 @@
     @property
     def latest_maintained_packages_with_stats(self):
         """Return the latest maintained packages, including stats."""
-        packages = self.context.getLatestMaintainedPackages()
+        packages = self.context.getLatestMaintainedPackages(
+            self.max_results_to_display)
         results, header_message = self._getDecoratedPackagesSummary(packages)
         self.maintained_packages_header_message = header_message
         return results
@@ -3618,7 +3610,8 @@
 
         Don't include packages that are maintained by the user.
         """
-        packages = self.context.getLatestUploadedButNotMaintainedPackages()
+        packages = self.context.getLatestUploadedButNotMaintainedPackages(
+            self.max_results_to_display)
         results, header_message = self._getDecoratedPackagesSummary(packages)
         self.uploaded_packages_header_message = header_message
         return results

=== modified file 'lib/lp/registry/model/person.py'
--- lib/lp/registry/model/person.py	2012-10-26 18:23:39 +0000
+++ lib/lp/registry/model/person.py	2012-10-31 02:48:18 +0000
@@ -2796,17 +2796,18 @@
         """See `IPerson`."""
         return self._hasReleasesQuery(uploader_only=True, ppa_only=True)
 
-    def getLatestMaintainedPackages(self):
-        """See `IPerson`."""
-        return self._latestReleasesQuery()
-
-    def getLatestUploadedButNotMaintainedPackages(self):
-        """See `IPerson`."""
-        return self._latestReleasesQuery(uploader_only=True)
-
-    def getLatestUploadedPPAPackages(self):
-        """See `IPerson`."""
-        return self._latestReleasesQuery(uploader_only=True, ppa_only=True)
+    def getLatestMaintainedPackages(self, max_results=None):
+        """See `IPerson`."""
+        return self._latestReleasesQuery(max_results)
+
+    def getLatestUploadedButNotMaintainedPackages(self, max_results=None):
+        """See `IPerson`."""
+        return self._latestReleasesQuery(max_results, uploader_only=True)
+
+    def getLatestUploadedPPAPackages(self, max_results=None):
+        """See `IPerson`."""
+        return self._latestReleasesQuery(
+            max_results, uploader_only=True, ppa_only=True)
 
     def _releasesQueryFilter(self, uploader_only=False, ppa_only=False):
         """Return the filter used to find sourcepackagereleases (SPRs)
@@ -2826,7 +2827,16 @@
         'uploader_only' because there shouldn't be any sense of maintainership
         for packages uploaded to PPAs by someone else than the user himself.
         """
-        clauses = [SourcePackageRelease.upload_archive == Archive.id]
+
+        clauses = [
+            Exists(
+                Select(1,
+                    And(SourcePackagePublishingHistory.sourcepackagerelease ==
+                        SourcePackageRelease.id,
+                        SourcePackagePublishingHistory.archiveID ==
+                        SourcePackageRelease.upload_archiveID),
+                    SourcePackagePublishingHistory)),
+            SourcePackageRelease.upload_archive == Archive.id]
 
         if uploader_only:
             clauses.append(SourcePackageRelease.creator == self)
@@ -2851,51 +2861,61 @@
         See `_releasesQueryFilter` for details on the criteria used.
         """
         clauses = self._releasesQueryFilter(uploader_only, ppa_only)
-        spph = ClassAlias(SourcePackagePublishingHistory, "spph")
         tables = (
             SourcePackageRelease,
-            Join(
-                spph, spph.sourcepackagereleaseID == SourcePackageRelease.id),
-            Join(Archive, Archive.id == spph.archiveID))
+            Join(Archive, Archive.id == SourcePackageRelease.upload_archiveID))
         rs = Store.of(self).using(*tables).find(
             SourcePackageRelease.id, clauses)
         return not rs.is_empty()
 
-    def _latestReleasesQuery(self, uploader_only=False, ppa_only=False):
+    def _latestReleasesQuery(self, max_results=None, uploader_only=False,
+                             ppa_only=False):
         """Return the sourcepackagereleases (SPRs) related to this person.
         See `_releasesQueryFilter` for details on the criteria used."""
         clauses = self._releasesQueryFilter(uploader_only, ppa_only)
-        spph = ClassAlias(SourcePackagePublishingHistory, "spph")
+        clauses.append(Archive.id == SourcePackageRelease.upload_archiveID)
         rs = Store.of(self).find(
-            SourcePackageRelease,
-            SourcePackageRelease.id.is_in(
-                Select(
-                    SourcePackageRelease.id,
-                    tables=[
-                        SourcePackageRelease,
-                        Join(
-                            spph,
-                            spph.sourcepackagereleaseID ==
-                            SourcePackageRelease.id),
-                        Join(Archive, Archive.id == spph.archiveID)],
-                    where=And(*clauses),
-                    order_by=[SourcePackageRelease.upload_distroseriesID,
-                              SourcePackageRelease.sourcepackagenameID,
-                              SourcePackageRelease.upload_archiveID,
-                              Desc(SourcePackageRelease.dateuploaded)],
-                    distinct=(
-                        SourcePackageRelease.upload_distroseriesID,
-                        SourcePackageRelease.sourcepackagenameID,
-                        SourcePackageRelease.upload_archiveID)))
-        ).order_by(
+            (SourcePackageRelease.upload_distroseriesID,
+             SourcePackageRelease.upload_archiveID,
+             SourcePackageRelease.sourcepackagenameID,
+             SourcePackageRelease.id),
+             *clauses).order_by(
             Desc(SourcePackageRelease.dateuploaded), SourcePackageRelease.id)
 
+        # Postgres cannot efficiently perform the required DISTINCT ON query
+        # which is needed to find only the latest uploads. So we first need to
+        # iterate over the entire result set and do the grouping ourselves,
+        # recording the required ids. We'll do it in batches of 75.
+        ids = []
+        unique_releases = set()
+        start = 0
+        batch = 75
+        done = False
+        while not done:
+            done = True
+            for (distroseries, archive, spn, id) in rs[start:start + batch]:
+                done = False
+                key = (distroseries, archive, spn)
+                if key in unique_releases:
+                    continue
+                unique_releases.add(key)
+                ids.append(id)
+                if max_results and len(ids) >= max_results:
+                    break
+            start += batch
+        if not ids:
+            return []
+
         def load_related_objects(rows):
             list(getUtility(IPersonSet).getPrecachedPersonsFromIDs(
                 set(map(attrgetter("maintainerID"), rows))))
             bulk.load_related(SourcePackageName, rows, ['sourcepackagenameID'])
             bulk.load_related(Archive, rows, ['upload_archiveID'])
 
+        rs = Store.of(self).find(
+            SourcePackageRelease, SourcePackageRelease.id.is_in(ids)).order_by(
+            Desc(SourcePackageRelease.dateuploaded), SourcePackageRelease.id)
+
         return DecoratedResultSet(rs, pre_iter_hook=load_related_objects)
 
     def hasSynchronisedPublishings(self):


Follow ups