← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~wgrant/launchpad/faster-gCSR into lp:launchpad

 

William Grant has proposed merging lp:~wgrant/launchpad/faster-gCSR into lp:launchpad.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1021118 in Launchpad itself: "Distribution.getCurrentSourceReleases is slow"
  https://bugs.launchpad.net/launchpad/+bug/1021118

For more details, see:
https://code.launchpad.net/~wgrant/launchpad/faster-gCSR/+merge/113538

This branch fixes bug #1021118 by refactoring DistributionSet.getCurrentSourceReleases. It was very similar to DistroSeries.getCurrentSourceReleases, which was already fixed, so I Stormified and merged most of the logic into a single function.
-- 
https://code.launchpad.net/~wgrant/launchpad/faster-gCSR/+merge/113538
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/faster-gCSR into lp:launchpad.
=== modified file 'lib/lp/registry/model/distribution.py'
--- lib/lp/registry/model/distribution.py	2012-06-08 02:57:12 +0000
+++ lib/lp/registry/model/distribution.py	2012-07-05 10:41:20 +0000
@@ -11,10 +11,7 @@
     ]
 
 import itertools
-from operator import (
-    attrgetter,
-    itemgetter,
-    )
+from operator import itemgetter
 
 from sqlobject import (
     BoolCol,
@@ -23,8 +20,7 @@
     StringCol,
     )
 from sqlobject.sqlbuilder import SQLConstant
-from storm.info import ClassAlias
-from storm.locals import (
+from storm.expr import (
     And,
     Desc,
     Join,
@@ -32,13 +28,13 @@
     Or,
     SQL,
     )
+from storm.info import ClassAlias
 from storm.store import Store
 from zope.component import getUtility
 from zope.interface import (
     alsoProvides,
     implements,
     )
-from zope.security.proxy import removeSecurityProxy
 
 from lp.answers.enums import QUESTION_STATUS_DEFAULT_SEARCH
 from lp.answers.interfaces.faqtarget import IFAQTarget
@@ -178,7 +174,6 @@
 from lp.soyuz.interfaces.archivepermission import IArchivePermissionSet
 from lp.soyuz.interfaces.binarypackagebuild import IBinaryPackageBuildSet
 from lp.soyuz.interfaces.buildrecords import IHasBuildRecords
-from lp.soyuz.interfaces.publishing import active_publishing_status
 from lp.soyuz.model.archive import Archive
 from lp.soyuz.model.binarypackagename import BinaryPackageName
 from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
@@ -191,6 +186,7 @@
     )
 from lp.soyuz.model.publishing import (
     BinaryPackagePublishingHistory,
+    get_current_source_releases,
     SourcePackagePublishingHistory,
     )
 from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
@@ -1763,55 +1759,18 @@
 
     def getCurrentSourceReleases(self, distro_source_packagenames):
         """See `IDistributionSet`."""
-        # Builds one query for all the distro_source_packagenames.
-        # This may need tuning: its possible that grouping by the common
-        # archives may yield better efficiency: the current code is
-        # just a direct push-down of the previous in-python lookup to SQL.
-        series_clauses = []
-        distro_lookup = {}
-        for distro, package_names in distro_source_packagenames.items():
-            source_package_ids = map(attrgetter('id'), package_names)
-            # all_distro_archive_ids is just a list of ints, but it gets
-            # wrapped anyway - and sqlvalues goes boom.
-            archives = removeSecurityProxy(
-                distro.all_distro_archive_ids)
-            clause = """(spr.sourcepackagename IN %s AND
-                spph.archive IN %s AND
-                ds.distribution = %s)
-                """ % sqlvalues(source_package_ids, archives, distro.id)
-            series_clauses.append(clause)
-            distro_lookup[distro.id] = distro
-        if not len(series_clauses):
-            return {}
-        combined_clause = "(" + " OR ".join(series_clauses) + ")"
-
-        releases = IStore(SourcePackageRelease).find(
-            (SourcePackageRelease, Distribution.id), SQL("""
-                (SourcePackageRelease.id, Distribution.id) IN (
-                    SELECT DISTINCT ON (
-                        spr.sourcepackagename, ds.distribution)
-                        spr.id, ds.distribution
-                    FROM
-                        SourcePackageRelease AS spr,
-                        SourcePackagePublishingHistory AS spph,
-                        DistroSeries AS ds
-                    WHERE
-                        spph.sourcepackagerelease = spr.id
-                        AND spph.distroseries = ds.id
-                        AND spph.status IN %s
-                        AND %s
-                    ORDER BY
-                        spr.sourcepackagename, ds.distribution, spph.id DESC
-                    )
-                """
-                % (sqlvalues(active_publishing_status) + (combined_clause,))))
+        releases = get_current_source_releases(
+            distro_source_packagenames,
+            lambda distro: distro.all_distro_archive_ids,
+            lambda distro: DistroSeries.distribution == distro,
+            [SourcePackagePublishingHistory.distroseriesID
+                == DistroSeries.id],
+            DistroSeries.distributionID)
         result = {}
-        for sp_release, distro_id in releases:
-            distro = distro_lookup[distro_id]
-            sourcepackage = distro.getSourcePackage(
-                sp_release.sourcepackagename)
-            result[sourcepackage] = DistributionSourcePackageRelease(
-                distro, sp_release)
+        for spr, distro_id in releases:
+            distro = getUtility(IDistributionSet).get(distro_id)
+            result[distro.getSourcePackage(spr.sourcepackagename)] = (
+                DistributionSourcePackageRelease(distro, spr))
         return result
 
     def getDerivedDistributions(self):

=== modified file 'lib/lp/registry/model/distroseries.py'
--- lib/lp/registry/model/distroseries.py	2012-06-29 08:40:05 +0000
+++ lib/lp/registry/model/distroseries.py	2012-07-05 10:41:20 +0000
@@ -15,7 +15,6 @@
 import collections
 from cStringIO import StringIO
 import logging
-from operator import attrgetter
 
 import apt_pkg
 from sqlobject import (
@@ -39,7 +38,6 @@
     )
 from zope.component import getUtility
 from zope.interface import implements
-from zope.security.proxy import removeSecurityProxy
 
 from lp.app.enums import service_uses_launchpad
 from lp.app.errors import NotFoundError
@@ -162,6 +160,7 @@
     )
 from lp.soyuz.model.publishing import (
     BinaryPackagePublishingHistory,
+    get_current_source_releases,
     SourcePackagePublishingHistory,
     )
 from lp.soyuz.model.queue import (
@@ -1767,57 +1766,17 @@
 
     def getCurrentSourceReleases(self, distro_series_source_packagenames):
         """See `IDistroSeriesSet`."""
-        # Builds one query for all the distro_series_source_packagenames.
-        # This may need tuning: its possible that grouping by the common
-        # archives may yield better efficiency: the current code is
-        # just a direct push-down of the previous in-python lookup to SQL.
-        series_clauses = []
-        distroseries_lookup = {}
-        for distroseries, package_names in \
-            distro_series_source_packagenames.items():
-            source_package_ids = map(attrgetter('id'), package_names)
-            # all_distro_archive_ids is just a list of ints, but it gets
-            # wrapped anyway - and sqlvalues goes boom.
-            archives = removeSecurityProxy(
-                distroseries.distribution.all_distro_archive_ids)
-            clause = """(spph.sourcepackagename IN %s AND
-                spph.archive IN %s AND
-                spph.distroseries = %s)
-                """ % sqlvalues(source_package_ids, archives, distroseries.id)
-            series_clauses.append(clause)
-            distroseries_lookup[distroseries.id] = distroseries
-        if not len(series_clauses):
-            return {}
-        combined_clause = "(" + " OR ".join(series_clauses) + ")"
-
-        releases = IStore(SourcePackageRelease).find(
-            (SourcePackageRelease, DistroSeries.id), SQL("""
-                (SourcePackageRelease.id, DistroSeries.id) IN (
-                    SELECT
-                        DISTINCT ON (
-                            spph.sourcepackagename, spph.distroseries)
-                        spr.id, spph.distroseries
-                    FROM
-                        SourcePackageRelease AS spr,
-                        SourcePackagePublishingHistory AS spph
-                    WHERE
-                        spph.sourcepackagerelease = spr.id
-                        AND spph.status IN %s
-                        AND %s
-                    ORDER BY
-                        spph.sourcepackagename,
-                        spph.distroseries,
-                        spph.id DESC
-                    )
-                """
-                % (sqlvalues(active_publishing_status) + (combined_clause,))))
+        releases = get_current_source_releases(
+            distro_series_source_packagenames,
+            lambda series: series.distribution.all_distro_archive_ids,
+            (lambda series:
+                SourcePackagePublishingHistory.distroseries == series),
+            [], SourcePackagePublishingHistory.distroseriesID)
         result = {}
-        for sp_release, distroseries_id in releases:
-            distroseries = distroseries_lookup[distroseries_id]
-            sourcepackage = distroseries.getSourcePackage(
-                sp_release.sourcepackagename)
-            result[sourcepackage] = DistroSeriesSourcePackageRelease(
-                distroseries, sp_release)
+        for spr, series_id in releases:
+            series = getUtility(IDistroSeriesSet).get(series_id)
+            result[series.getSourcePackage(spr.sourcepackagename)] = (
+                DistroSeriesSourcePackageRelease(series, spr))
         return result
 
     def search(self, distribution=None, isreleased=None, orderBy=None):

=== modified file 'lib/lp/soyuz/model/publishing.py'
--- lib/lp/soyuz/model/publishing.py	2012-06-29 08:40:05 +0000
+++ lib/lp/soyuz/model/publishing.py	2012-07-05 10:41:20 +0000
@@ -6,10 +6,11 @@
 __metaclass__ = type
 
 __all__ = [
-    'makePoolPath',
     'BinaryPackageFilePublishing',
     'BinaryPackagePublishingHistory',
+    'get_current_source_releases',
     'IndexStanzaFields',
+    'makePoolPath',
     'PublishingSet',
     'SourcePackageFilePublishing',
     'SourcePackagePublishingHistory',
@@ -53,7 +54,10 @@
 from lp.services.database.datetimecol import UtcDateTimeCol
 from lp.services.database.decoratedresultset import DecoratedResultSet
 from lp.services.database.enumcol import EnumCol
-from lp.services.database.lpstorm import IMasterStore
+from lp.services.database.lpstorm import (
+    IMasterStore,
+    IStore,
+    )
 from lp.services.database.sqlbase import SQLBase
 from lp.services.librarian.browser import ProxiedLibraryFileAlias
 from lp.services.librarian.model import (
@@ -2079,3 +2083,47 @@
             return ancestries[0]
         except IndexError:
             return None
+
+
+def get_current_source_releases(context_sourcepackagenames, archive_ids_func,
+                                package_clause_func, extra_clauses, key_col):
+    """Get the current source package releases in a context.
+
+    You probably don't want to use this directly; try
+    (Distribution|DistroSeries)(Set)?.getCurrentSourceReleases instead.
+    """
+    # Builds one query for all the distro_source_packagenames.
+    # This may need tuning: its possible that grouping by the common
+    # archives may yield better efficiency: the current code is
+    # just a direct push-down of the previous in-python lookup to SQL.
+    from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
+
+    series_clauses = []
+    for context, package_names in context_sourcepackagenames.items():
+        clause = And(
+            SourcePackagePublishingHistory.sourcepackagenameID.is_in(
+                map(operator.attrgetter('id'), package_names)),
+            SourcePackagePublishingHistory.archiveID.is_in(
+                archive_ids_func(context)),
+            package_clause_func(context),
+            )
+        series_clauses.append(clause)
+    if not len(series_clauses):
+        return {}
+
+    releases = IStore(SourcePackageRelease).find(
+        (SourcePackageRelease, key_col),
+        SourcePackagePublishingHistory.sourcepackagereleaseID
+            == SourcePackageRelease.id,
+        SourcePackagePublishingHistory.status.is_in(
+            active_publishing_status),
+        Or(*series_clauses),
+        *extra_clauses).config(
+            distinct=(
+                SourcePackageRelease.sourcepackagenameID,
+                key_col)
+        ).order_by(
+            SourcePackageRelease.sourcepackagenameID,
+            key_col,
+            Desc(SourcePackagePublishingHistory.id))
+    return releases


Follow ups