← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~wallyworld/launchpad/ppa-packages-timeout-1071581 into lp:launchpad

 

Ian Booth has proposed merging lp:~wallyworld/launchpad/ppa-packages-timeout-1071581 into lp:launchpad.

Commit message:
Introduce a new reporting table (populated by a frequent garbo job) to allow efficient loading of source package releases related to a person.

Requested reviews:
  Curtis Hovey (sinzui): code
Related bugs:
  Bug #1071581 in Launchpad itself: "+ppa-packages timeout"
  https://bugs.launchpad.net/launchpad/+bug/1071581

For more details, see:
https://code.launchpad.net/~wallyworld/launchpad/ppa-packages-timeout-1071581/+merge/132646

== Pre Implementation ==

Discussed with wgrant.

== Implementation ==

Postgres cannot efficiently execute the DISTINCT ON query required to find only the latest published packages.
After a previous attempt to solve this, the best approach is to introduce a new denormalised reporting table and populate it with a garbo job. The data does not have to be completely up-to-date, so a frequently run garbo job will suffice.

== Tests ==


== Lint ==

Checking for conflicts and issues in changed files.

Linting changed files:
  database/schema/security.cfg
  lib/lp/registry/browser/person.py
  lib/lp/registry/model/person.py
  lib/lp/scripts/garbo.py
  lib/lp/soyuz/configure.zcml
  lib/lp/soyuz/interfaces/reporting.py
  lib/lp/soyuz/model/reporting.py

-- 
https://code.launchpad.net/~wallyworld/launchpad/ppa-packages-timeout-1071581/+merge/132646
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.
=== modified file 'database/schema/security.cfg'
--- database/schema/security.cfg	2012-11-01 16:14:00 +0000
+++ database/schema/security.cfg	2012-11-02 04:34:19 +0000
@@ -217,6 +217,7 @@
 public.karmatotalcache                  = SELECT, DELETE, UPDATE
 public.language                         = SELECT
 public.languagepack                     = SELECT, INSERT, UPDATE
+public.latestpersonsourcepackagereleasecache = SELECT
 public.launchpadstatistic               = SELECT
 public.libraryfilealias                 = SELECT, INSERT, UPDATE, DELETE
 public.libraryfiledownloadcount         = SELECT, INSERT, UPDATE
@@ -2255,8 +2256,10 @@
 public.codeimportresult                 = SELECT, DELETE
 public.commercialsubscription           = SELECT, UPDATE
 public.emailaddress                     = SELECT, UPDATE, DELETE
+public.garbojobstate                    = SELECT, INSERT, UPDATE, DELETE
 public.hwsubmission                     = SELECT, UPDATE
 public.job                              = SELECT, INSERT, DELETE
+public.latestpersonsourcepackagereleasecache = SELECT, INSERT, UPDATE
 public.logintoken                       = SELECT, DELETE
 public.mailinglistsubscription          = SELECT, DELETE
 public.milestonetag                     = SELECT

=== modified file 'lib/lp/registry/browser/person.py'
--- lib/lp/registry/browser/person.py	2012-10-26 07:15:49 +0000
+++ lib/lp/registry/browser/person.py	2012-11-02 04:34:19 +0000
@@ -3654,17 +3654,17 @@
         builds_by_package = {}
         needs_build_by_package = {}
         for package in package_releases:
-            builds_by_package[package] = []
-            needs_build_by_package[package] = False
+            builds_by_package[package.id] = []
+            needs_build_by_package[package.id] = False
         for build in all_builds:
             if build.status == BuildStatus.FAILEDTOBUILD:
-                builds_by_package[build.source_package_release].append(build)
+                builds_by_package[build.source_package_release.id].append(build)
             needs_build = build.status in [
                 BuildStatus.NEEDSBUILD,
                 BuildStatus.MANUALDEPWAIT,
                 BuildStatus.CHROOTWAIT,
                 ]
-            needs_build_by_package[build.source_package_release] = needs_build
+            needs_build_by_package[build.source_package_release.id] = needs_build
 
         return (builds_by_package, needs_build_by_package)
 
@@ -3675,8 +3675,8 @@
 
         return [
             SourcePackageReleaseWithStats(
-                package, builds_by_package[package],
-                needs_build_by_package[package])
+                package, builds_by_package[package.id],
+                needs_build_by_package[package.id])
             for package in package_releases]
 
     def _addStatsToPublishings(self, publishings):
@@ -3689,8 +3689,8 @@
 
         return [
             SourcePackagePublishingHistoryWithStats(
-                spph, builds_by_package[spph.sourcepackagerelease],
-                needs_build_by_package[spph.sourcepackagerelease])
+                spph, builds_by_package[spph.sourcepackagerelease.id],
+                needs_build_by_package[spph.sourcepackagerelease.id])
             for spph in filtered_spphs]
 
     def setUpBatch(self, packages):

=== modified file 'lib/lp/registry/model/person.py'
--- lib/lp/registry/model/person.py	2012-11-01 20:33:27 +0000
+++ lib/lp/registry/model/person.py	2012-11-02 04:34:19 +0000
@@ -327,6 +327,7 @@
     Archive,
     validate_ppa,
     )
+from lp.soyuz.model.reporting import LatestPersonSourcepackageReleaseCache
 from lp.soyuz.model.publishing import SourcePackagePublishingHistory
 from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
 from lp.translations.model.hastranslationimports import (
@@ -2810,8 +2811,8 @@
         return self._latestReleasesQuery(uploader_only=True, ppa_only=True)
 
     def _releasesQueryFilter(self, uploader_only=False, ppa_only=False):
-        """Return the filter used to find sourcepackagereleases (SPRs)
-        related to this person.
+        """Return the filter used to find latest published source package
+        releases (SPRs) related to this person.
 
         :param uploader_only: controls if we are interested in SPRs where
             the person in question is only the uploader (creator) and not the
@@ -2827,24 +2828,27 @@
         'uploader_only' because there shouldn't be any sense of maintainership
         for packages uploaded to PPAs by someone else than the user himself.
         """
-        clauses = [SourcePackageRelease.upload_archive == Archive.id]
-
+        clauses = []
         if uploader_only:
-            clauses.append(SourcePackageRelease.creator == self)
-
+            clauses.append(
+                LatestPersonSourcepackageReleaseCache.creator == self)
         if ppa_only:
             # Source maintainer is irrelevant for PPA uploads.
             pass
         elif uploader_only:
-            clauses.append(SourcePackageRelease.maintainer != self)
+            clauses.append(
+                LatestPersonSourcepackageReleaseCache.maintainer != self)
         else:
-            clauses.append(SourcePackageRelease.maintainer == self)
-
+            clauses.append(
+                LatestPersonSourcepackageReleaseCache.maintainer == self)
         if ppa_only:
-            clauses.append(Archive.purpose == ArchivePurpose.PPA)
+            clauses.append(
+                LatestPersonSourcepackageReleaseCache.archive_purpose ==
+                ArchivePurpose.PPA)
         else:
-            clauses.append(Archive.purpose != ArchivePurpose.PPA)
-
+            clauses.append(
+                LatestPersonSourcepackageReleaseCache.archive_purpose !=
+                ArchivePurpose.PPA)
         return clauses
 
     def _hasReleasesQuery(self, uploader_only=False, ppa_only=False):
@@ -2852,50 +2856,26 @@
         See `_releasesQueryFilter` for details on the criteria used.
         """
         clauses = self._releasesQueryFilter(uploader_only, ppa_only)
-        spph = ClassAlias(SourcePackagePublishingHistory, "spph")
-        tables = (
-            SourcePackageRelease,
-            Join(
-                spph, spph.sourcepackagereleaseID == SourcePackageRelease.id),
-            Join(Archive, Archive.id == spph.archiveID))
-        rs = Store.of(self).using(*tables).find(
-            SourcePackageRelease.id, clauses)
+        rs = Store.of(self).using(LatestPersonSourcepackageReleaseCache).find(
+            LatestPersonSourcepackageReleaseCache.publication_id, clauses)
         return not rs.is_empty()
 
     def _latestReleasesQuery(self, uploader_only=False, ppa_only=False):
-        """Return the sourcepackagereleases (SPRs) related to this person.
+        """Return the sourcepackagereleases records related to this person.
         See `_releasesQueryFilter` for details on the criteria used."""
         clauses = self._releasesQueryFilter(uploader_only, ppa_only)
-        spph = ClassAlias(SourcePackagePublishingHistory, "spph")
         rs = Store.of(self).find(
-            SourcePackageRelease,
-            SourcePackageRelease.id.is_in(
-                Select(
-                    SourcePackageRelease.id,
-                    tables=[
-                        SourcePackageRelease,
-                        Join(
-                            spph,
-                            spph.sourcepackagereleaseID ==
-                            SourcePackageRelease.id),
-                        Join(Archive, Archive.id == spph.archiveID)],
-                    where=And(*clauses),
-                    order_by=[SourcePackageRelease.upload_distroseriesID,
-                              SourcePackageRelease.sourcepackagenameID,
-                              SourcePackageRelease.upload_archiveID,
-                              Desc(SourcePackageRelease.dateuploaded)],
-                    distinct=(
-                        SourcePackageRelease.upload_distroseriesID,
-                        SourcePackageRelease.sourcepackagenameID,
-                        SourcePackageRelease.upload_archiveID)))
-        ).order_by(
-            Desc(SourcePackageRelease.dateuploaded), SourcePackageRelease.id)
+            LatestPersonSourcepackageReleaseCache, *clauses).order_by(
+            Desc(LatestPersonSourcepackageReleaseCache.dateuploaded))
 
         def load_related_objects(rows):
             list(getUtility(IPersonSet).getPrecachedPersonsFromIDs(
-                set(map(attrgetter("maintainerID"), rows))))
-            bulk.load_related(SourcePackageName, rows, ['sourcepackagenameID'])
-            bulk.load_related(Archive, rows, ['upload_archiveID'])
+                set(map(attrgetter("maintainer_id"), rows))))
+            bulk.load_related(
+                SourcePackageName, rows, ['sourcepackagename_id'])
+            bulk.load_related(
+                SourcePackageRelease, rows, ['sourcepackagerelease_id'])
+            bulk.load_related(Archive, rows, ['upload_archive_id'])
 
         return DecoratedResultSet(rs, pre_iter_hook=load_related_objects)
 

=== modified file 'lib/lp/scripts/garbo.py'
--- lib/lp/scripts/garbo.py	2012-10-22 02:30:44 +0000
+++ lib/lp/scripts/garbo.py	2012-11-02 04:34:19 +0000
@@ -17,6 +17,7 @@
 import logging
 import multiprocessing
 import os
+import simplejson
 import threading
 import time
 
@@ -28,11 +29,16 @@
 from psycopg2 import IntegrityError
 import pytz
 from storm.expr import (
+    Alias,
+    And,
+    Desc,
     In,
     Like,
     Select,
     Update,
+    Join,
     )
+from storm.info import ClassAlias
 from storm.locals import (
     Max,
     Min,
@@ -105,6 +111,10 @@
     )
 from lp.services.session.model import SessionData
 from lp.services.verification.model.logintoken import LoginToken
+from lp.soyuz.model.archive import Archive
+from lp.soyuz.model.publishing import SourcePackagePublishingHistory
+from lp.soyuz.model.reporting import LatestPersonSourcepackageReleaseCache
+from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
 from lp.translations.interfaces.potemplate import IPOTemplateSet
 from lp.translations.model.potmsgset import POTMsgSet
 from lp.translations.model.potranslation import POTranslation
@@ -423,6 +433,103 @@
         transaction.commit()
 
 
+class PopulateLatestPersonSourcepackageReleaseCache(TunableLoop):
+    """Populate the LatestPersonSourcepackageReleaseCache table."""
+    maximum_chunk_size = 5000
+
+    def __init__(self, log, abort_time=None):
+        super_cl = super(PopulateLatestPersonSourcepackageReleaseCache, self)
+        super_cl.__init__(log, abort_time)
+        self.store = getUtility(IStoreSelector).get(MAIN_STORE, MASTER_FLAVOR)
+        self.next_id = 0
+        self.job_name = self.__class__.__name__
+        job_data = self.store.execute(
+            "SELECT json_data FROM GarboJobState WHERE name = '%s'"
+            % self.job_name).get_one()
+        if job_data:
+            json_data = simplejson.loads(job_data[0])
+            self.next_id = json_data['next_id']
+        else:
+            json_data = simplejson.dumps({'next_id': 0})
+            self.store.execute(
+                "INSERT INTO GarboJobState(name, json_data) "
+                "VALUES ('%s', '%s')" % (self.job_name, json_data))
+
+    def getPendingUpdates(self):
+        spph = ClassAlias(SourcePackagePublishingHistory, "spph")
+        origin = [
+            SourcePackageRelease,
+            Join(
+                spph,
+                And(spph.sourcepackagereleaseID == SourcePackageRelease.id,
+                    spph.archiveID == SourcePackageRelease.upload_archiveID))]
+        spr_select = self.store.using(*origin).find(
+            (SourcePackageRelease.id, Alias(spph.id, 'spph_id')),
+            SourcePackageRelease.upload_archiveID == spph.archiveID,
+            SourcePackageRelease.id > self.next_id
+        ).order_by(
+            SourcePackageRelease.upload_distroseriesID,
+            SourcePackageRelease.sourcepackagenameID,
+            SourcePackageRelease.upload_archiveID,
+            Desc(SourcePackageRelease.dateuploaded),
+            SourcePackageRelease.id
+        ).config(distinct=(
+            SourcePackageRelease.upload_distroseriesID,
+            SourcePackageRelease.sourcepackagenameID,
+            SourcePackageRelease.upload_archiveID))._get_select()
+
+        spr = Alias(spr_select, 'spr')
+        origin = [
+            SourcePackageRelease,
+            Join(spr, SQL('spr.id') == SourcePackageRelease.id),
+            Join(Archive, Archive.id == SourcePackageRelease.upload_archiveID)]
+        rs = self.store.using(*origin).find(
+            (SourcePackageRelease.id,
+            SourcePackageRelease.creatorID, SourcePackageRelease.maintainerID,
+            SourcePackageRelease.upload_archiveID,
+            Archive.purpose,
+            SourcePackageRelease.sourcepackagenameID,
+            SourcePackageRelease.upload_distroseriesID,
+            SourcePackageRelease.dateuploaded, SQL('spph_id'))
+        ).order_by(SourcePackageRelease.id)
+        return rs
+
+    def isDone(self):
+        return self.getPendingUpdates().count() == 0
+
+    def update_cache(self, spr_id, creator_id, maintainer_id, archive_id,
+                     purpose, spn_id, distroseries_id, dateuploaded, spph_id):
+        lpr = LatestPersonSourcepackageReleaseCache()
+        lpr.publication = spph_id
+        lpr.upload_distroseries_id = distroseries_id
+        lpr.archive_purpose = purpose
+        lpr.creator_id = creator_id
+        lpr.maintainer_id = maintainer_id
+        lpr.sourcepackagename_id = spn_id
+        lpr.sourcepackagerelease = spr_id
+        lpr.upload_archive = archive_id
+        lpr.dateuploaded = dateuploaded
+        self.store.add(lpr)
+
+    def __call__(self, chunk_size):
+        max_id = 0
+        for (spr_id, creator_id, maintainer_id, archive_id, purpose,
+             distroseries_id, spn_id, dateuploaded, spph_id) in (
+            self.getPendingUpdates()[:chunk_size]):
+            self.update_cache(
+                spr_id, creator_id, maintainer_id, archive_id, purpose,
+                distroseries_id, spn_id, dateuploaded, spph_id)
+            max_id = spr_id
+
+        self.next_id = max_id
+        self.store.flush()
+        json_data = simplejson.dumps({'next_id': max_id})
+        self.store.execute(
+            "UPDATE GarboJobState SET json_data = '%s' WHERE name = '%s'"
+            % (json_data, self.job_name))
+        transaction.commit()
+
+
 class OpenIDConsumerNoncePruner(TunableLoop):
     """An ITunableLoop to prune old OpenIDConsumerNonce records.
 
@@ -1339,6 +1446,7 @@
         OpenIDConsumerAssociationPruner,
         AntiqueSessionPruner,
         VoucherRedeemer,
+        PopulateLatestPersonSourcepackageReleaseCache
         ]
     experimental_tunable_loops = []
 

=== modified file 'lib/lp/soyuz/configure.zcml'
--- lib/lp/soyuz/configure.zcml	2012-09-28 14:48:20 +0000
+++ lib/lp/soyuz/configure.zcml	2012-11-02 04:34:19 +0000
@@ -1002,6 +1002,12 @@
       <allow interface="lp.soyuz.adapters.overrides.IOverridePolicy" />
     </class>
 
+    <class
+        class="lp.soyuz.model.reporting.LatestPersonSourcepackageReleaseCache">
+        <allow
+            interface="lp.soyuz.interfaces.reporting.ILatestPersonSourcepackageReleaseCache"/>
+    </class>
+
     <!-- ProcessAcceptedBugsJobSource -->
     <securedutility
 	component=".model.processacceptedbugsjob.ProcessAcceptedBugsJob"

=== added file 'lib/lp/soyuz/interfaces/reporting.py'
--- lib/lp/soyuz/interfaces/reporting.py	1970-01-01 00:00:00 +0000
+++ lib/lp/soyuz/interfaces/reporting.py	2012-11-02 04:34:19 +0000
@@ -0,0 +1,25 @@
+# Copyright 2012 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+__metaclass__ = type
+__all__ = [
+    'ILatestPersonSourcepackageReleaseCache',
+    ]
+
+
+from zope.interface import Attribute
+from lp.soyuz.interfaces.sourcepackagerelease import ISourcePackageRelease
+
+
+class ILatestPersonSourcepackageReleaseCache(ISourcePackageRelease):
+    """Published source package release information for a person.
+
+    The records represented by this object are the latest published source
+    package releases for a given sourcepackage, distroseries, archive.
+    """
+
+    id = Attribute("The id of the associated SourcePackageRelease.")
+    sourcepackagerelease = Attribute(
+        "The SourcePackageRelease which this object represents.")
+    publication = Attribute(
+        "The publication record for the associated SourcePackageRelease.")

=== added file 'lib/lp/soyuz/model/reporting.py'
--- lib/lp/soyuz/model/reporting.py	1970-01-01 00:00:00 +0000
+++ lib/lp/soyuz/model/reporting.py	2012-11-02 04:34:19 +0000
@@ -0,0 +1,51 @@
+# Copyright 2012 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+__metaclass__ = type
+__all__ = [
+    'LatestPersonSourcepackageReleaseCache',
+    ]
+
+from lazr.delegates import delegates
+from storm.base import Storm
+from storm.locals import (
+    Int,
+    Reference,
+    )
+from storm.properties import DateTime
+from zope.interface import implements
+
+from lp.services.database.enumcol import EnumCol
+from lp.soyuz.enums import ArchivePurpose
+from lp.soyuz.interfaces.reporting import (
+    ILatestPersonSourcepackageReleaseCache,
+    )
+from lp.soyuz.interfaces.sourcepackagerelease import ISourcePackageRelease
+
+
+class LatestPersonSourcepackageReleaseCache(Storm):
+    """See `LatestPersonSourcepackageReleaseCache`."""
+    implements(ILatestPersonSourcepackageReleaseCache)
+    delegates(ISourcePackageRelease, context='sourcepackagerelease')
+
+    __storm_table__ = 'LatestPersonSourcepackageReleaseCache'
+
+    id = Int(name='id', primary=True)
+    publication_id = Int(name='publication')
+    publication = Reference(
+        publication_id, 'SourcePackagePublishingHistory.id')
+    dateuploaded = DateTime(name='date_uploaded')
+    creator_id = Int(name='creator')
+    creator = Reference(creator_id, 'Person.id')
+    maintainer_id = Int(name='maintainer')
+    maintainer = Reference(maintainer_id, 'Person.id')
+    upload_archive_id = Int(name='upload_archive')
+    upload_archive = Reference(upload_archive_id, 'Archive.id')
+    archive_purpose = EnumCol(schema=ArchivePurpose)
+    upload_distroseries_id = Int(name='upload_distroseries')
+    upload_distroseries = Reference(upload_distroseries_id, 'DistroSeries.id')
+    sourcepackagename_id = Int(name='sourcepackagename')
+    sourcepackagename = Reference(sourcepackagename_id, 'SourcePackageName.id')
+    sourcepackagerelease_id = Int(name='sourcepackagerelease')
+    sourcepackagerelease = Reference(
+        sourcepackagerelease_id, 'SourcePackageRelease.id')


Follow ups