[Merge] lp:~wallyworld/launchpad/ppa-packages-timeout-1071581 into lp:launchpad
Ian Booth has proposed merging lp:~wallyworld/launchpad/ppa-packages-timeout-1071581 into lp:launchpad.
Commit message:
Introduce a new reporting table (populated by a frequent garbo job) to allow efficient loading of source package releases related to a person.
Requested reviews:
Curtis Hovey (sinzui): code
Related bugs:
Bug #1071581 in Launchpad itself: "+ppa-packages timeout"
https://bugs.launchpad.net/launchpad/+bug/1071581
For more details, see:
https://code.launchpad.net/~wallyworld/launchpad/ppa-packages-timeout-1071581/+merge/132646
== Pre Implementation ==
Discussed with wgrant.
== Implementation ==
Postgres cannot efficiently execute the DISTINCT ON query needed to find only the latest published packages.
After a previous attempt to solve this, the best approach is to introduce a new denormalised reporting table and populate it with a garbo job. The data does not have to be completely fresh, so a frequently run garbo job will suffice.
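For context, a minimal sketch of the two query shapes involved. The SQL is illustrative only (the variable names and person parameter are hypothetical; table and column names follow the diff below), held in Python strings as the garbo code does:

    # The shape that times out: DISTINCT ON makes Postgres sort every SPR
    # the person has ever touched before the first row can be returned.
    slow_query = """
        SELECT DISTINCT ON (spr.upload_distroseries, spr.sourcepackagename,
                            spr.upload_archive) spr.*
        FROM SourcePackageRelease spr
        JOIN SourcePackagePublishingHistory spph
            ON spph.sourcepackagerelease = spr.id
        WHERE spr.creator = ?
        ORDER BY spr.upload_distroseries, spr.sourcepackagename,
            spr.upload_archive, spr.dateuploaded DESC
        """
    # With the denormalised cache the same answer is a single indexed scan.
    fast_query = """
        SELECT * FROM LatestPersonSourcepackageReleaseCache
        WHERE creator = ? ORDER BY date_uploaded DESC
        """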
== Tests ==
== Lint ==
Checking for conflicts and issues in changed files.
Linting changed files:
database/schema/security.cfg
lib/lp/registry/browser/person.py
lib/lp/registry/model/person.py
lib/lp/scripts/garbo.py
lib/lp/soyuz/configure.zcml
lib/lp/soyuz/interfaces/reporting.py
lib/lp/soyuz/model/reporting.py
--
https://code.launchpad.net/~wallyworld/launchpad/ppa-packages-timeout-1071581/+merge/132646
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.
=== modified file 'database/schema/security.cfg'
--- database/schema/security.cfg 2012-11-01 16:14:00 +0000
+++ database/schema/security.cfg 2012-11-02 04:34:19 +0000
@@ -217,6 +217,7 @@
public.karmatotalcache = SELECT, DELETE, UPDATE
public.language = SELECT
public.languagepack = SELECT, INSERT, UPDATE
+public.latestpersonsourcepackagereleasecache = SELECT
public.launchpadstatistic = SELECT
public.libraryfilealias = SELECT, INSERT, UPDATE, DELETE
public.libraryfiledownloadcount = SELECT, INSERT, UPDATE
@@ -2255,8 +2256,10 @@
public.codeimportresult = SELECT, DELETE
public.commercialsubscription = SELECT, UPDATE
public.emailaddress = SELECT, UPDATE, DELETE
+public.garbojobstate = SELECT, INSERT, UPDATE, DELETE
public.hwsubmission = SELECT, UPDATE
public.job = SELECT, INSERT, DELETE
+public.latestpersonsourcepackagereleasecache = SELECT, INSERT, UPDATE
public.logintoken = SELECT, DELETE
public.mailinglistsubscription = SELECT, DELETE
public.milestonetag = SELECT
=== modified file 'lib/lp/registry/browser/person.py'
--- lib/lp/registry/browser/person.py 2012-10-26 07:15:49 +0000
+++ lib/lp/registry/browser/person.py 2012-11-02 04:34:19 +0000
@@ -3654,17 +3654,17 @@
builds_by_package = {}
needs_build_by_package = {}
for package in package_releases:
- builds_by_package[package] = []
- needs_build_by_package[package] = False
+ builds_by_package[package.id] = []
+ needs_build_by_package[package.id] = False
for build in all_builds:
if build.status == BuildStatus.FAILEDTOBUILD:
- builds_by_package[build.source_package_release].append(build)
+ builds_by_package[build.source_package_release.id].append(build)
needs_build = build.status in [
BuildStatus.NEEDSBUILD,
BuildStatus.MANUALDEPWAIT,
BuildStatus.CHROOTWAIT,
]
- needs_build_by_package[build.source_package_release] = needs_build
+ needs_build_by_package[build.source_package_release.id] = needs_build
return (builds_by_package, needs_build_by_package)
@@ -3675,8 +3675,8 @@
return [
SourcePackageReleaseWithStats(
- package, builds_by_package[package],
- needs_build_by_package[package])
+ package, builds_by_package[package.id],
+ needs_build_by_package[package.id])
for package in package_releases]
def _addStatsToPublishings(self, publishings):
@@ -3689,8 +3689,8 @@
return [
SourcePackagePublishingHistoryWithStats(
- spph, builds_by_package[spph.sourcepackagerelease],
- needs_build_by_package[spph.sourcepackagerelease])
+ spph, builds_by_package[spph.sourcepackagerelease.id],
+ needs_build_by_package[spph.sourcepackagerelease.id])
for spph in filtered_spphs]
def setUpBatch(self, packages):
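A note on the keying change above: the batch now yields LatestPersonSourcepackageReleaseCache rows (introduced later in this diff) that merely delegate to SourcePackageReleases, while each build hands back the underlying SourcePackageRelease itself, so the two sides are distinct Python objects and only the shared id is a safe dictionary key. A hypothetical sketch:

    # 'packages' and 'builds' are hypothetical stand-ins for the batch
    # contents; the point is the object-identity mismatch.
    cache_row = packages[0]                      # delegating cache row
    real_spr = builds[0].source_package_release  # the actual SPR
    assert cache_row.id == real_spr.id           # ids agree...
    assert cache_row is not real_spr             # ...object identity does not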
=== modified file 'lib/lp/registry/model/person.py'
--- lib/lp/registry/model/person.py 2012-11-01 20:33:27 +0000
+++ lib/lp/registry/model/person.py 2012-11-02 04:34:19 +0000
@@ -327,6 +327,7 @@
Archive,
validate_ppa,
)
+from lp.soyuz.model.reporting import LatestPersonSourcepackageReleaseCache
from lp.soyuz.model.publishing import SourcePackagePublishingHistory
from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
from lp.translations.model.hastranslationimports import (
@@ -2810,8 +2811,8 @@
return self._latestReleasesQuery(uploader_only=True, ppa_only=True)
def _releasesQueryFilter(self, uploader_only=False, ppa_only=False):
- """Return the filter used to find sourcepackagereleases (SPRs)
- related to this person.
+ """Return the filter used to find latest published source package
+ releases (SPRs) related to this person.
:param uploader_only: controls if we are interested in SPRs where
the person in question is only the uploader (creator) and not the
@@ -2827,24 +2828,27 @@
'uploader_only' because there shouldn't be any sense of maintainership
for packages uploaded to PPAs by someone else than the user himself.
"""
- clauses = [SourcePackageRelease.upload_archive == Archive.id]
-
+ clauses = []
if uploader_only:
- clauses.append(SourcePackageRelease.creator == self)
-
+ clauses.append(
+ LatestPersonSourcepackageReleaseCache.creator == self)
if ppa_only:
# Source maintainer is irrelevant for PPA uploads.
pass
elif uploader_only:
- clauses.append(SourcePackageRelease.maintainer != self)
+ clauses.append(
+ LatestPersonSourcepackageReleaseCache.maintainer != self)
else:
- clauses.append(SourcePackageRelease.maintainer == self)
-
+ clauses.append(
+ LatestPersonSourcepackageReleaseCache.maintainer == self)
if ppa_only:
- clauses.append(Archive.purpose == ArchivePurpose.PPA)
+ clauses.append(
+ LatestPersonSourcepackageReleaseCache.archive_purpose ==
+ ArchivePurpose.PPA)
else:
- clauses.append(Archive.purpose != ArchivePurpose.PPA)
-
+ clauses.append(
+ LatestPersonSourcepackageReleaseCache.archive_purpose !=
+ ArchivePurpose.PPA)
return clauses
def _hasReleasesQuery(self, uploader_only=False, ppa_only=False):
@@ -2852,50 +2856,26 @@
See `_releasesQueryFilter` for details on the criteria used.
"""
clauses = self._releasesQueryFilter(uploader_only, ppa_only)
- spph = ClassAlias(SourcePackagePublishingHistory, "spph")
- tables = (
- SourcePackageRelease,
- Join(
- spph, spph.sourcepackagereleaseID == SourcePackageRelease.id),
- Join(Archive, Archive.id == spph.archiveID))
- rs = Store.of(self).using(*tables).find(
- SourcePackageRelease.id, clauses)
+ rs = Store.of(self).using(LatestPersonSourcepackageReleaseCache).find(
+ LatestPersonSourcepackageReleaseCache.publication_id, clauses)
return not rs.is_empty()
def _latestReleasesQuery(self, uploader_only=False, ppa_only=False):
- """Return the sourcepackagereleases (SPRs) related to this person.
+ """Return the sourcepackagereleases records related to this person.
See `_releasesQueryFilter` for details on the criteria used."""
clauses = self._releasesQueryFilter(uploader_only, ppa_only)
- spph = ClassAlias(SourcePackagePublishingHistory, "spph")
rs = Store.of(self).find(
- SourcePackageRelease,
- SourcePackageRelease.id.is_in(
- Select(
- SourcePackageRelease.id,
- tables=[
- SourcePackageRelease,
- Join(
- spph,
- spph.sourcepackagereleaseID ==
- SourcePackageRelease.id),
- Join(Archive, Archive.id == spph.archiveID)],
- where=And(*clauses),
- order_by=[SourcePackageRelease.upload_distroseriesID,
- SourcePackageRelease.sourcepackagenameID,
- SourcePackageRelease.upload_archiveID,
- Desc(SourcePackageRelease.dateuploaded)],
- distinct=(
- SourcePackageRelease.upload_distroseriesID,
- SourcePackageRelease.sourcepackagenameID,
- SourcePackageRelease.upload_archiveID)))
- ).order_by(
- Desc(SourcePackageRelease.dateuploaded), SourcePackageRelease.id)
+ LatestPersonSourcepackageReleaseCache, *clauses).order_by(
+ Desc(LatestPersonSourcepackageReleaseCache.dateuploaded))
def load_related_objects(rows):
list(getUtility(IPersonSet).getPrecachedPersonsFromIDs(
- set(map(attrgetter("maintainerID"), rows))))
- bulk.load_related(SourcePackageName, rows, ['sourcepackagenameID'])
- bulk.load_related(Archive, rows, ['upload_archiveID'])
+ set(map(attrgetter("maintainer_id"), rows))))
+ bulk.load_related(
+ SourcePackageName, rows, ['sourcepackagename_id'])
+ bulk.load_related(
+ SourcePackageRelease, rows, ['sourcepackagerelease_id'])
+ bulk.load_related(Archive, rows, ['upload_archive_id'])
return DecoratedResultSet(rs, pre_iter_hook=load_related_objects)
=== modified file 'lib/lp/scripts/garbo.py'
--- lib/lp/scripts/garbo.py 2012-10-22 02:30:44 +0000
+++ lib/lp/scripts/garbo.py 2012-11-02 04:34:19 +0000
@@ -17,6 +17,7 @@
import logging
import multiprocessing
import os
+import simplejson
import threading
import time
@@ -28,11 +29,16 @@
from psycopg2 import IntegrityError
import pytz
from storm.expr import (
+ Alias,
+ And,
+ Desc,
In,
Like,
Select,
Update,
+ Join,
)
+from storm.info import ClassAlias
from storm.locals import (
Max,
Min,
@@ -105,6 +111,10 @@
)
from lp.services.session.model import SessionData
from lp.services.verification.model.logintoken import LoginToken
+from lp.soyuz.model.archive import Archive
+from lp.soyuz.model.publishing import SourcePackagePublishingHistory
+from lp.soyuz.model.reporting import LatestPersonSourcepackageReleaseCache
+from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
from lp.translations.interfaces.potemplate import IPOTemplateSet
from lp.translations.model.potmsgset import POTMsgSet
from lp.translations.model.potranslation import POTranslation
@@ -423,6 +433,103 @@
transaction.commit()
+class PopulateLatestPersonSourcepackageReleaseCache(TunableLoop):
+ """Populate the LatestPersonSourcepackageReleaseCache table."""
+ maximum_chunk_size = 5000
+
+ def __init__(self, log, abort_time=None):
+ super_cl = super(PopulateLatestPersonSourcepackageReleaseCache, self)
+ super_cl.__init__(log, abort_time)
+ self.store = getUtility(IStoreSelector).get(MAIN_STORE, MASTER_FLAVOR)
+ self.next_id = 0
+ self.job_name = self.__class__.__name__
+ job_data = self.store.execute(
+ "SELECT json_data FROM GarboJobState WHERE name = '%s'"
+ % self.job_name).get_one()
+ if job_data:
+ json_data = simplejson.loads(job_data[0])
+ self.next_id = json_data['next_id']
+ else:
+ json_data = simplejson.dumps({'next_id': 0})
+ self.store.execute(
+ "INSERT INTO GarboJobState(name, json_data) "
+ "VALUES ('%s', '%s')" % (self.job_name, json_data))
+
+ def getPendingUpdates(self):
+ spph = ClassAlias(SourcePackagePublishingHistory, "spph")
+ origin = [
+ SourcePackageRelease,
+ Join(
+ spph,
+ And(spph.sourcepackagereleaseID == SourcePackageRelease.id,
+ spph.archiveID == SourcePackageRelease.upload_archiveID))]
+ spr_select = self.store.using(*origin).find(
+ (SourcePackageRelease.id, Alias(spph.id, 'spph_id')),
+ SourcePackageRelease.upload_archiveID == spph.archiveID,
+ SourcePackageRelease.id > self.next_id
+ ).order_by(
+ SourcePackageRelease.upload_distroseriesID,
+ SourcePackageRelease.sourcepackagenameID,
+ SourcePackageRelease.upload_archiveID,
+ Desc(SourcePackageRelease.dateuploaded),
+ SourcePackageRelease.id
+ ).config(distinct=(
+ SourcePackageRelease.upload_distroseriesID,
+ SourcePackageRelease.sourcepackagenameID,
+ SourcePackageRelease.upload_archiveID))._get_select()
+
+ spr = Alias(spr_select, 'spr')
+ origin = [
+ SourcePackageRelease,
+ Join(spr, SQL('spr.id') == SourcePackageRelease.id),
+ Join(Archive, Archive.id == SourcePackageRelease.upload_archiveID)]
+ rs = self.store.using(*origin).find(
+ (SourcePackageRelease.id,
+ SourcePackageRelease.creatorID, SourcePackageRelease.maintainerID,
+ SourcePackageRelease.upload_archiveID,
+ Archive.purpose,
+ SourcePackageRelease.sourcepackagenameID,
+ SourcePackageRelease.upload_distroseriesID,
+ SourcePackageRelease.dateuploaded, SQL('spph_id'))
+ ).order_by(SourcePackageRelease.id)
+ return rs
+
+ def isDone(self):
+ return self.getPendingUpdates().count() == 0
+
+ def update_cache(self, spr_id, creator_id, maintainer_id, archive_id,
+ purpose, spn_id, distroseries_id, dateuploaded, spph_id):
+ lpr = LatestPersonSourcepackageReleaseCache()
+ lpr.publication = spph_id
+ lpr.upload_distroseries_id = distroseries_id
+ lpr.archive_purpose = purpose
+ lpr.creator_id = creator_id
+ lpr.maintainer_id = maintainer_id
+ lpr.sourcepackagename_id = spn_id
+ lpr.sourcepackagerelease = spr_id
+ lpr.upload_archive = archive_id
+ lpr.dateuploaded = dateuploaded
+ self.store.add(lpr)
+
+ def __call__(self, chunk_size):
+ max_id = 0
+ for (spr_id, creator_id, maintainer_id, archive_id, purpose,
+ distroseries_id, spn_id, dateuploaded, spph_id) in (
+ self.getPendingUpdates()[:chunk_size]):
+ self.update_cache(
+ spr_id, creator_id, maintainer_id, archive_id, purpose,
+ distroseries_id, spn_id, dateuploaded, spph_id)
+ max_id = spr_id
+
+ self.next_id = max_id
+ self.store.flush()
+ json_data = simplejson.dumps({'next_id': max_id})
+ self.store.execute(
+ "UPDATE GarboJobState SET json_data = '%s' WHERE name = '%s'"
+ % (json_data, self.job_name))
+ transaction.commit()
+
+
class OpenIDConsumerNoncePruner(TunableLoop):
"""An ITunableLoop to prune old OpenIDConsumerNonce records.
@@ -1339,6 +1446,7 @@
OpenIDConsumerAssociationPruner,
AntiqueSessionPruner,
VoucherRedeemer,
+ PopulateLatestPersonSourcepackageReleaseCache
]
experimental_tunable_loops = []
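The new garbo job checkpoints its progress in GarboJobState so that each run resumes where the previous one stopped. A minimal sketch of that pattern, assuming Storm's '?' parameter style (the helper names are hypothetical):

    import simplejson

    def load_next_id(store, job_name):
        # Returns the last checkpoint, or 0 on the very first run.
        row = store.execute(
            "SELECT json_data FROM GarboJobState WHERE name = ?",
            (job_name,)).get_one()
        return simplejson.loads(row[0])['next_id'] if row else 0

    def save_next_id(store, job_name, next_id):
        # Overwrites the checkpoint after a chunk has been flushed.
        store.execute(
            "UPDATE GarboJobState SET json_data = ? WHERE name = ?",
            (simplejson.dumps({'next_id': next_id}), job_name))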
=== modified file 'lib/lp/soyuz/configure.zcml'
--- lib/lp/soyuz/configure.zcml 2012-09-28 14:48:20 +0000
+++ lib/lp/soyuz/configure.zcml 2012-11-02 04:34:19 +0000
@@ -1002,6 +1002,12 @@
<allow interface="lp.soyuz.adapters.overrides.IOverridePolicy" />
</class>
+ <class
+ class="lp.soyuz.model.reporting.LatestPersonSourcepackageReleaseCache">
+ <allow
+ interface="lp.soyuz.interfaces.reporting.ILatestPersonSourcepackageReleaseCache"/>
+ </class>
+
<!-- ProcessAcceptedBugsJobSource -->
<securedutility
component=".model.processacceptedbugsjob.ProcessAcceptedBugsJob"
=== added file 'lib/lp/soyuz/interfaces/reporting.py'
--- lib/lp/soyuz/interfaces/reporting.py 1970-01-01 00:00:00 +0000
+++ lib/lp/soyuz/interfaces/reporting.py 2012-11-02 04:34:19 +0000
@@ -0,0 +1,25 @@
+# Copyright 2012 Canonical Ltd. This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+__metaclass__ = type
+__all__ = [
+ 'ILatestPersonSourcepackageReleaseCache',
+ ]
+
+
+from zope.interface import Attribute
+from lp.soyuz.interfaces.sourcepackagerelease import ISourcePackageRelease
+
+
+class ILatestPersonSourcepackageReleaseCache(ISourcePackageRelease):
+ """Published source package release information for a person.
+
+ The records represented by this object are the latest published source
+ package releases for a given sourcepackage, distroseries, archive.
+ """
+
+ id = Attribute("The id of the associated SourcePackageRelease.")
+ sourcepackagerelease = Attribute(
+ "The SourcePackageRelease which this object represents.")
+ publication = Attribute(
+ "The publication record for the associated SourcePackageRelease.")
=== added file 'lib/lp/soyuz/model/reporting.py'
--- lib/lp/soyuz/model/reporting.py 1970-01-01 00:00:00 +0000
+++ lib/lp/soyuz/model/reporting.py 2012-11-02 04:34:19 +0000
@@ -0,0 +1,51 @@
+# Copyright 2012 Canonical Ltd. This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+__metaclass__ = type
+__all__ = [
+ 'LatestPersonSourcepackageReleaseCache',
+ ]
+
+from lazr.delegates import delegates
+from storm.base import Storm
+from storm.locals import (
+ Int,
+ Reference,
+ )
+from storm.properties import DateTime
+from zope.interface import implements
+
+from lp.services.database.enumcol import EnumCol
+from lp.soyuz.enums import ArchivePurpose
+from lp.soyuz.interfaces.reporting import (
+ ILatestPersonSourcepackageReleaseCache,
+ )
+from lp.soyuz.interfaces.sourcepackagerelease import ISourcePackageRelease
+
+
+class LatestPersonSourcepackageReleaseCache(Storm):
+ """See `LatestPersonSourcepackageReleaseCache`."""
+ implements(ILatestPersonSourcepackageReleaseCache)
+ delegates(ISourcePackageRelease, context='sourcepackagerelease')
+
+ __storm_table__ = 'LatestPersonSourcepackageReleaseCache'
+
+ id = Int(name='id', primary=True)
+ publication_id = Int(name='publication')
+ publication = Reference(
+ publication_id, 'SourcePackagePublishingHistory.id')
+ dateuploaded = DateTime(name='date_uploaded')
+ creator_id = Int(name='creator')
+ creator = Reference(creator_id, 'Person.id')
+ maintainer_id = Int(name='maintainer')
+ maintainer = Reference(maintainer_id, 'Person.id')
+ upload_archive_id = Int(name='upload_archive')
+ upload_archive = Reference(upload_archive_id, 'Archive.id')
+ archive_purpose = EnumCol(schema=ArchivePurpose)
+ upload_distroseries_id = Int(name='upload_distroseries')
+ upload_distroseries = Reference(upload_distroseries_id, 'DistroSeries.id')
+ sourcepackagename_id = Int(name='sourcepackagename')
+ sourcepackagename = Reference(sourcepackagename_id, 'SourcePackageName.id')
+ sourcepackagerelease_id = Int(name='sourcepackagerelease')
+ sourcepackagerelease = Reference(
+ sourcepackagerelease_id, 'SourcePackageRelease.id')
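The delegates() declaration is what keeps the rest of the codebase unchanged: any ISourcePackageRelease attribute the cache row does not store itself is proxied to the real SourcePackageRelease. A hypothetical usage sketch:

    row = Store.of(person).find(
        LatestPersonSourcepackageReleaseCache,
        LatestPersonSourcepackageReleaseCache.maintainer == person).first()
    # Columns stored on the cache row are answered directly:
    uploaded = row.dateuploaded
    # Anything else on ISourcePackageRelease (e.g. 'version') is fetched
    # transparently from row.sourcepackagerelease via delegates().
    version = row.version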