← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~wgrant/launchpad/update-pkgcache-round1 into lp:launchpad

 

William Grant has proposed merging lp:~wgrant/launchpad/update-pkgcache-round1 into lp:launchpad.

Commit message:
Quick optimisation of update-pkgcache, issuing fewer queries and materialising fewer complete Storm objects. Roughly 4x faster on large archives.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~wgrant/launchpad/update-pkgcache-round1/+merge/226157

Quick optimisation of update-pkgcache, issuing fewer queries and materialising fewer complete Storm objects. Roughly 4x faster on large archives on dogfood (trusty's binaries drop from 15 minutes to under 4 minutes), and it should be an even larger improvement on prod because of the DB latency there.

More work is needed to get it below O(series * (sources + binaries)), but this is a useful first step.
-- 
https://code.launchpad.net/~wgrant/launchpad/update-pkgcache-round1/+merge/226157
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/update-pkgcache-round1 into lp:launchpad.
=== modified file 'lib/lp/soyuz/doc/package-cache.txt'
--- lib/lp/soyuz/doc/package-cache.txt	2013-05-01 21:23:16 +0000
+++ lib/lp/soyuz/doc/package-cache.txt	2014-07-09 15:22:53 +0000
@@ -248,12 +248,14 @@
     ...     warty, archive=ubuntu.main_archive, ztm=transaction,
     ...     log=FakeLogger())
     DEBUG Considering binary 'at'
-    ...
     DEBUG Considering binary 'cdrkit'
     DEBUG Creating new binary cache entry.
-    ...
+    DEBUG Considering binary 'linux-2.6.12'
     DEBUG Considering binary 'mozilla-firefox'
-    ...
+    DEBUG Considering binary 'mozilla-firefox-data'
+    DEBUG Creating new binary cache entry.
+    DEBUG Considering binary 'pmount'
+
 
     >>> print updates
     6

=== modified file 'lib/lp/soyuz/model/distributionsourcepackagecache.py'
--- lib/lp/soyuz/model/distributionsourcepackagecache.py	2013-06-20 05:50:00 +0000
+++ lib/lp/soyuz/model/distributionsourcepackagecache.py	2014-07-09 15:22:53 +0000
@@ -22,6 +22,8 @@
 from lp.soyuz.interfaces.distributionsourcepackagecache import (
     IDistributionSourcePackageCache,
     )
+from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
+from lp.soyuz.model.binarypackagename import BinaryPackageName
 from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
 from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
 
@@ -172,16 +174,17 @@
             # to the set as the join would fail below.
             if spr.changelog_entry is not None:
                 sprchangelog.add(spr.changelog_entry)
-            binpkgs = BinaryPackageRelease.select("""
-                BinaryPackageRelease.build = BinaryPackageBuild.id AND
-                BinaryPackageBuild.source_package_release = %s
-                """ % sqlvalues(spr.id),
-                clauseTables=['BinaryPackageBuild'])
-            for binpkg in binpkgs:
-                log.debug("Considering binary '%s'" % binpkg.name)
-                binpkgnames.add(binpkg.name)
-                binpkgsummaries.add(binpkg.summary)
-                binpkgdescriptions.add(binpkg.description)
+        binpkgs = IStore(BinaryPackageRelease).find(
+            (BinaryPackageName.name, BinaryPackageRelease.summary,
+             BinaryPackageRelease.description),
+            BinaryPackageRelease.buildID == BinaryPackageBuild.id,
+            BinaryPackageBuild.source_package_release_id.is_in(
+                [spr.id for spr in sprs]),
+            BinaryPackageName.id == BinaryPackageRelease.binarypackagenameID)
+        for name, summary, description in binpkgs:
+            binpkgnames.add(name)
+            binpkgsummaries.add(summary)
+            binpkgdescriptions.add(description)
 
         # Update the caches.
         cache.binpkgnames = ' '.join(sorted(binpkgnames))

=== modified file 'lib/lp/soyuz/model/distroseriespackagecache.py'
--- lib/lp/soyuz/model/distroseriespackagecache.py	2013-06-20 05:50:00 +0000
+++ lib/lp/soyuz/model/distroseriespackagecache.py	2014-07-09 15:22:53 +0000
@@ -12,6 +12,7 @@
     )
 from storm.locals import (
     Desc,
+    Max,
     RawStr,
     )
 from zope.interface import implements
@@ -116,8 +117,9 @@
         (in full batches of 100 elements)
         """
         # get the set of published binarypackagereleases
-        bprs = IStore(BinaryPackageRelease).find(
-            BinaryPackageRelease,
+        details = list(IStore(BinaryPackageRelease).find(
+            (BinaryPackageRelease.summary, BinaryPackageRelease.description,
+             Max(BinaryPackageRelease.datecreated)),
             BinaryPackageRelease.id ==
                 BinaryPackagePublishingHistory.binarypackagereleaseID,
             BinaryPackagePublishingHistory.binarypackagename ==
@@ -126,11 +128,14 @@
                 DistroArchSeries.id,
             DistroArchSeries.distroseries == distroseries,
             BinaryPackagePublishingHistory.archive == archive,
-            BinaryPackagePublishingHistory.dateremoved == None)
-        bprs = bprs.order_by(Desc(BinaryPackageRelease.datecreated))
-        bprs = bprs.config(distinct=True)
+            BinaryPackagePublishingHistory.dateremoved == None
+            ).group_by(
+                BinaryPackageRelease.summary,
+                BinaryPackageRelease.description
+            ).order_by(
+                Desc(Max(BinaryPackageRelease.datecreated))))
 
-        if bprs.count() == 0:
+        if not details:
             log.debug("No binary releases found.")
             return
 
@@ -149,18 +154,17 @@
 
         # make sure the cached name, summary and description are correct
         cache.name = binarypackagename.name
-        cache.summary = bprs[0].summary
-        cache.description = bprs[0].description
+        cache.summary = details[0][0]
+        cache.description = details[0][1]
 
         # get the sets of binary package summaries, descriptions. there is
         # likely only one, but just in case...
 
         summaries = set()
         descriptions = set()
-        for bpr in bprs:
-            log.debug("Considering binary version %s" % bpr.version)
-            summaries.add(bpr.summary)
-            descriptions.add(bpr.description)
+        for summary, description, datecreated in details:
+            summaries.add(summary)
+            descriptions.add(description)
 
         # and update the caches
         cache.summaries = ' '.join(sorted(summaries))


Follow ups