← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:update-pkgcache-binary-race into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:update-pkgcache-binary-race into launchpad:master.

Commit message:
Fix update-pkgcache crash due to binary deletion

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #2023698 in Launchpad itself: "update-pkgcache crashing with OOPS"
  https://bugs.launchpad.net/launchpad/+bug/2023698

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/444768

The `update-pkgcache` script started crashing on production about a week ago, and mysteriously started working again today without us changing anything.  Based on code inspection, the only way I can see for this particular crash to happen is if a binary was deleted between `DistroSeriesPackageCache.findCurrentBinaryPackageNames` and `DistroSeriesPackageCache._update` (`update-pkgcache` is slow enough that it makes many commits along the way, so this is possible), and simulating that in a test reproduces the same crash.

It seems safe to skip the affected binary in such cases, since `DistroSeriesPackageCache.removeOld` will eventually clean up its cache entries.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:update-pkgcache-binary-race into launchpad:master.
diff --git a/lib/lp/soyuz/model/distroseriespackagecache.py b/lib/lp/soyuz/model/distroseriespackagecache.py
index bedc2fd..93d3b3e 100644
--- a/lib/lp/soyuz/model/distroseriespackagecache.py
+++ b/lib/lp/soyuz/model/distroseriespackagecache.py
@@ -77,6 +77,10 @@ class DistroSeriesPackageCache(StormBase):
                 ),
             )
             .config(distinct=True)
+            # Not necessary for correctness, but useful for testability; and
+            # at the time of writing the sort only adds perhaps 10-20 ms to
+            # the query time on staging.
+            .order_by(BinaryPackagePublishingHistory.binarypackagenameID)
         )
         return bulk.load(BinaryPackageName, bpn_ids)
 
@@ -204,6 +208,14 @@ class DistroSeriesPackageCache(StormBase):
             )
 
         for bpn in binarypackagenames:
+            if bpn not in details_map:
+                log.debug(
+                    "No active publishing details found for %s; perhaps "
+                    "removed in parallel with update-pkgcache?  Skipping.",
+                    bpn.name,
+                )
+                continue
+
             cache = cache_map[bpn]
             details = details_map[bpn]
             # make sure the cached name, summary and description are correct
diff --git a/lib/lp/soyuz/scripts/tests/test_update_pkgcache.py b/lib/lp/soyuz/scripts/tests/test_update_pkgcache.py
index 875e956..5560b57 100644
--- a/lib/lp/soyuz/scripts/tests/test_update_pkgcache.py
+++ b/lib/lp/soyuz/scripts/tests/test_update_pkgcache.py
@@ -6,8 +6,11 @@
 import transaction
 
 from lp.services.log.logger import BufferLogger
+from lp.soyuz.enums import PackagePublishingStatus
+from lp.soyuz.model.distroseriespackagecache import DistroSeriesPackageCache
 from lp.soyuz.scripts.update_pkgcache import PackageCacheUpdater
 from lp.testing import TestCaseWithFactory
+from lp.testing.dbuser import dbuser
 from lp.testing.layers import ZopelessDatabaseLayer
 
 
@@ -34,10 +37,61 @@ class TestPackageCacheUpdater(TestCaseWithFactory):
             self.assertEqual(0, archive.sources_cached)
             self.factory.makeSourcePackagePublishingHistory(archive=archive)
         script = self.makeScript()
-        script.updateDistributionCache(distribution, archives[0])
-        archives[0].updateArchiveCache()
+        with dbuser(self.dbuser):
+            script.updateDistributionCache(distribution, archives[0])
+            archives[0].updateArchiveCache()
         self.assertEqual(1, archives[0].sources_cached)
         archives[1].disable()
-        script.updateDistributionCache(distribution, archives[1])
-        archives[1].updateArchiveCache()
+        with dbuser(self.dbuser):
+            script.updateDistributionCache(distribution, archives[1])
+            archives[1].updateArchiveCache()
         self.assertEqual(0, archives[1].sources_cached)
+
+    def test_binary_deleted_during_run(self):
+        distroseries = self.factory.makeDistroSeries()
+        das = self.factory.makeDistroArchSeries(distroseries=distroseries)
+        archive = self.factory.makeArchive(
+            distribution=distroseries.distribution
+        )
+        bpns = [self.factory.makeBinaryPackageName() for _ in range(4)]
+        bpphs = [
+            self.factory.makeBinaryPackagePublishingHistory(
+                binarypackagename=bpn,
+                distroarchseries=das,
+                status=PackagePublishingStatus.PUBLISHED,
+                archive=archive,
+            )
+            for bpn in bpns
+        ]
+
+        class InstrumentedTransaction:
+            def commit(self):
+                transaction.commit()
+                # Change this binary package publishing history's status
+                # after the first commit, to simulate the situation where a
+                # BPPH ceases to be active part-way through an
+                # update-pkgcache run.
+                if bpphs[2].status == PackagePublishingStatus.PUBLISHED:
+                    with dbuser("launchpad"):
+                        bpphs[2].requestDeletion(archive.owner)
+
+        logger = BufferLogger()
+        with dbuser(self.dbuser):
+            DistroSeriesPackageCache.updateAll(
+                distroseries,
+                archive=archive,
+                ztm=InstrumentedTransaction(),
+                log=logger,
+                commit_chunk=2,
+            )
+            archive.updateArchiveCache()
+        self.assertEqual(4, archive.binaries_cached)
+        self.assertEqual(
+            "DEBUG Considering binaries {bpns[0].name}, {bpns[1].name}\n"
+            "DEBUG Committing\n"
+            "DEBUG Considering binaries {bpns[2].name}, {bpns[3].name}\n"
+            "DEBUG No active publishing details found for {bpns[2].name};"
+            " perhaps removed in parallel with update-pkgcache?  Skipping.\n"
+            "DEBUG Committing\n".format(bpns=bpns),
+            logger.getLogBuffer(),
+        )