← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:update-pkgcache-disabled-archive into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:update-pkgcache-disabled-archive into launchpad:master.

Commit message:
Fix update-pkgcache crash with disabled archives

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/418113

If an archive is disabled while `update-pkgcache` is running, then it can cause a crash, because `DistributionSourcePackageCache.updateAll` and `DistroSeriesPackageCache.updateAll` previously returned None rather than 0 in that case.

It's only possible to hit this bug if an archive is disabled between the time when `PackageCacheUpdater.main` fetches the list of archives for a given distribution and the time when `PackageCacheUpdater.updateDistributionCache` or `PackageCacheUpdater.updateDistroSeriesCache` is called for the newly-disabled archive (the fetched list of archives excludes disabled archives, but `update-pkgcache` may commit transactions between those points and so may see updated database state).  To simulate this situation, I had to restructure the script somewhat in order to be able to write in-process tests.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:update-pkgcache-disabled-archive into launchpad:master.
diff --git a/cronscripts/update-pkgcache.py b/cronscripts/update-pkgcache.py
index 0cea878..07dca6a 100755
--- a/cronscripts/update-pkgcache.py
+++ b/cronscripts/update-pkgcache.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python3 -S
 #
-# Copyright 2009-2021 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2022 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 # This script updates the cached source package information in the system.
@@ -9,110 +9,9 @@
 
 import _pythonpath  # noqa: F401
 
-from zope.component import getUtility
+from lp.soyuz.scripts.update_pkgcache import PackageCacheUpdater
 
-from lp.registry.interfaces.distribution import IDistributionSet
-from lp.services.scripts.base import LaunchpadCronScript
-from lp.soyuz.enums import ArchivePurpose
-from lp.soyuz.interfaces.archive import IArchiveSet
-from lp.soyuz.model.distributionsourcepackagecache import (
-    DistributionSourcePackageCache,
-    )
-from lp.soyuz.model.distroseriespackagecache import DistroSeriesPackageCache
 
-
-class PackageCacheUpdater(LaunchpadCronScript):
-    """Helper class for updating package caches.
-
-    It iterates over all distributions, distroseries and archives (including
-    PPAs) updating the package caches to reflect what is currently published
-    in those locations.
-    """
-
-    def updateDistributionPackageCounters(self, distribution):
-        """Update package counters for a given distribution."""
-        for distroseries in distribution:
-            distroseries.updatePackageCount()
-            self.txn.commit()
-            for arch in distroseries.architectures:
-                arch.updatePackageCount()
-                self.txn.commit()
-
-    def updateDistributionCache(self, distribution, archive):
-        """Update package caches for the given location.
-
-        'archive' can be one of the main archives (PRIMARY or PARTNER), a
-        PPA, or None to update caches of official branch links.
-
-        This method commits the transaction frequently since it deal with
-        a huge amount of data.
-
-        PPA archives caches are consolidated in a Archive row to optimize
-        searches across PPAs.
-        """
-        if archive is not None:
-            for distroseries in distribution.series:
-                self.updateDistroSeriesCache(distroseries, archive)
-
-        DistributionSourcePackageCache.removeOld(
-            distribution, archive, log=self.logger)
-
-        updates = DistributionSourcePackageCache.updateAll(
-            distribution, archive=archive, ztm=self.txn, log=self.logger)
-
-        if updates > 0:
-            self.txn.commit()
-
-    def updateDistroSeriesCache(self, distroseries, archive):
-        """Update package caches for the given location."""
-        self.logger.info('%s %s %s starting' % (
-            distroseries.distribution.name, distroseries.name,
-            archive.displayname))
-
-        DistroSeriesPackageCache.removeOld(
-            distroseries, archive=archive, log=self.logger)
-
-        updates = DistroSeriesPackageCache.updateAll(
-            distroseries, archive=archive, ztm=self.txn, log=self.logger)
-
-        if updates > 0:
-            self.txn.commit()
-
-    def main(self):
-        self.logger.debug('Starting the package cache update')
-
-        # Do the package counter and cache update for each distribution.
-        distroset = getUtility(IDistributionSet)
-        for distribution in distroset:
-            self.logger.info(
-                'Updating %s package counters' % distribution.name)
-            self.updateDistributionPackageCounters(distribution)
-
-            self.logger.info(
-                'Updating %s main archives' % distribution.name)
-            for archive in distribution.all_distro_archives:
-                self.updateDistributionCache(distribution, archive)
-
-            self.logger.info(
-                'Updating %s official branch links' % distribution.name)
-            self.updateDistributionCache(distribution, None)
-
-            self.logger.info(
-                'Updating %s PPAs' % distribution.name)
-            archives = getUtility(IArchiveSet).getArchivesForDistribution(
-                distribution, purposes=[ArchivePurpose.PPA],
-                check_permissions=False, exclude_pristine=True)
-            for archive in archives:
-                self.updateDistributionCache(distribution, archive)
-                archive.updateArchiveCache()
-
-            # Commit any remaining update for a distribution.
-            self.txn.commit()
-            self.logger.info('%s done' % distribution.name)
-
-        self.logger.debug('Finished the package cache update')
-
-if __name__ == '__main__':
-    script = PackageCacheUpdater(
-        'update-cache', dbuser="update-pkg-cache")
+if __name__ == "__main__":
+    script = PackageCacheUpdater("update-cache", dbuser="update-pkg-cache")
     script.lock_and_run()
diff --git a/lib/lp/soyuz/model/distributionsourcepackagecache.py b/lib/lp/soyuz/model/distributionsourcepackagecache.py
index 0d1fdcc..e7ee3e6 100644
--- a/lib/lp/soyuz/model/distributionsourcepackagecache.py
+++ b/lib/lp/soyuz/model/distributionsourcepackagecache.py
@@ -267,7 +267,7 @@ class DistributionSourcePackageCache(SQLBase):
         """
         # Do not create cache entries for disabled archives.
         if archive is not None and not archive.enabled:
-            return
+            return 0
 
         # Get the set of source package names to deal with.
         spns = list(sorted(
diff --git a/lib/lp/soyuz/model/distroseriespackagecache.py b/lib/lp/soyuz/model/distroseriespackagecache.py
index 6ea1989..38dd52a 100644
--- a/lib/lp/soyuz/model/distroseriespackagecache.py
+++ b/lib/lp/soyuz/model/distroseriespackagecache.py
@@ -196,7 +196,7 @@ class DistroSeriesPackageCache(SQLBase):
         """
         # Do not create cache entries for disabled archives.
         if not archive.enabled:
-            return
+            return 0
 
         # Get the set of package names to deal with.
         bpns = list(sorted(
diff --git a/lib/lp/soyuz/scripts/tests/test_update_pkgcache.py b/lib/lp/soyuz/scripts/tests/test_update_pkgcache.py
new file mode 100644
index 0000000..f4dba55
--- /dev/null
+++ b/lib/lp/soyuz/scripts/tests/test_update_pkgcache.py
@@ -0,0 +1,37 @@
+# Copyright 2022 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Test update-pkgcache."""
+
+import transaction
+
+from lp.services.log.logger import BufferLogger
+from lp.soyuz.scripts.update_pkgcache import PackageCacheUpdater
+from lp.testing import TestCaseWithFactory
+from lp.testing.layers import ZopelessDatabaseLayer
+
+
+# XXX cjwatson 2022-04-01: Most functional tests currently live in
+# lib/lp/soyuz/doc/package-cache-script.txt, but should be moved here.
+class TestPackageCacheUpdater(TestCaseWithFactory):
+
+    layer = ZopelessDatabaseLayer
+    dbuser = "update-pkg-cache"
+
+    def makeScript(self):
+        script = PackageCacheUpdater(test_args=[])
+        script.logger = BufferLogger()
+        script.txn = transaction
+        return script
+
+    def test_archive_disabled_during_run(self):
+        distribution = self.factory.makeDistribution()
+        archives = [
+            self.factory.makeArchive(distribution=distribution)
+            for _ in range(2)]
+        for archive in archives:
+            self.factory.makeSourcePackagePublishingHistory(archive=archive)
+        script = self.makeScript()
+        script.updateDistributionCache(distribution, archives[0])
+        archives[1].disable()
+        script.updateDistributionCache(distribution, archives[1])
diff --git a/lib/lp/soyuz/scripts/update_pkgcache.py b/lib/lp/soyuz/scripts/update_pkgcache.py
new file mode 100644
index 0000000..ae13027
--- /dev/null
+++ b/lib/lp/soyuz/scripts/update_pkgcache.py
@@ -0,0 +1,113 @@
+# Copyright 2009-2022 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Update cache source package information.
+
+We use this for fast source package searching (as opposed to joining through
+gazillions of publishing tables).
+"""
+
+__all__ = "PackageCacheUpdater"
+
+from zope.component import getUtility
+
+from lp.registry.interfaces.distribution import IDistributionSet
+from lp.services.scripts.base import LaunchpadCronScript
+from lp.soyuz.enums import ArchivePurpose
+from lp.soyuz.interfaces.archive import IArchiveSet
+from lp.soyuz.model.distributionsourcepackagecache import (
+    DistributionSourcePackageCache,
+    )
+from lp.soyuz.model.distroseriespackagecache import DistroSeriesPackageCache
+
+
+class PackageCacheUpdater(LaunchpadCronScript):
+    """Helper class for updating package caches.
+
+    It iterates over all distributions, distroseries and archives (including
+    PPAs) updating the package caches to reflect what is currently published
+    in those locations.
+    """
+
+    def updateDistributionPackageCounters(self, distribution):
+        """Update package counters for a given distribution."""
+        for distroseries in distribution:
+            distroseries.updatePackageCount()
+            self.txn.commit()
+            for arch in distroseries.architectures:
+                arch.updatePackageCount()
+                self.txn.commit()
+
+    def updateDistributionCache(self, distribution, archive):
+        """Update package caches for the given location.
+
+        'archive' can be one of the main archives (PRIMARY or PARTNER), a
+        PPA, or None to update caches of official branch links.
+
+        This method commits the transaction frequently since it deal with
+        a huge amount of data.
+
+        PPA archives caches are consolidated in a Archive row to optimize
+        searches across PPAs.
+        """
+        if archive is not None:
+            for distroseries in distribution.series:
+                self.updateDistroSeriesCache(distroseries, archive)
+
+        DistributionSourcePackageCache.removeOld(
+            distribution, archive, log=self.logger)
+
+        updates = DistributionSourcePackageCache.updateAll(
+            distribution, archive=archive, ztm=self.txn, log=self.logger)
+
+        if updates > 0:
+            self.txn.commit()
+
+    def updateDistroSeriesCache(self, distroseries, archive):
+        """Update package caches for the given location."""
+        self.logger.info('%s %s %s starting' % (
+            distroseries.distribution.name, distroseries.name,
+            archive.displayname))
+
+        DistroSeriesPackageCache.removeOld(
+            distroseries, archive=archive, log=self.logger)
+
+        updates = DistroSeriesPackageCache.updateAll(
+            distroseries, archive=archive, ztm=self.txn, log=self.logger)
+
+        if updates > 0:
+            self.txn.commit()
+
+    def main(self):
+        self.logger.debug('Starting the package cache update')
+
+        # Do the package counter and cache update for each distribution.
+        distroset = getUtility(IDistributionSet)
+        for distribution in distroset:
+            self.logger.info(
+                'Updating %s package counters' % distribution.name)
+            self.updateDistributionPackageCounters(distribution)
+
+            self.logger.info(
+                'Updating %s main archives' % distribution.name)
+            for archive in distribution.all_distro_archives:
+                self.updateDistributionCache(distribution, archive)
+
+            self.logger.info(
+                'Updating %s official branch links' % distribution.name)
+            self.updateDistributionCache(distribution, None)
+
+            self.logger.info(
+                'Updating %s PPAs' % distribution.name)
+            archives = getUtility(IArchiveSet).getArchivesForDistribution(
+                distribution, purposes=[ArchivePurpose.PPA],
+                check_permissions=False, exclude_pristine=True)
+            for archive in archives:
+                self.updateDistributionCache(distribution, archive)
+                archive.updateArchiveCache()
+
+            # Commit any remaining update for a distribution.
+            self.txn.commit()
+            self.logger.info('%s done' % distribution.name)
+
+        self.logger.debug('Finished the package cache update')