
[Merge] lp:~cjwatson/launchpad/publisher-ftparchive-clean into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/publisher-ftparchive-clean into lp:launchpad.

Commit message:
Clean apt-ftparchive caches once a day.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/publisher-ftparchive-clean/+merge/173226

== Summary ==

"apt-ftparchive clean" needs to be run every so often to stop the cache files growing without bound.  This causes the publisher to do that once a day or so.

== Tests ==

bin/test -vvct lp.archivepublisher.tests

== Demo and Q/A ==

Run the publisher on dogfood and observe that the cache size decreases drastically.  (It ought to, anyway.)
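
For the size check, something along these lines can be run before and after the publisher run.  This is only a rough sketch: the helper is hypothetical, and it assumes the cache databases live directly under the archive's cacheroot with a .db suffix (the tests below check packages-<arch>.db there).

    import glob
    import os

    def apt_cache_size(cacheroot):
        """Total size in bytes of the apt-ftparchive cache databases."""
        return sum(
            os.path.getsize(path)
            for path in glob.glob(os.path.join(cacheroot, "*.db")))

    # Record apt_cache_size(cacheroot) before the publisher run and again
    # afterwards; once cleanCaches has run, the second figure should be
    # noticeably smaller.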
-- 
https://code.launchpad.net/~cjwatson/launchpad/publisher-ftparchive-clean/+merge/173226
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/publisher-ftparchive-clean into lp:launchpad.
=== modified file 'lib/lp/archivepublisher/model/ftparchive.py'
--- lib/lp/archivepublisher/model/ftparchive.py	2013-06-21 01:55:44 +0000
+++ lib/lp/archivepublisher/model/ftparchive.py	2013-07-05 15:02:27 +0000
@@ -4,6 +4,7 @@
 from collections import defaultdict
 import os
 from StringIO import StringIO
+import time
 
 from storm.expr import (
     Desc,
@@ -108,6 +109,8 @@
     'debug': 'ddeb',
     }
 
+CLEANUP_FREQUENCY = 60 * 60 * 24
+
 
 class AptFTPArchiveFailure(Exception):
     """Failure while running apt-ftparchive."""
@@ -137,17 +140,9 @@
         self.log.debug("Doing apt-ftparchive work.")
         apt_config_filename = self.generateConfig(is_careful)
         self.runApt(apt_config_filename)
-
-    def _getArchitectureTags(self):
-        """List tags of all architectures enabled in this distro."""
-        archs = set()
-        for series in self.distro.series:
-            archs.update(set([
-                distroarchseries.architecturetag
-                for distroarchseries in series.enabled_architectures]))
-        return archs
-
-    def runApt(self, apt_config_filename):
+        self.cleanCaches()
+
+    def runAptWithArgs(self, apt_config_filename, *args):
         """Run apt-ftparchive in subprocesses.
 
         :raise: AptFTPArchiveFailure if any of the apt-ftparchive
@@ -157,12 +152,7 @@
 
         stdout_handler = OutputLineHandler(self.log.debug, "a-f: ")
         stderr_handler = OutputLineHandler(self.log.info, "a-f: ")
-        base_command = [
-            "apt-ftparchive",
-            "--no-contents",
-            "generate",
-            apt_config_filename,
-            ]
+        base_command = ["apt-ftparchive"] + list(args) + [apt_config_filename]
         spawner = CommandSpawner()
 
         returncodes = {}
@@ -185,6 +175,9 @@
             raise AptFTPArchiveFailure(
                 "Failure(s) from apt-ftparchive: %s" % ", ".join(by_arch))
 
+    def runApt(self, apt_config_filename):
+        self.runAptWithArgs(apt_config_filename, "--no-contents", "generate")
+
     #
     # Empty Pocket Requests
     #
@@ -726,14 +719,10 @@
 
                 self.generateConfigForPocket(apt_config, distroseries, pocket)
 
-        # And now return that string.
-        s = apt_config.getvalue()
-        apt_config.close()
-
         apt_config_filename = os.path.join(self._config.miscroot, "apt.conf")
-        fp = file(apt_config_filename, "w")
-        fp.write(s)
-        fp.close()
+        with open(apt_config_filename, "w") as fp:
+            fp.write(apt_config.getvalue())
+        apt_config.close()
         return apt_config_filename
 
     def generateConfigForPocket(self, apt_config, distroseries, pocket):
@@ -746,6 +735,25 @@
             comp.name for comp in
             self.publisher.archive.getComponentsForSeries(distroseries)]
 
+        self.writeAptConfig(
+            apt_config, suite, comps, archs,
+            distroseries.include_long_descriptions)
+
+        # XXX: 2006-08-24 kiko: Why do we do this directory creation here?
+        for comp in comps:
+            component_path = os.path.join(
+                self._config.distsroot, suite, comp)
+            safe_mkdir(os.path.join(component_path, "source"))
+            if not distroseries.include_long_descriptions:
+                safe_mkdir(os.path.join(component_path, "i18n"))
+            for arch in archs:
+                safe_mkdir(os.path.join(component_path, "binary-" + arch))
+                for subcomp in self.publisher.subcomponents:
+                    safe_mkdir(os.path.join(
+                        component_path, subcomp, "binary-" + arch))
+
+    def writeAptConfig(self, apt_config, suite, comps, archs,
+                       include_long_descriptions):
         self.log.debug("Generating apt config for %s" % suite)
         apt_config.write(STANZA_TEMPLATE % {
                          "LISTPATH": self._config.overrideroot,
@@ -759,8 +767,7 @@
                          "DISTS": os.path.basename(self._config.distsroot),
                          "HIDEEXTRA": "",
                          "LONGDESCRIPTION":
-                             "true" if distroseries.include_long_descriptions
-                                    else "false",
+                             "true" if include_long_descriptions else "false",
                          })
 
         if archs:
@@ -780,15 +787,44 @@
                         "LONGDESCRIPTION": "true",
                         })
 
-        # XXX: 2006-08-24 kiko: Why do we do this directory creation here?
-        for comp in comps:
-            component_path = os.path.join(
-                self._config.distsroot, suite, comp)
-            safe_mkdir(os.path.join(component_path, "source"))
-            if not distroseries.include_long_descriptions:
-                safe_mkdir(os.path.join(component_path, "i18n"))
-            for arch in archs:
-                safe_mkdir(os.path.join(component_path, "binary-" + arch))
-                for subcomp in self.publisher.subcomponents:
-                    safe_mkdir(os.path.join(
-                        component_path, subcomp, "binary-" + arch))
+    def cleanCaches(self):
+        """Clean apt-ftparchive caches.
+
+        This takes a few minutes and doesn't need to be done on every run,
+        but it should be done every so often so that the cache files don't
+        get too large and slow down normal runs of apt-ftparchive.
+        """
+        apt_config_filename = os.path.join(
+            self._config.miscroot, "apt-cleanup.conf")
+        try:
+            last_cleanup = os.stat(apt_config_filename).st_mtime
+            if last_cleanup > time.time() - CLEANUP_FREQUENCY:
+                return
+        except OSError:
+            pass
+
+        apt_config = StringIO()
+        apt_config.write(CONFIG_HEADER % (self._config.archiveroot,
+                                          self._config.overrideroot,
+                                          self._config.cacheroot,
+                                          self._config.miscroot))
+
+        # "apt-ftparchive clean" doesn't care what suite it's given, but it
+        # needs to know the union of all architectures and components for
+        # each suite we might publish.
+        archs = set()
+        comps = set()
+        for distroseries in self.publisher.consider_series:
+            for a in distroseries.enabled_architectures:
+                archs.add(a.architecturetag)
+            for comp in self.publisher.archive.getComponentsForSeries(
+                distroseries):
+                comps.add(comp.name)
+        self.writeAptConfig(
+            apt_config, "nonexistent-suite", sorted(comps), sorted(archs),
+            True)
+
+        with open(apt_config_filename, "w") as fp:
+            fp.write(apt_config.getvalue())
+        apt_config.close()
+        self.runAptWithArgs(apt_config_filename, "clean")

=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py	2013-05-16 00:08:28 +0000
+++ lib/lp/archivepublisher/publishing.py	2013-07-05 15:02:27 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2013 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 __all__ = [
@@ -250,17 +250,8 @@
             subcomps.append('debug')
         return subcomps
 
-    def A_publish(self, force_publishing):
-        """First step in publishing: actual package publishing.
-
-        Asks each DistroSeries to publish itself, which causes
-        publishing records to be updated, and files to be placed on disk
-        where necessary.
-        If self.allowed_suites is set, restrict the publication procedure
-        to them.
-        """
-        self.log.debug("* Step A: Publishing packages")
-
+    @property
+    def consider_series(self):
         if self.archive.purpose in (
             ArchivePurpose.PRIMARY,
             ArchivePurpose.PARTNER,
@@ -269,7 +260,7 @@
             # series.  We will never want to publish anything in them, so it
             # isn't worth thinking about whether they have pending
             # publications.
-            consider_series = [
+            return [
                 series
                 for series in self.distro.series
                 if series.status not in (
@@ -280,9 +271,20 @@
             # Other archives may have reasons to continue building at least
             # for OBSOLETE series.  For example, a PPA may be continuing to
             # provide custom builds for users who haven't upgraded yet.
-            consider_series = self.distro.series
-
-        for distroseries in consider_series:
+            return self.distro.series
+
+    def A_publish(self, force_publishing):
+        """First step in publishing: actual package publishing.
+
+        Asks each DistroSeries to publish itself, which causes
+        publishing records to be updated, and files to be placed on disk
+        where necessary.
+        If self.allowed_suites is set, restrict the publication procedure
+        to them.
+        """
+        self.log.debug("* Step A: Publishing packages")
+
+        for distroseries in self.consider_series:
             for pocket in self.archive.getPockets():
                 if self.isAllowed(distroseries, pocket):
                     more_dirt = distroseries.publish(

=== modified file 'lib/lp/archivepublisher/tests/test_ftparchive.py'
--- lib/lp/archivepublisher/tests/test_ftparchive.py	2013-05-24 03:05:34 +0000
+++ lib/lp/archivepublisher/tests/test_ftparchive.py	2013-07-05 15:02:27 +0000
@@ -10,7 +10,9 @@
 import re
 import shutil
 from textwrap import dedent
+import time
 
+from testtools.matchers import LessThan
 from zope.component import getUtility
 
 from lp.archivepublisher.config import getPubConfig
@@ -22,6 +24,7 @@
 from lp.archivepublisher.publishing import Publisher
 from lp.registry.interfaces.distribution import IDistributionSet
 from lp.registry.interfaces.pocket import PackagePublishingPocket
+from lp.registry.interfaces.series import SeriesStatus
 from lp.services.config import config
 from lp.services.log.logger import (
     BufferLogger,
@@ -131,13 +134,16 @@
         with open(path) as result_file:
             self.assertEqual("", result_file.read())
 
-    def _addRepositoryFile(self, component, sourcename, leafname):
+    def _addRepositoryFile(self, component, sourcename, leafname,
+                           samplename=None):
         """Create a repository file."""
         fullpath = self._dp.pathFor(component, sourcename, leafname)
         dirname = os.path.dirname(fullpath)
         if not os.path.exists(dirname):
             os.makedirs(dirname)
-        leaf = os.path.join(self._sampledir, leafname)
+        if samplename is None:
+            samplename = leafname
+        leaf = os.path.join(self._sampledir, samplename)
         leafcontent = file(leaf).read()
         file(fullpath, "w").write(leafcontent)
 
@@ -535,76 +541,92 @@
             os.path.join(self._distsdir, "hoary-test", "main",
                          "source", "Sources")))
 
+    def test_cleanCaches_noop_if_recent(self):
+        # cleanCaches does nothing if it was run recently.
+        fa = self._setUpFTPArchiveHandler()
+        path = os.path.join(self._config.miscroot, "apt-cleanup.conf")
+        with open(path, "w"):
+            pass
+        timestamp = time.time() - 1
+        os.utime(path, (timestamp, timestamp))
+        fa.cleanCaches()
+        # The filesystem may round off subsecond parts of timestamps.
+        self.assertEqual(int(timestamp), int(os.stat(path).st_mtime))
+
+    def test_cleanCaches_union_architectures(self):
+        # cleanCaches operates on the union of architectures for all
+        # considered series.
+        for series in self._distribution.series:
+            series.status = SeriesStatus.OBSOLETE
+        stable = self.factory.makeDistroSeries(
+            distribution=self._distribution, status=SeriesStatus.CURRENT)
+        unstable = self.factory.makeDistroSeries(
+            distribution=self._distribution)
+        for ds, arch in (
+            (stable, "i386"), (stable, "armel"),
+            (unstable, "i386"), (unstable, "armhf")):
+            self.factory.makeDistroArchSeries(
+                distroseries=ds, architecturetag=arch)
+        self._publisher = Publisher(
+            self._logger, self._config, self._dp, self._archive)
+        fa = self._setUpFTPArchiveHandler()
+        fa.cleanCaches()
+        path = os.path.join(self._config.miscroot, "apt-cleanup.conf")
+        with open(path) as config_file:
+            arch_lines = [
+                line for line in config_file if " Architectures " in line]
+        self.assertNotEqual([], arch_lines)
+        for line in arch_lines:
+            match = re.search(r' Architectures "(.*)"', line)
+            self.assertIsNotNone(match)
+            config_arches = set(match.group(1).split())
+            config_arches.discard("source")
+            self.assertContentEqual(["armel", "armhf", "i386"], config_arches)
+
+    def test_cleanCaches(self):
+        # cleanCaches does real work.
+        self._publisher = Publisher(
+            self._logger, self._config, self._dp, self._archive)
+        fa = self._setUpFTPArchiveHandler()
+        fa.createEmptyPocketRequests(fullpublish=True)
+
+        # Set up an initial repository.
+        source_overrides = FakeSelectResult([("foo", "main", "misc")])
+        binary_overrides = FakeSelectResult([(
+            "bin%d" % i, "main", "misc", "i386",
+            PackagePublishingPriority.EXTRA, BinaryPackageFormat.DEB, None)
+            for i in range(10)])
+        fa.publishOverrides("hoary-test", source_overrides, binary_overrides)
+        source_files = FakeSelectResult([("foo", "foo_1.dsc", "main")])
+        binary_files = FakeSelectResult([(
+            "bin%d" % i, "bin%d_1_i386.deb" % i, "main", "binary-i386")
+            for i in range(10)])
+        fa.publishFileLists("hoary-test", source_files, binary_files)
+        self._addRepositoryFile("main", "foo", "foo_1.dsc")
+        for i in range(10):
+            self._addRepositoryFile(
+                "main", "bin%d" % i, "bin%d_1_i386.deb" % i,
+                samplename="foo_1_i386.deb")
+        apt_conf = fa.generateConfig(fullpublish=True)
+        fa.runApt(apt_conf)
+
+        # Remove most of this repository's files so that cleanCaches has
+        # something to do.
+        for i in range(9):
+            os.unlink(
+                self._dp.pathFor("main", "bin%d" % i, "bin%d_1_i386.deb" % i))
+
+        cache_path = os.path.join(self._config.cacheroot, "packages-i386.db")
+        old_cache_size = os.stat(cache_path).st_size
+        fa.cleanCaches()
+        self.assertThat(os.stat(cache_path).st_size, LessThan(old_cache_size))
+
 
 class TestFTPArchiveRunApt(TestCaseWithFactory):
     """Test `FTPArchive`'s execution of apt-ftparchive."""
 
     layer = ZopelessDatabaseLayer
 
-    def _makeMatchingDistroArchSeries(self):
-        """Create two `DistroArchSeries` for the same distro and processor."""
-        distro = self.factory.makeDistribution()
-        processor = self.factory.makeProcessor()
-        return (
-            self.factory.makeDistroArchSeries(
-                distroseries=self.factory.makeDistroSeries(distro),
-                processorfamily=processor.family,
-                architecturetag=processor.name)
-            for counter in (1, 2))
-
-    def test_getArchitectureTags_starts_out_empty(self):
-        fa = FTPArchiveHandler(
-            DevNullLogger(), None, None, self.factory.makeDistribution(),
-            None)
-        self.assertContentEqual([], fa._getArchitectureTags())
-
-    def test_getArchitectureTags_includes_enabled_architectures(self):
-        distroarchseries = self.factory.makeDistroArchSeries()
-        fa = FTPArchiveHandler(
-            DevNullLogger(), None, None,
-            distroarchseries.distroseries.distribution, None)
-        self.assertContentEqual(
-            [distroarchseries.architecturetag], fa._getArchitectureTags())
-
-    def test_getArchitectureTags_considers_all_series(self):
-        distro = self.factory.makeDistribution()
-        affluent_antilope = self.factory.makeDistroSeries(distribution=distro)
-        bilious_baboon = self.factory.makeDistroSeries(distribution=distro)
-        affluent_arch = self.factory.makeDistroArchSeries(
-            distroseries=affluent_antilope)
-        bilious_arch = self.factory.makeDistroArchSeries(
-            distroseries=bilious_baboon)
-        fa = FTPArchiveHandler(DevNullLogger(), None, None, distro, None)
-        self.assertContentEqual(
-            [affluent_arch.architecturetag, bilious_arch.architecturetag],
-            fa._getArchitectureTags())
-
-    def test_getArchitectureTags_ignores_disabled_architectures(self):
-        distroarchseries = self.factory.makeDistroArchSeries()
-        distroarchseries.enabled = False
-        fa = FTPArchiveHandler(
-            DevNullLogger(), None, None,
-            distroarchseries.distroseries.distribution, None)
-        self.assertContentEqual([], fa._getArchitectureTags())
-
-    def test_getArchitectureTags_contains_no_duplicates(self):
-        ominous_okapi, pilfering_puppy = self._makeMatchingDistroArchSeries()
-        fa = FTPArchiveHandler(
-            DevNullLogger(), None, None,
-            ominous_okapi.distroseries.distribution, None)
-        self.assertEqual(1, len(list(fa._getArchitectureTags())))
-        self.assertContentEqual(
-            [ominous_okapi.architecturetag], fa._getArchitectureTags())
-
-    def test_getArchitectureTags_counts_any_architecture_enabled_once(self):
-        manic_mantis, nervous_nit = self._makeMatchingDistroArchSeries()
-        nervous_nit.enabled = False
-        fa = FTPArchiveHandler(
-            DevNullLogger(), None, None,
-            manic_mantis.distroseries.distribution, None)
-        self.assertContentEqual(
-            [manic_mantis.architecturetag], fa._getArchitectureTags())
-
     def test_runApt_reports_failure(self):
         # If we sabotage apt-ftparchive, runApt notices that it failed
         # and raises an exception.