launchpad-reviewers team mailing list archive
launchpad-reviewers team — Mailing list archive — Message #15719
[Merge] lp:~cjwatson/launchpad/publisher-ftparchive-clean into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/publisher-ftparchive-clean into lp:launchpad.
Commit message:
Clean apt-ftparchive caches once a day.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/publisher-ftparchive-clean/+merge/173226
== Summary ==
"apt-ftparchive clean" needs to be run every so often to stop the cache files from growing without bound. This change makes the publisher run it roughly once a day.
== Tests ==
bin/test -vvct lp.archivepublisher.tests
== Demo and Q/A ==
Run publisher on dogfood; observe cache size drastically decrease. (It ought to, anyway.)
--
https://code.launchpad.net/~cjwatson/launchpad/publisher-ftparchive-clean/+merge/173226
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/publisher-ftparchive-clean into lp:launchpad.
=== modified file 'lib/lp/archivepublisher/model/ftparchive.py'
--- lib/lp/archivepublisher/model/ftparchive.py 2013-06-21 01:55:44 +0000
+++ lib/lp/archivepublisher/model/ftparchive.py 2013-07-05 15:02:27 +0000
@@ -4,6 +4,7 @@
from collections import defaultdict
import os
from StringIO import StringIO
+import time
from storm.expr import (
Desc,
@@ -108,6 +109,8 @@
'debug': 'ddeb',
}
+CLEANUP_FREQUENCY = 60 * 60 * 24
+
class AptFTPArchiveFailure(Exception):
"""Failure while running apt-ftparchive."""
@@ -137,17 +140,9 @@
self.log.debug("Doing apt-ftparchive work.")
apt_config_filename = self.generateConfig(is_careful)
self.runApt(apt_config_filename)
-
- def _getArchitectureTags(self):
- """List tags of all architectures enabled in this distro."""
- archs = set()
- for series in self.distro.series:
- archs.update(set([
- distroarchseries.architecturetag
- for distroarchseries in series.enabled_architectures]))
- return archs
-
- def runApt(self, apt_config_filename):
+ self.cleanCaches()
+
+ def runAptWithArgs(self, apt_config_filename, *args):
"""Run apt-ftparchive in subprocesses.
:raise: AptFTPArchiveFailure if any of the apt-ftparchive
@@ -157,12 +152,7 @@
stdout_handler = OutputLineHandler(self.log.debug, "a-f: ")
stderr_handler = OutputLineHandler(self.log.info, "a-f: ")
- base_command = [
- "apt-ftparchive",
- "--no-contents",
- "generate",
- apt_config_filename,
- ]
+ base_command = ["apt-ftparchive"] + list(args) + [apt_config_filename]
spawner = CommandSpawner()
returncodes = {}
@@ -185,6 +175,9 @@
raise AptFTPArchiveFailure(
"Failure(s) from apt-ftparchive: %s" % ", ".join(by_arch))
+ def runApt(self, apt_config_filename):
+ self.runAptWithArgs(apt_config_filename, "--no-contents", "generate")
+
#
# Empty Pocket Requests
#
@@ -726,14 +719,10 @@
self.generateConfigForPocket(apt_config, distroseries, pocket)
- # And now return that string.
- s = apt_config.getvalue()
- apt_config.close()
-
apt_config_filename = os.path.join(self._config.miscroot, "apt.conf")
- fp = file(apt_config_filename, "w")
- fp.write(s)
- fp.close()
+ with open(apt_config_filename, "w") as fp:
+ fp.write(apt_config.getvalue())
+ apt_config.close()
return apt_config_filename
def generateConfigForPocket(self, apt_config, distroseries, pocket):
@@ -746,6 +735,25 @@
comp.name for comp in
self.publisher.archive.getComponentsForSeries(distroseries)]
+ self.writeAptConfig(
+ apt_config, suite, comps, archs,
+ distroseries.include_long_descriptions)
+
+ # XXX: 2006-08-24 kiko: Why do we do this directory creation here?
+ for comp in comps:
+ component_path = os.path.join(
+ self._config.distsroot, suite, comp)
+ safe_mkdir(os.path.join(component_path, "source"))
+ if not distroseries.include_long_descriptions:
+ safe_mkdir(os.path.join(component_path, "i18n"))
+ for arch in archs:
+ safe_mkdir(os.path.join(component_path, "binary-" + arch))
+ for subcomp in self.publisher.subcomponents:
+ safe_mkdir(os.path.join(
+ component_path, subcomp, "binary-" + arch))
+
+ def writeAptConfig(self, apt_config, suite, comps, archs,
+ include_long_descriptions):
self.log.debug("Generating apt config for %s" % suite)
apt_config.write(STANZA_TEMPLATE % {
"LISTPATH": self._config.overrideroot,
@@ -759,8 +767,7 @@
"DISTS": os.path.basename(self._config.distsroot),
"HIDEEXTRA": "",
"LONGDESCRIPTION":
- "true" if distroseries.include_long_descriptions
- else "false",
+ "true" if include_long_descriptions else "false",
})
if archs:
@@ -780,15 +787,44 @@
"LONGDESCRIPTION": "true",
})
- # XXX: 2006-08-24 kiko: Why do we do this directory creation here?
- for comp in comps:
- component_path = os.path.join(
- self._config.distsroot, suite, comp)
- safe_mkdir(os.path.join(component_path, "source"))
- if not distroseries.include_long_descriptions:
- safe_mkdir(os.path.join(component_path, "i18n"))
- for arch in archs:
- safe_mkdir(os.path.join(component_path, "binary-" + arch))
- for subcomp in self.publisher.subcomponents:
- safe_mkdir(os.path.join(
- component_path, subcomp, "binary-" + arch))
+ def cleanCaches(self):
+ """Clean apt-ftparchive caches.
+
+ This takes a few minutes and doesn't need to be done on every run,
+ but it should be done every so often so that the cache files don't
+ get too large and slow down normal runs of apt-ftparchive.
+ """
+ apt_config_filename = os.path.join(
+ self._config.miscroot, "apt-cleanup.conf")
+ try:
+ last_cleanup = os.stat(apt_config_filename).st_mtime
+ if last_cleanup > time.time() - CLEANUP_FREQUENCY:
+ return
+ except OSError:
+ pass
+
+ apt_config = StringIO()
+ apt_config.write(CONFIG_HEADER % (self._config.archiveroot,
+ self._config.overrideroot,
+ self._config.cacheroot,
+ self._config.miscroot))
+
+ # "apt-ftparchive clean" doesn't care what suite it's given, but it
+ # needs to know the union of all architectures and components for
+ # each suite we might publish.
+ archs = set()
+ comps = set()
+ for distroseries in self.publisher.consider_series:
+ for a in distroseries.enabled_architectures:
+ archs.add(a.architecturetag)
+ for comp in self.publisher.archive.getComponentsForSeries(
+ distroseries):
+ comps.add(comp.name)
+ self.writeAptConfig(
+ apt_config, "nonexistent-suite", sorted(comps), sorted(archs),
+ True)
+
+ with open(apt_config_filename, "w") as fp:
+ fp.write(apt_config.getvalue())
+ apt_config.close()
+ self.runAptWithArgs(apt_config_filename, "clean")
=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py 2013-05-16 00:08:28 +0000
+++ lib/lp/archivepublisher/publishing.py 2013-07-05 15:02:27 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2013 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
__all__ = [
@@ -250,17 +250,8 @@
subcomps.append('debug')
return subcomps
- def A_publish(self, force_publishing):
- """First step in publishing: actual package publishing.
-
- Asks each DistroSeries to publish itself, which causes
- publishing records to be updated, and files to be placed on disk
- where necessary.
- If self.allowed_suites is set, restrict the publication procedure
- to them.
- """
- self.log.debug("* Step A: Publishing packages")
-
+ @property
+ def consider_series(self):
if self.archive.purpose in (
ArchivePurpose.PRIMARY,
ArchivePurpose.PARTNER,
@@ -269,7 +260,7 @@
# series. We will never want to publish anything in them, so it
# isn't worth thinking about whether they have pending
# publications.
- consider_series = [
+ return [
series
for series in self.distro.series
if series.status not in (
@@ -280,9 +271,20 @@
# Other archives may have reasons to continue building at least
# for OBSOLETE series. For example, a PPA may be continuing to
# provide custom builds for users who haven't upgraded yet.
- consider_series = self.distro.series
-
- for distroseries in consider_series:
+ return self.distro.series
+
+ def A_publish(self, force_publishing):
+ """First step in publishing: actual package publishing.
+
+ Asks each DistroSeries to publish itself, which causes
+ publishing records to be updated, and files to be placed on disk
+ where necessary.
+ If self.allowed_suites is set, restrict the publication procedure
+ to them.
+ """
+ self.log.debug("* Step A: Publishing packages")
+
+ for distroseries in self.consider_series:
for pocket in self.archive.getPockets():
if self.isAllowed(distroseries, pocket):
more_dirt = distroseries.publish(
=== modified file 'lib/lp/archivepublisher/tests/test_ftparchive.py'
--- lib/lp/archivepublisher/tests/test_ftparchive.py 2013-05-24 03:05:34 +0000
+++ lib/lp/archivepublisher/tests/test_ftparchive.py 2013-07-05 15:02:27 +0000
@@ -10,7 +10,9 @@
import re
import shutil
from textwrap import dedent
+import time
+from testtools.matchers import LessThan
from zope.component import getUtility
from lp.archivepublisher.config import getPubConfig
@@ -22,6 +24,7 @@
from lp.archivepublisher.publishing import Publisher
from lp.registry.interfaces.distribution import IDistributionSet
from lp.registry.interfaces.pocket import PackagePublishingPocket
+from lp.registry.interfaces.series import SeriesStatus
from lp.services.config import config
from lp.services.log.logger import (
BufferLogger,
@@ -131,13 +134,16 @@
with open(path) as result_file:
self.assertEqual("", result_file.read())
- def _addRepositoryFile(self, component, sourcename, leafname):
+ def _addRepositoryFile(self, component, sourcename, leafname,
+ samplename=None):
"""Create a repository file."""
fullpath = self._dp.pathFor(component, sourcename, leafname)
dirname = os.path.dirname(fullpath)
if not os.path.exists(dirname):
os.makedirs(dirname)
- leaf = os.path.join(self._sampledir, leafname)
+ if samplename is None:
+ samplename = leafname
+ leaf = os.path.join(self._sampledir, samplename)
leafcontent = file(leaf).read()
file(fullpath, "w").write(leafcontent)
@@ -535,76 +541,92 @@
os.path.join(self._distsdir, "hoary-test", "main",
"source", "Sources")))
+ def test_cleanCaches_noop_if_recent(self):
+ # cleanCaches does nothing if it was run recently.
+ fa = self._setUpFTPArchiveHandler()
+ path = os.path.join(self._config.miscroot, "apt-cleanup.conf")
+ with open(path, "w"):
+ pass
+ timestamp = time.time() - 1
+ os.utime(path, (timestamp, timestamp))
+ fa.cleanCaches()
+ # The filesystem may round off subsecond parts of timestamps.
+ self.assertEqual(int(timestamp), int(os.stat(path).st_mtime))
+
+ def test_cleanCaches_union_architectures(self):
+ # cleanCaches operates on the union of architectures for all
+ # considered series.
+ for series in self._distribution.series:
+ series.status = SeriesStatus.OBSOLETE
+ stable = self.factory.makeDistroSeries(
+ distribution=self._distribution, status=SeriesStatus.CURRENT)
+ unstable = self.factory.makeDistroSeries(
+ distribution=self._distribution)
+ for ds, arch in (
+ (stable, "i386"), (stable, "armel"),
+ (unstable, "i386"), (unstable, "armhf")):
+ self.factory.makeDistroArchSeries(
+ distroseries=ds, architecturetag=arch)
+ self._publisher = Publisher(
+ self._logger, self._config, self._dp, self._archive)
+ fa = self._setUpFTPArchiveHandler()
+ fa.cleanCaches()
+ path = os.path.join(self._config.miscroot, "apt-cleanup.conf")
+ with open(path) as config_file:
+ arch_lines = [
+ line for line in config_file if " Architectures " in line]
+ self.assertNotEqual([], arch_lines)
+ for line in arch_lines:
+ match = re.search(r' Architectures "(.*)"', line)
+ self.assertIsNotNone(match)
+ config_arches = set(match.group(1).split())
+ config_arches.discard("source")
+ self.assertContentEqual(["armel", "armhf", "i386"], config_arches)
+
+ def test_cleanCaches(self):
+ # cleanCaches does real work.
+ self._publisher = Publisher(
+ self._logger, self._config, self._dp, self._archive)
+ fa = self._setUpFTPArchiveHandler()
+ fa.createEmptyPocketRequests(fullpublish=True)
+
+ # Set up an initial repository.
+ source_overrides = FakeSelectResult([("foo", "main", "misc")])
+ binary_overrides = FakeSelectResult([(
+ "bin%d" % i, "main", "misc", "i386",
+ PackagePublishingPriority.EXTRA, BinaryPackageFormat.DEB, None)
+ for i in range(10)])
+ fa.publishOverrides("hoary-test", source_overrides, binary_overrides)
+ source_files = FakeSelectResult([("foo", "foo_1.dsc", "main")])
+ binary_files = FakeSelectResult([(
+ "bin%d" % i, "bin%d_1_i386.deb" % i, "main", "binary-i386")
+ for i in range(10)])
+ fa.publishFileLists("hoary-test", source_files, binary_files)
+ self._addRepositoryFile("main", "foo", "foo_1.dsc")
+ for i in range(10):
+ self._addRepositoryFile(
+ "main", "bin%d" % i, "bin%d_1_i386.deb" % i,
+ samplename="foo_1_i386.deb")
+ apt_conf = fa.generateConfig(fullpublish=True)
+ fa.runApt(apt_conf)
+
+ # Remove most of this repository's files so that cleanCaches has
+ # something to do.
+ for i in range(9):
+ os.unlink(
+ self._dp.pathFor("main", "bin%d" % i, "bin%d_1_i386.deb" % i))
+
+ cache_path = os.path.join(self._config.cacheroot, "packages-i386.db")
+ old_cache_size = os.stat(cache_path).st_size
+ fa.cleanCaches()
+ self.assertThat(os.stat(cache_path).st_size, LessThan(old_cache_size))
+
class TestFTPArchiveRunApt(TestCaseWithFactory):
"""Test `FTPArchive`'s execution of apt-ftparchive."""
layer = ZopelessDatabaseLayer
- def _makeMatchingDistroArchSeries(self):
- """Create two `DistroArchSeries` for the same distro and processor."""
- distro = self.factory.makeDistribution()
- processor = self.factory.makeProcessor()
- return (
- self.factory.makeDistroArchSeries(
- distroseries=self.factory.makeDistroSeries(distro),
- processorfamily=processor.family,
- architecturetag=processor.name)
- for counter in (1, 2))
-
- def test_getArchitectureTags_starts_out_empty(self):
- fa = FTPArchiveHandler(
- DevNullLogger(), None, None, self.factory.makeDistribution(),
- None)
- self.assertContentEqual([], fa._getArchitectureTags())
-
- def test_getArchitectureTags_includes_enabled_architectures(self):
- distroarchseries = self.factory.makeDistroArchSeries()
- fa = FTPArchiveHandler(
- DevNullLogger(), None, None,
- distroarchseries.distroseries.distribution, None)
- self.assertContentEqual(
- [distroarchseries.architecturetag], fa._getArchitectureTags())
-
- def test_getArchitectureTags_considers_all_series(self):
- distro = self.factory.makeDistribution()
- affluent_antilope = self.factory.makeDistroSeries(distribution=distro)
- bilious_baboon = self.factory.makeDistroSeries(distribution=distro)
- affluent_arch = self.factory.makeDistroArchSeries(
- distroseries=affluent_antilope)
- bilious_arch = self.factory.makeDistroArchSeries(
- distroseries=bilious_baboon)
- fa = FTPArchiveHandler(DevNullLogger(), None, None, distro, None)
- self.assertContentEqual(
- [affluent_arch.architecturetag, bilious_arch.architecturetag],
- fa._getArchitectureTags())
-
- def test_getArchitectureTags_ignores_disabled_architectures(self):
- distroarchseries = self.factory.makeDistroArchSeries()
- distroarchseries.enabled = False
- fa = FTPArchiveHandler(
- DevNullLogger(), None, None,
- distroarchseries.distroseries.distribution, None)
- self.assertContentEqual([], fa._getArchitectureTags())
-
- def test_getArchitectureTags_contains_no_duplicates(self):
- ominous_okapi, pilfering_puppy = self._makeMatchingDistroArchSeries()
- fa = FTPArchiveHandler(
- DevNullLogger(), None, None,
- ominous_okapi.distroseries.distribution, None)
- self.assertEqual(1, len(list(fa._getArchitectureTags())))
- self.assertContentEqual(
- [ominous_okapi.architecturetag], fa._getArchitectureTags())
-
- def test_getArchitectureTags_counts_any_architecture_enabled_once(self):
- manic_mantis, nervous_nit = self._makeMatchingDistroArchSeries()
- nervous_nit.enabled = False
- fa = FTPArchiveHandler(
- DevNullLogger(), None, None,
- manic_mantis.distroseries.distribution, None)
- self.assertContentEqual(
- [manic_mantis.architecturetag], fa._getArchitectureTags())
-
def test_runApt_reports_failure(self):
# If we sabotage apt-ftparchive, runApt notices that it failed
# and raises an exception.