[Merge] lp:~cjwatson/launchpad/inrelease-by-hash into lp:launchpad

Colin Watson has proposed merging lp:~cjwatson/launchpad/inrelease-by-hash into lp:launchpad with lp:~cjwatson/launchpad/refactor-archive-signing as a prerequisite.

Commit message:
Add Release, Release.gpg, and InRelease to by-hash directories.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/inrelease-by-hash/+merge/336675

Like most of the by-hash stuff, this has lots of fiddly details and will want some careful QA on dogfood.  But at its core it's reasonably straightforward: now that the signed files are generated early enough, we just add them to the set of files being considered by _updateByHash.  I arranged to add these files to by-hash before they're renamed into place, which entailed introducing the concept of the "real file name" in a few places.
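
At its simplest, the mapping handed to _updateByHash looks like this; a minimal standalone sketch (not the publisher code itself), assuming suite_dir points at the suite's directory under dists:

    # Hypothetical sketch: build the published-name -> staged-name mapping
    # that _updateByHash consumes.  Names mirror the diff below.
    import os

    def stage_extra_by_hash_files(suite_dir, can_sign):
        """Map each published file name to its staged ".new" name on disk."""
        extra_by_hash_files = {"Release": "Release.new"}
        if can_sign:
            extra_by_hash_files["Release.gpg"] = "Release.gpg.new"
            extra_by_hash_files["InRelease"] = "InRelease.new"
        # Keep only entries whose staged file actually exists on disk.
        return {
            name: real_name
            for name, real_name in extra_by_hash_files.items()
            if os.path.exists(os.path.join(suite_dir, real_name))}
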
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/inrelease-by-hash into lp:launchpad.

=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py	2018-01-26 13:27:05 +0000
+++ lib/lp/archivepublisher/publishing.py	2018-01-26 13:27:05 +0000
@@ -772,8 +772,8 @@
             for pocket in self.archive.getPockets():
                 ds_pocket = (distroseries.name, pocket)
                 suite = distroseries.getSuite(pocket)
-                release_path = os.path.join(
-                    self._config.distsroot, suite, "Release")
+                suite_path = os.path.join(self._config.distsroot, suite)
+                release_path = os.path.join(suite_path, "Release")
 
                 if is_careful:
                     if not self.isAllowed(distroseries, pocket):
@@ -803,7 +803,11 @@
                     # We aren't publishing a new Release file for this
                     # suite, probably because it's immutable, but we still
                     # need to prune by-hash files from it.
-                    self._updateByHash(suite, "Release")
+                    extra_by_hash_files = {
+                        filename: filename
+                        for filename in ("Release", "Release.gpg", "InRelease")
+                        if file_exists(os.path.join(suite_path, filename))}
+                    self._updateByHash(suite, "Release", extra_by_hash_files)
 
     def _allIndexFiles(self, distroseries):
         """Return all index files on disk for a distroseries.
@@ -1025,7 +1029,7 @@
             return self.distro.displayname
         return "LP-PPA-%s" % get_ppa_reference(self.archive)
 
-    def _updateByHash(self, suite, release_file_name):
+    def _updateByHash(self, suite, release_file_name, extra_files):
         """Update by-hash files for a suite.
 
         This takes Release file data which references a set of on-disk
@@ -1034,6 +1038,16 @@
         directories to be in sync with ArchiveFile.  Any on-disk by-hash
         entries that ceased to be current sufficiently long ago are removed.
         """
+        extra_data = {}
+        for filename, real_filename in extra_files.items():
+            hashes = self._readIndexFileHashes(
+                suite, filename, real_file_name=real_filename)
+            if hashes is None:
+                continue
+            for archive_hash in archive_hashes:
+                extra_data.setdefault(archive_hash.apt_name, []).append(
+                    hashes[archive_hash.deb822_name])
+
         release_path = os.path.join(
             self._config.distsroot, suite, release_file_name)
         with open(release_path) as release_file:
@@ -1052,12 +1066,13 @@
         # Gather information on entries in the current Release file, and
         # make sure nothing there is condemned.
         current_files = {}
-        current_sha256_checksums = set()
-        for current_entry in release_data["SHA256"]:
+        for current_entry in (
+                release_data["SHA256"] + extra_data.get("SHA256", [])):
             path = os.path.join(suite_dir, current_entry["name"])
+            real_name = current_entry.get("real_name", current_entry["name"])
+            real_path = os.path.join(suite_dir, real_name)
             current_files[path] = (
-                int(current_entry["size"]), current_entry["sha256"])
-            current_sha256_checksums.add(current_entry["sha256"])
+                int(current_entry["size"]), current_entry["sha256"], real_path)
         uncondemned_files = set()
         for db_file in archive_file_set.getByArchive(
                 self.archive, container=container, only_condemned=True,
@@ -1117,15 +1132,16 @@
         # XXX cjwatson 2016-03-15: This should possibly use bulk creation,
         # although we can only avoid about a third of the queries since the
         # librarian client has no bulk upload methods.
-        for path, (size, sha256) in current_files.items():
-            full_path = os.path.join(self._config.distsroot, path)
+        for path, (size, sha256, real_path) in current_files.items():
+            full_path = os.path.join(self._config.distsroot, real_path)
             if (os.path.exists(full_path) and
                     not by_hashes.known(path, "SHA256", sha256)):
                 with open(full_path, "rb") as fileobj:
                     db_file = archive_file_set.newFromFile(
                         self.archive, container, os.path.join("dists", path),
                         fileobj, size, filenameToContentType(path))
-                by_hashes.add(path, db_file.library_file, copy_from_path=path)
+                by_hashes.add(
+                    path, db_file.library_file, copy_from_path=real_path)
 
         # Finally, remove any files from disk that aren't recorded in the
         # database and aren't active.
@@ -1173,6 +1189,9 @@
         # special games with timestamps here, as it will interfere with the
         # "staging" mechanism used to update these files.
         extra_files = set()
+        # Extra by-hash files are not listed in the Release file, but we
+        # still want to include them in by-hash directories.
+        extra_by_hash_files = {}
         for component in all_components:
             self._writeSuiteSource(
                 distroseries, pocket, component, core_files)
@@ -1239,9 +1258,7 @@
 
         self._writeReleaseFile(suite, release_file)
         core_files.add("Release")
-
-        if distroseries.publish_by_hash:
-            self._updateByHash(suite, "Release.new")
+        extra_by_hash_files["Release"] = "Release.new"
 
         signable_archive = ISignableArchive(self.archive)
         if signable_archive.can_sign:
@@ -1249,11 +1266,16 @@
             self.log.debug("Signing Release file for %s" % suite)
             signable_archive.signRepository(suite, suffix=".new", log=self.log)
             core_files.add("Release.gpg")
+            extra_by_hash_files["Release.gpg"] = "Release.gpg.new"
             core_files.add("InRelease")
+            extra_by_hash_files["InRelease"] = "InRelease.new"
         else:
             # Skip signature if the archive is not set up for signing.
             self.log.debug("No signing key available, skipping signature.")
 
+        if distroseries.publish_by_hash:
+            self._updateByHash(suite, "Release.new", extra_by_hash_files)
+
         for name in ("Release", "Release.gpg", "InRelease"):
             if name in core_files:
                 os.rename(
@@ -1365,7 +1387,8 @@
         # Schedule this for inclusion in the Release file.
         all_series_files.add(os.path.join(component, "i18n", "Index"))
 
-    def _readIndexFileHashes(self, suite, file_name, subpath=None):
+    def _readIndexFileHashes(self, suite, file_name, subpath=None,
+                             real_file_name=None):
         """Read an index file and return its hashes.
 
         :param suite: Suite name.
@@ -1373,6 +1396,11 @@
         :param subpath: Optional subpath within the suite root.  Generated
             indexes will not include this path.  If omitted, filenames are
             assumed to be relative to the suite root.
+        :param real_file_name: The actual filename to open when reading
+            data (`file_name` will still be the name used in the returned
+            dictionary).  If this is passed, then the returned hash
+            component dictionaries will include it in additional "real_name"
+            items.
         :return: A dictionary mapping hash field names to dictionaries of
             their components as defined by debian.deb822.Release (e.g.
             {"md5sum": {"md5sum": ..., "size": ..., "name": ...}}), or None
@@ -1380,7 +1408,8 @@
         """
         open_func = open
         full_name = os.path.join(
-            self._config.distsroot, suite, subpath or '.', file_name)
+            self._config.distsroot, suite, subpath or '.',
+            real_file_name or file_name)
         if not os.path.exists(full_name):
             if os.path.exists(full_name + '.gz'):
                 open_func = gzip.open
@@ -1404,9 +1433,13 @@
                 for hashobj in hashes.values():
                     hashobj.update(chunk)
                 size += len(chunk)
-        return {
-            alg: {alg: hashobj.hexdigest(), "name": file_name, "size": size}
-            for alg, hashobj in hashes.items()}
+        ret = {}
+        for alg, hashobj in hashes.items():
+            digest = hashobj.hexdigest()
+            ret[alg] = {alg: digest, "name": file_name, "size": size}
+            if real_file_name:
+                ret[alg]["real_name"] = real_file_name
+        return ret
 
     def deleteArchive(self):
         """Delete the archive.

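For context, here is a standalone sketch of what the real_file_name handling in _readIndexFileHashes boils down to, assuming SHA-256 only and omitting the gzip fallback and multi-algorithm support of the real method:

    # Hedged sketch of the hash-dict shape; not the actual publisher method.
    import hashlib
    import os

    def read_index_file_hashes(suite_root, file_name, real_file_name=None):
        """Hash a staged file, but report it under its published name."""
        full_name = os.path.join(suite_root, real_file_name or file_name)
        if not os.path.exists(full_name):
            return None
        sha256 = hashlib.sha256()
        size = 0
        with open(full_name, "rb") as f:
            for chunk in iter(lambda: f.read(256 * 1024), b""):
                sha256.update(chunk)
                size += len(chunk)
        entry = {"sha256": sha256.hexdigest(), "name": file_name, "size": size}
        if real_file_name:
            # The by-hash code uses real_name to find the file before rename.
            entry["real_name"] = real_file_name
        return {"sha256": entry}
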
=== modified file 'lib/lp/archivepublisher/tests/test_debian_installer.py'
--- lib/lp/archivepublisher/tests/test_debian_installer.py	2018-01-26 13:27:05 +0000
+++ lib/lp/archivepublisher/tests/test_debian_installer.py	2018-01-26 13:27:05 +0000
@@ -20,13 +20,14 @@
     )
 from lp.archivepublisher.debian_installer import DebianInstallerUpload
 from lp.archivepublisher.interfaces.publisherconfig import IPublisherConfigSet
+from lp.archivepublisher.tests.test_run_parts import RunPartsMixin
 from lp.services.tarfile_helpers import LaunchpadWriteTarFile
 from lp.soyuz.enums import ArchivePurpose
 from lp.testing import TestCaseWithFactory
 from lp.testing.layers import ZopelessDatabaseLayer
 
 
-class TestDebianInstaller(TestCaseWithFactory):
+class TestDebianInstaller(RunPartsMixin, TestCaseWithFactory):
 
     layer = ZopelessDatabaseLayer
 
@@ -164,17 +165,15 @@
             0o755, os.stat(self.getInstallerPath(directory)).st_mode & 0o777)
 
     def test_sign_with_external_run_parts(self):
-        parts_directory = self.makeTemporaryDirectory()
-        sign_directory = os.path.join(
-            parts_directory, self.distro.name, "sign.d")
-        os.makedirs(sign_directory)
-        with open(os.path.join(sign_directory, "10-sign"), "w") as f:
+        self.enableRunParts(distribution_name=self.distro.name)
+        with open(os.path.join(
+                self.parts_directory, self.distro.name, "sign.d",
+                "10-sign"), "w") as f:
             f.write(dedent("""\
                 #! /bin/sh
                 touch "$OUTPUT_PATH"
                 """))
             os.fchmod(f.fileno(), 0o755)
-        self.pushConfig("archivepublisher", run_parts_location=parts_directory)
         self.openArchive()
         self.addFile("images/list", "a list")
         self.addFile("images/SHA256SUMS", "a checksum")

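Judging from the lines deleted above, enableRunParts presumably does roughly the following; a reconstruction, not the actual mixin, which lives in lp.archivepublisher.tests.test_run_parts:

    # Reconstructed sketch of the RunPartsMixin helper, based only on the
    # manual setup it replaces; makeTemporaryDirectory and pushConfig are
    # existing Launchpad test-case helpers visible in the deleted lines.
    import os

    class RunPartsMixinSketch:
        def enableRunParts(self, distribution_name):
            self.parts_directory = self.makeTemporaryDirectory()
            os.makedirs(os.path.join(
                self.parts_directory, distribution_name, "sign.d"))
            self.pushConfig(
                "archivepublisher", run_parts_location=self.parts_directory)
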
=== modified file 'lib/lp/archivepublisher/tests/test_dist_upgrader.py'
--- lib/lp/archivepublisher/tests/test_dist_upgrader.py	2018-01-26 13:27:05 +0000
+++ lib/lp/archivepublisher/tests/test_dist_upgrader.py	2018-01-26 13:27:05 +0000
@@ -23,6 +23,7 @@
     DistUpgraderUpload,
     )
 from lp.archivepublisher.interfaces.publisherconfig import IPublisherConfigSet
+from lp.archivepublisher.tests.test_run_parts import RunPartsMixin
 from lp.services.tarfile_helpers import LaunchpadWriteTarFile
 from lp.soyuz.enums import ArchivePurpose
 from lp.testing import TestCaseWithFactory
@@ -35,7 +36,7 @@
         self.archiveroot = archiveroot
 
 
-class TestDistUpgrader(TestCaseWithFactory):
+class TestDistUpgrader(RunPartsMixin, TestCaseWithFactory):
 
     layer = ZopelessDatabaseLayer
 
@@ -112,17 +113,15 @@
         self.assertRaises(DistUpgraderBadVersion, self.process)
 
     def test_sign_with_external_run_parts(self):
-        parts_directory = self.makeTemporaryDirectory()
-        sign_directory = os.path.join(
-            parts_directory, self.distro.name, "sign.d")
-        os.makedirs(sign_directory)
-        with open(os.path.join(sign_directory, "10-sign"), "w") as f:
+        self.enableRunParts(distribution_name=self.distro.name)
+        with open(os.path.join(
+                self.parts_directory, self.distro.name, "sign.d",
+                "10-sign"), "w") as f:
             f.write(dedent("""\
                 #! /bin/sh
                 touch "$OUTPUT_PATH"
                 """))
             os.fchmod(f.fileno(), 0o755)
-        self.pushConfig("archivepublisher", run_parts_location=parts_directory)
         self.openArchive("20060302.0120")
         self.tarfile.add_file("20060302.0120/list", "a list")
         self.tarfile.add_file("20060302.0120/foo.tar.gz", "a tarball")

=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
--- lib/lp/archivepublisher/tests/test_publisher.py	2018-01-26 13:27:05 +0000
+++ lib/lp/archivepublisher/tests/test_publisher.py	2018-01-26 13:27:05 +0000
@@ -15,9 +15,11 @@
     datetime,
     timedelta,
     )
+from fnmatch import fnmatch
 from functools import partial
 import gzip
 import hashlib
+from itertools import product
 from operator import attrgetter
 import os
 import shutil
@@ -27,10 +29,12 @@
 import time
 
 from debian.deb822 import Release
+from fixtures import MonkeyPatch
 try:
     import lzma
 except ImportError:
     from backports import lzma
+import mock
 import pytz
 from testtools.deferredruntest import AsynchronousDeferredRunTest
 from testtools.matchers import (
@@ -60,6 +64,7 @@
     IArchiveSigningKey,
     )
 from lp.archivepublisher.publishing import (
+    BY_HASH_STAY_OF_EXECUTION,
     ByHash,
     ByHashes,
     DirectoryHash,
@@ -2541,6 +2546,22 @@
 class TestUpdateByHash(TestPublisherBase):
     """Tests for handling of by-hash files."""
 
+    def setUpMockTime(self):
+        """Start simulating the advance of time in the publisher."""
+        self.times = [datetime.now(pytz.UTC)]
+        mock_datetime = mock.patch('lp.archivepublisher.publishing.datetime')
+        mocked_datetime = mock_datetime.start()
+        self.addCleanup(mock_datetime.stop)
+        mocked_datetime.utcnow = lambda: self.times[-1].replace(tzinfo=None)
+        self.useFixture(MonkeyPatch(
+            'lp.soyuz.model.archivefile._now', lambda: self.times[-1]))
+
+    def advanceTime(self, delta=None, absolute=None):
+        if delta is not None:
+            self.times.append(self.times[-1] + delta)
+        else:
+            self.times.append(absolute)
+
     def runSteps(self, publisher, step_a=False, step_a2=False, step_c=False,
                  step_d=False):
         """Run publisher steps."""
@@ -2553,6 +2574,33 @@
         if step_d:
             publisher.D_writeReleaseFiles(False)
 
+    @classmethod
+    def _makeScheduledDeletionDateMatcher(cls, condemned_at):
+        if condemned_at is None:
+            return Is(None)
+        else:
+            return Equals(
+                condemned_at + timedelta(days=BY_HASH_STAY_OF_EXECUTION))
+
+    def assertHasSuiteFiles(self, patterns, *properties):
+        def is_interesting(path):
+            return any(
+                fnmatch(path, 'dists/breezy-autotest/%s' % pattern)
+                for pattern in patterns)
+
+        files = [
+            archive_file
+            for archive_file in getUtility(IArchiveFileSet).getByArchive(
+                self.ubuntutest.main_archive)
+            if is_interesting(archive_file.path)]
+        matchers = []
+        for path, condemned_at in properties:
+            matchers.append(MatchesStructure(
+                path=Equals('dists/breezy-autotest/%s' % path),
+                scheduled_deletion_date=self._makeScheduledDeletionDateMatcher(
+                    condemned_at)))
+        self.assertThat(files, MatchesSetwise(*matchers))
+
     def test_disabled(self):
         # The publisher does not create by-hash directories if it is
         # disabled in the series configuration.
@@ -2605,14 +2653,18 @@
 
         suite_path = partial(
             os.path.join, self.config.distsroot, 'breezy-autotest')
-        contents = set()
+        top_contents = set()
+        with open(suite_path('Release'), 'rb') as f:
+            top_contents.add(f.read())
+        main_contents = set()
         for name in ('Release', 'Sources.gz', 'Sources.bz2'):
             with open(suite_path('main', 'source', name), 'rb') as f:
-                contents.add(f.read())
+                main_contents.add(f.read())
 
+        self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
-            ByHashHasContents(contents))
+            ByHashHasContents(main_contents))
 
         archive_files = getUtility(IArchiveFileSet).getByArchive(
             self.ubuntutest.main_archive)
@@ -2634,8 +2686,11 @@
 
         suite_path = partial(
             os.path.join, self.config.distsroot, 'breezy-autotest')
+        top_contents = set()
         main_contents = set()
         universe_contents = set()
+        with open(suite_path('Release'), 'rb') as f:
+            top_contents.add(f.read())
         for name in ('Release', 'Sources.gz', 'Sources.bz2'):
             with open(suite_path('main', 'source', name), 'rb') as f:
                 main_contents.add(f.read())
@@ -2646,10 +2701,13 @@
         self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
         flush_database_caches()
 
+        with open(suite_path('Release'), 'rb') as f:
+            top_contents.add(f.read())
         for name in ('Release', 'Sources.gz', 'Sources.bz2'):
             with open(suite_path('main', 'source', name), 'rb') as f:
                 main_contents.add(f.read())
 
+        self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
             ByHashHasContents(main_contents))
@@ -2660,7 +2718,8 @@
         archive_files = getUtility(IArchiveFileSet).getByArchive(
             self.ubuntutest.main_archive)
         self.assertContentEqual(
-            ['dists/breezy-autotest/main/source/Sources.bz2',
+            ['dists/breezy-autotest/Release',
+             'dists/breezy-autotest/main/source/Sources.bz2',
              'dists/breezy-autotest/main/source/Sources.gz'],
             [archive_file.path for archive_file in archive_files
              if archive_file.scheduled_deletion_date is not None])
@@ -2674,11 +2733,11 @@
             self.ubuntutest.main_archive)
         suite_path = partial(
             os.path.join, self.config.distsroot, 'breezy-autotest')
-        get_contents_files = lambda: [
-            archive_file
-            for archive_file in getUtility(IArchiveFileSet).getByArchive(
-                self.ubuntutest.main_archive)
-            if archive_file.path.startswith('dists/breezy-autotest/Contents-')]
+        self.setUpMockTime()
+
+        def get_release_contents():
+            with open(suite_path('Release')) as f:
+                return f.read()
 
         # Create the first file.
         with open_for_writing(suite_path('Contents-i386'), 'w') as f:
@@ -2687,72 +2746,93 @@
             self.breezy_autotest, PackagePublishingPocket.RELEASE)
         self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
         flush_database_caches()
-        matchers = [
-            MatchesStructure(
-                path=Equals('dists/breezy-autotest/Contents-i386'),
-                scheduled_deletion_date=Is(None))]
-        self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+        self.assertHasSuiteFiles(
+            ('Contents-*', 'Release'),
+            ('Contents-i386', None), ('Release', None))
+        releases = [get_release_contents()]
         self.assertThat(
-            suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+            suite_path('by-hash'),
+            ByHashHasContents(['A Contents file\n'] + releases))
 
         # Add a second identical file.
         with open_for_writing(suite_path('Contents-hppa'), 'w') as f:
             f.write('A Contents file\n')
+        self.advanceTime(delta=timedelta(hours=1))
         self.runSteps(publisher, step_d=True)
         flush_database_caches()
-        matchers.append(
-            MatchesStructure(
-                path=Equals('dists/breezy-autotest/Contents-hppa'),
-                scheduled_deletion_date=Is(None)))
-        self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+        self.assertHasSuiteFiles(
+            ('Contents-*', 'Release'),
+            ('Contents-i386', None), ('Contents-hppa', None),
+            ('Release', self.times[1]), ('Release', None))
+        releases.append(get_release_contents())
         self.assertThat(
-            suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+            suite_path('by-hash'),
+            ByHashHasContents(['A Contents file\n'] + releases))
 
         # Delete the first file, but allow it its stay of execution.
         os.unlink(suite_path('Contents-i386'))
+        self.advanceTime(delta=timedelta(hours=1))
         self.runSteps(publisher, step_d=True)
         flush_database_caches()
-        matchers[0] = matchers[0].update(scheduled_deletion_date=Not(Is(None)))
-        self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+        self.assertHasSuiteFiles(
+            ('Contents-*', 'Release'),
+            ('Contents-i386', self.times[2]), ('Contents-hppa', None),
+            ('Release', self.times[1]), ('Release', self.times[2]),
+            ('Release', None))
+        releases.append(get_release_contents())
         self.assertThat(
-            suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+            suite_path('by-hash'),
+            ByHashHasContents(['A Contents file\n'] + releases))
 
         # A no-op run leaves the scheduled deletion date intact.
+        self.advanceTime(delta=timedelta(hours=1))
+        self.runSteps(publisher, step_d=True)
+        flush_database_caches()
+        self.assertHasSuiteFiles(
+            ('Contents-*', 'Release'),
+            ('Contents-i386', self.times[2]), ('Contents-hppa', None),
+            ('Release', self.times[1]), ('Release', self.times[2]),
+            ('Release', self.times[3]), ('Release', None))
+        releases.append(get_release_contents())
+        self.assertThat(
+            suite_path('by-hash'),
+            ByHashHasContents(['A Contents file\n'] + releases))
+
+        # Arrange for the first file to be pruned, and delete the second
+        # file.  This also puts us past the stay of execution of the first
+        # two Release files.
         i386_file = getUtility(IArchiveFileSet).getByArchive(
             self.ubuntutest.main_archive,
             path=u'dists/breezy-autotest/Contents-i386').one()
-        i386_date = i386_file.scheduled_deletion_date
-        self.runSteps(publisher, step_d=True)
-        flush_database_caches()
-        matchers[0] = matchers[0].update(
-            scheduled_deletion_date=Equals(i386_date))
-        self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
-        self.assertThat(
-            suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
-
-        # Arrange for the first file to be pruned, and delete the second
-        # file.
-        now = datetime.now(pytz.UTC)
-        removeSecurityProxy(i386_file).scheduled_deletion_date = (
-            now - timedelta(hours=1))
+        self.advanceTime(
+            absolute=i386_file.scheduled_deletion_date + timedelta(minutes=5))
         os.unlink(suite_path('Contents-hppa'))
         self.runSteps(publisher, step_d=True)
         flush_database_caches()
-        matchers = [matchers[1].update(scheduled_deletion_date=Not(Is(None)))]
-        self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+        self.assertHasSuiteFiles(
+            ('Contents-*', 'Release'),
+            ('Contents-hppa', self.times[4]),
+            ('Release', self.times[3]), ('Release', self.times[4]),
+            ('Release', None))
+        releases.append(get_release_contents())
         self.assertThat(
-            suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+            suite_path('by-hash'),
+            ByHashHasContents(['A Contents file\n'] + releases[2:]))
 
-        # Arrange for the second file to be pruned.
+        # Arrange for the second file to be pruned.  This also puts us past
+        # the stay of execution of the first two remaining Release files.
         hppa_file = getUtility(IArchiveFileSet).getByArchive(
             self.ubuntutest.main_archive,
             path=u'dists/breezy-autotest/Contents-hppa').one()
-        removeSecurityProxy(hppa_file).scheduled_deletion_date = (
-            now - timedelta(hours=1))
+        self.advanceTime(
+            absolute=hppa_file.scheduled_deletion_date + timedelta(minutes=5))
         self.runSteps(publisher, step_d=True)
         flush_database_caches()
-        self.assertContentEqual([], get_contents_files())
-        self.assertThat(suite_path('by-hash'), Not(PathExists()))
+        self.assertHasSuiteFiles(
+            ('Contents-*', 'Release'),
+            ('Release', self.times[5]), ('Release', None))
+        releases.append(get_release_contents())
+        self.assertThat(suite_path('by-hash'), ByHashHasContents(releases[4:]))
 
     def test_reprieve(self):
         # If a newly-modified index file is identical to a
@@ -2765,6 +2845,7 @@
         publisher = Publisher(
             self.logger, self.config, self.disk_pool,
             self.ubuntutest.main_archive)
+        self.setUpMockTime()
 
         # Publish empty index files.
         publisher.markPocketDirty(
@@ -2789,15 +2870,8 @@
             ByHashHasContents(main_contents))
 
         # Make the empty Sources file ready to prune.
-        old_archive_files = []
-        for archive_file in getUtility(IArchiveFileSet).getByArchive(
-                self.ubuntutest.main_archive):
-            if ('main/source' in archive_file.path and
-                    archive_file.scheduled_deletion_date is not None):
-                old_archive_files.append(archive_file)
-        self.assertEqual(1, len(old_archive_files))
-        removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
-            datetime.now(pytz.UTC) - timedelta(hours=1))
+        self.advanceTime(
+            delta=timedelta(days=BY_HASH_STAY_OF_EXECUTION, hours=1))
 
         # Delete the source package so that Sources is empty again.  The
         # empty file is reprieved and the non-empty one is condemned.
@@ -2818,6 +2892,7 @@
                 ]))
 
     def setUpPruneableSuite(self):
+        self.setUpMockTime()
         self.breezy_autotest.publish_by_hash = True
         self.breezy_autotest.advertise_by_hash = True
         publisher = Publisher(
@@ -2826,47 +2901,50 @@
 
         suite_path = partial(
             os.path.join, self.config.distsroot, 'breezy-autotest')
-        main_contents = set()
-        for sourcename in ('foo', 'bar'):
+        top_contents = []
+        main_contents = []
+        for sourcename in ('foo', 'bar', 'baz'):
             self.getPubSource(
                 sourcename=sourcename, filecontent='Source: %s\n' % sourcename)
             self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+            with open(suite_path('Release'), 'rb') as f:
+                top_contents.append(f.read())
             for name in ('Release', 'Sources.gz', 'Sources.bz2'):
                 with open(suite_path('main', 'source', name), 'rb') as f:
-                    main_contents.add(f.read())
+                    main_contents.append(f.read())
+            self.advanceTime(delta=timedelta(hours=6))
         transaction.commit()
 
+        # We have two condemned sets of index files and one uncondemned set.
+        # main/source/Release contains a small enough amount of information
+        # that it doesn't change.
+        expected_suite_files = (
+            list(product(
+                ('main/source/Sources.gz', 'main/source/Sources.bz2',
+                 'Release'),
+                (self.times[1], self.times[2], None))) +
+            [('main/source/Release', None)])
+        self.assertHasSuiteFiles(
+            ('main/source/*', 'Release'), *expected_suite_files)
+        self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
             ByHashHasContents(main_contents))
-        old_archive_files = []
-        for archive_file in getUtility(IArchiveFileSet).getByArchive(
-                self.ubuntutest.main_archive):
-            if ('main/source' in archive_file.path and
-                    archive_file.scheduled_deletion_date is not None):
-                old_archive_files.append(archive_file)
-        self.assertEqual(2, len(old_archive_files))
-
-        now = datetime.now(pytz.UTC)
-        removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
-            now + timedelta(hours=12))
-        removeSecurityProxy(old_archive_files[1]).scheduled_deletion_date = (
-            now - timedelta(hours=12))
-        old_archive_files[1].library_file.open()
-        try:
-            main_contents.remove(old_archive_files[1].library_file.read())
-        finally:
-            old_archive_files[1].library_file.close()
-        self.assertThat(
-            suite_path('main', 'source', 'by-hash'),
-            Not(ByHashHasContents(main_contents)))
-
-        return main_contents
+
+        # Advance time to the point where the first condemned set of index
+        # files is scheduled for deletion.
+        self.advanceTime(
+            absolute=self.times[1] + timedelta(
+                days=BY_HASH_STAY_OF_EXECUTION, hours=1))
+        del top_contents[0]
+        del main_contents[:3]
+
+        return top_contents, main_contents
 
     def test_prune(self):
         # The publisher prunes files from by-hash that were condemned more
         # than a day ago.
-        main_contents = self.setUpPruneableSuite()
+        top_contents, main_contents = self.setUpPruneableSuite()
         suite_path = partial(
             os.path.join, self.config.distsroot, 'breezy-autotest')
 
@@ -2876,7 +2954,19 @@
             self.logger, self.config, self.disk_pool,
             self.ubuntutest.main_archive)
         self.runSteps(publisher, step_a2=True, step_c=True, step_d=True)
+        transaction.commit()
         self.assertEqual(set(), publisher.dirty_pockets)
+        # The condemned index files are removed, and no new Release file is
+        # generated.
+        expected_suite_files = (
+            list(product(
+                ('main/source/Sources.gz', 'main/source/Sources.bz2'),
+                (self.times[2], None))) +
+            [('main/source/Release', None),
+             ('Release', self.times[2]), ('Release', None)])
+        self.assertHasSuiteFiles(
+            ('main/source/*', 'Release'), *expected_suite_files)
+        self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
             ByHashHasContents(main_contents))
@@ -2884,7 +2974,7 @@
     def test_prune_immutable(self):
         # The publisher prunes by-hash files from immutable suites, but
         # doesn't regenerate the Release file in that case.
-        main_contents = self.setUpPruneableSuite()
+        top_contents, main_contents = self.setUpPruneableSuite()
         suite_path = partial(
             os.path.join, self.config.distsroot, 'breezy-autotest')
         release_path = suite_path('Release')
@@ -2897,8 +2987,20 @@
             self.logger, self.config, self.disk_pool,
             self.ubuntutest.main_archive)
         self.runSteps(publisher, step_a2=True, step_c=True, step_d=True)
+        transaction.commit()
         self.assertEqual(set(), publisher.dirty_pockets)
         self.assertEqual(release_mtime, os.stat(release_path).st_mtime)
+        # The condemned index files are removed, and no new Release file is
+        # generated.
+        expected_suite_files = (
+            list(product(
+                ('main/source/Sources.gz', 'main/source/Sources.bz2'),
+                (self.times[2], None))) +
+            [('main/source/Release', None),
+             ('Release', self.times[2]), ('Release', None)])
+        self.assertHasSuiteFiles(
+            ('main/source/*', 'Release'), *expected_suite_files)
+        self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
         self.assertThat(
             suite_path('main', 'source', 'by-hash'),
             ByHashHasContents(main_contents))

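The setUpMockTime/advanceTime pair above amounts to the following general pattern, sketched here standalone (assuming only pytz, which the test module already imports):

    # Generic sketch of the simulated-clock pattern: production code reads
    # the time through a hook, and tests append new "current" times.
    from datetime import datetime, timedelta

    import pytz

    class FakeClock:
        def __init__(self):
            self.times = [datetime.now(pytz.UTC)]

        def now(self):
            return self.times[-1]

        def advance(self, delta=None, absolute=None):
            """Move time forward by a delta, or jump to an absolute time."""
            if delta is not None:
                self.times.append(self.times[-1] + delta)
            else:
                self.times.append(absolute)

    clock = FakeClock()
    clock.advance(delta=timedelta(hours=1))
    assert clock.now() == clock.times[0] + timedelta(hours=1)
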
=== modified file 'lib/lp/soyuz/model/archivefile.py'
--- lib/lp/soyuz/model/archivefile.py	2016-04-04 10:06:33 +0000
+++ lib/lp/soyuz/model/archivefile.py	2018-01-26 13:27:05 +0000
@@ -1,4 +1,4 @@
-# Copyright 2016 Canonical Ltd.  This software is licensed under the
+# Copyright 2016-2018 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """A file in an archive."""
@@ -33,6 +33,7 @@
     IMasterStore,
     IStore,
     )
+from lp.services.database.sqlbase import convert_storm_clause_to_string
 from lp.services.database.stormexpr import BulkUpdate
 from lp.services.librarian.interfaces import ILibraryFileAliasSet
 from lp.services.librarian.model import (
@@ -76,6 +77,15 @@
         self.scheduled_deletion_date = None
 
 
+def _now():
+    """Get the current transaction timestamp.
+
+    Tests can override this with a Storm expression or a `datetime` to
+    simulate time changes.
+    """
+    return UTC_NOW
+
+
 @implementer(IArchiveFileSet)
 class ArchiveFileSet:
     """See `IArchiveFileSet`."""
@@ -128,7 +138,7 @@
             ArchiveFile.library_file == LibraryFileAlias.id,
             LibraryFileAlias.content == LibraryFileContent.id,
             ]
-        new_date = UTC_NOW + stay_of_execution
+        new_date = _now() + stay_of_execution
         return_columns = [
             ArchiveFile.container, ArchiveFile.path, LibraryFileContent.sha256]
         return list(IMasterStore(ArchiveFile).execute(Returning(
@@ -162,7 +172,7 @@
     def getContainersToReap(archive, container_prefix=None):
         clauses = [
             ArchiveFile.archive == archive,
-            ArchiveFile.scheduled_deletion_date < UTC_NOW,
+            ArchiveFile.scheduled_deletion_date < _now(),
             ]
         if container_prefix is not None:
             clauses.append(ArchiveFile.container.startswith(container_prefix))
@@ -175,22 +185,20 @@
         # XXX cjwatson 2016-03-30 bug=322972: Requires manual SQL due to
         # lack of support for DELETE FROM ... USING ... in Storm.
         clauses = [
-            "ArchiveFile.archive = ?",
-            "ArchiveFile.scheduled_deletion_date < "
-                "CURRENT_TIMESTAMP AT TIME ZONE 'UTC'",
-            "ArchiveFile.library_file = LibraryFileAlias.id",
-            "LibraryFileAlias.content = LibraryFileContent.id",
+            ArchiveFile.archive == archive,
+            ArchiveFile.scheduled_deletion_date < _now(),
+            ArchiveFile.library_file_id == LibraryFileAlias.id,
+            LibraryFileAlias.contentID == LibraryFileContent.id,
             ]
-        values = [archive.id]
         if container is not None:
-            clauses.append("ArchiveFile.container = ?")
-            values.append(container)
+            clauses.append(ArchiveFile.container == container)
+        where = convert_storm_clause_to_string(And(*clauses))
         return list(IMasterStore(ArchiveFile).execute("""
             DELETE FROM ArchiveFile
             USING LibraryFileAlias, LibraryFileContent
-            WHERE """ + " AND ".join(clauses) + """
+            WHERE """ + where + """
             RETURNING
                 ArchiveFile.container,
                 ArchiveFile.path,
                 LibraryFileContent.sha256
-            """, values))
+            """))

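The point of the _now() indirection is that UTC_NOW is evaluated in the database, where tests cannot easily simulate the passage of time; a test can instead swap in a plain datetime. A usage sketch, mirroring what test_publisher.py does with fixtures.MonkeyPatch:

    # Sketch: override archivefile._now() for the duration of a test so
    # scheduled_deletion_date comparisons use a simulated clock.
    from fixtures import MonkeyPatch

    def install_fake_now(test_case, fake_time):
        """Make _now() return fake_time (a tz-aware datetime) in tests."""
        test_case.useFixture(MonkeyPatch(
            'lp.soyuz.model.archivefile._now', lambda: fake_time))
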
