launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:archive-file-history into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:archive-file-history into launchpad:master with ~cjwatson/launchpad:refactor-update-by-hash as a prerequisite.

Commit message:
Turn ArchiveFile into a history table

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1765933 in Launchpad itself: "Allow building livefses against a view of the archive at a fixed point in time"
  https://bugs.launchpad.net/launchpad/+bug/1765933

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/434581

This adds `date_created` and `date_superseded` columns to `ArchiveFile`, which we'll be able to use later to query for the files that existed in an archive at a given point in time.
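
By way of illustration, a point-in-time lookup built on these columns might look roughly like the following Storm query.  This is a hypothetical sketch, not part of this MP: `archive_files_at` and its arguments are invented names, and it assumes `date_created` has already been backfilled.

```python
# Hypothetical sketch, not part of this MP: query the ArchiveFile rows
# that were live in `archive` at `timestamp`, using the new columns.
from storm.expr import Or

from lp.services.database.interfaces import IStore
from lp.soyuz.model.archivefile import ArchiveFile


def archive_files_at(archive, timestamp):
    """Return the ArchiveFile rows that existed in `archive` at `timestamp`.

    Assumes date_created has been backfilled for all rows.
    """
    return IStore(ArchiveFile).find(
        ArchiveFile,
        ArchiveFile.archive == archive,
        # Created at or before the point in time...
        ArchiveFile.date_created <= timestamp,
        # ...and either never superseded or superseded after it.
        # (Storm needs "== None" here to compile to IS NULL.)
        Or(
            ArchiveFile.date_superseded == None,
            ArchiveFile.date_superseded > timestamp,
        ),
    )
```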

The publisher changes a little as a result, mostly in the direction of simplification: unscheduling `ArchiveFile`s for deletion would now require creating new rows to keep the `date_created`/`date_superseded` windows intact, so in that case we now just follow the normal code path that creates new files rather than bothering with the previous optimization.

I think the publisher tests are somewhat clearer now: they explicitly test creation dates, which makes the chain of events more obvious.

ArchiveFile.date_superseded is arguably redundant with ArchiveFile.scheduled_deletion_date, but I think keeping both is worthwhile for clarity, and in case we ever change the stay of execution in the future.
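
To spell out that relationship: the new `scheduleDeletion` sets both columns from the same transaction timestamp, so for rows condemned after this change the following holds (a minimal sketch; `check_supersession_invariant` is just an illustrative helper, not code in this MP):

```python
from datetime import timedelta


def check_supersession_invariant(archive_file, stay_of_execution: timedelta):
    # scheduleDeletion sets date_superseded and scheduled_deletion_date
    # from the same transaction timestamp, so the scheduled deletion date
    # is always the supersession date plus the stay of execution.  Rows
    # predating the backfill may still have date_superseded unset.
    if archive_file.date_superseded is not None:
        assert archive_file.scheduled_deletion_date == (
            archive_file.date_superseded + stay_of_execution
        )
```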

We'll need to backfill the new columns; see https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/390761.
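
For the `date_superseded` half, one plausible shape for that backfill is to derive it from the existing `scheduled_deletion_date`.  This is purely illustrative; the linked merge proposal is the authoritative version, and the stay-of-execution value below is an assumption.

```python
# Purely illustrative backfill sketch; see the linked MP for the real one.
from datetime import timedelta

from lp.services.database.interfaces import IPrimaryStore
from lp.soyuz.model.archivefile import ArchiveFile

# Assumed to match the publisher's BY_HASH_STAY_OF_EXECUTION.
STAY_OF_EXECUTION = timedelta(days=1)


def backfill_date_superseded(batch_size=1000):
    store = IPrimaryStore(ArchiveFile)
    rows = store.find(
        ArchiveFile,
        ArchiveFile.date_superseded == None,
        ArchiveFile.scheduled_deletion_date != None,
    ).order_by(ArchiveFile.id)[:batch_size]
    for archive_file in rows:
        # Invert scheduled_deletion_date = date_superseded + stay of
        # execution, the relationship scheduleDeletion maintains.
        archive_file.date_superseded = (
            archive_file.scheduled_deletion_date - STAY_OF_EXECUTION
        )
```
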
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:archive-file-history into launchpad:master.
diff --git a/lib/lp/archivepublisher/publishing.py b/lib/lp/archivepublisher/publishing.py
index b8cea18..07e32dd 100644
--- a/lib/lp/archivepublisher/publishing.py
+++ b/lib/lp/archivepublisher/publishing.py
@@ -1249,6 +1249,9 @@ class Publisher:
             # about to delete the only file and need to know to prune it.
             by_hashes.registerChild(os.path.dirname(strip_dists(db_file.path)))
 
+            # XXX cjwatson 2022-12-13: This should check
+            # db_file.date_superseded instead once that column has been
+            # backfilled.
             if db_file.scheduled_deletion_date is None:
                 # XXX wgrant 2020-09-16: Once we have
                 # ArchiveFile.date_superseded in place, this should be a DB
@@ -1285,12 +1288,17 @@ class Publisher:
             if key not in new_live_files
         ]
         if old_files:
-            for container, path, sha256 in archive_file_set.scheduleDeletion(
+            archive_file_set.scheduleDeletion(
                 old_files, timedelta(days=BY_HASH_STAY_OF_EXECUTION)
-            ):
+            )
+            for db_file in old_files:
                 self.log.debug(
                     "by-hash: Scheduled %s for %s in %s for deletion"
-                    % (sha256, path, container)
+                    % (
+                        db_file.library_file.content.sha256,
+                        db_file.path,
+                        db_file.container,
+                    )
                 )
 
         # Ensure that all the current index files are in by-hash and have
@@ -1302,41 +1310,23 @@ class Publisher:
             file_key = (path, sha256)
             full_path = os.path.join(self._config.distsroot, real_path)
             assert os.path.exists(full_path)  # guaranteed by _getCurrentFiles
-            # Ensure there's a current ArchiveFile row, either by finding a
-            # matching non-live file and marking it live again, or by
-            # creating a new one based on the file on disk.
+            # If there isn't a matching live ArchiveFile row, create one.
             if file_key not in existing_live_files:
-                if file_key in existing_nonlive_files:
-                    db_file = existing_nonlive_files[file_key]
-                    keep_files.add(db_file)
-                else:
-                    with open(full_path, "rb") as fileobj:
-                        db_file = archive_file_set.newFromFile(
-                            self.archive,
-                            container,
-                            os.path.join("dists", path),
-                            fileobj,
-                            size,
-                            filenameToContentType(path),
-                        )
+                with open(full_path, "rb") as fileobj:
+                    db_file = archive_file_set.newFromFile(
+                        self.archive,
+                        container,
+                        os.path.join("dists", path),
+                        fileobj,
+                        size,
+                        filenameToContentType(path),
+                    )
             # And ensure the by-hash links exist on disk.
             if not by_hashes.known(path, "SHA256", sha256):
                 by_hashes.add(
                     path, db_file.library_file, copy_from_path=real_path
                 )
 
-        # Unschedule the deletion of any ArchiveFiles which are current in
-        # the archive this round but that were previously scheduled for
-        # deletion in the DB.
-        if keep_files:
-            for container, path, sha256 in archive_file_set.unscheduleDeletion(
-                keep_files
-            ):
-                self.log.debug(
-                    "by-hash: Unscheduled %s for %s in %s for deletion"
-                    % (sha256, path, container)
-                )
-
         # Remove any files from disk that aren't recorded in the database.
         by_hashes.prune()
 
diff --git a/lib/lp/archivepublisher/tests/test_publisher.py b/lib/lp/archivepublisher/tests/test_publisher.py
index 2a35cf3..15dc4ac 100644
--- a/lib/lp/archivepublisher/tests/test_publisher.py
+++ b/lib/lp/archivepublisher/tests/test_publisher.py
@@ -1,4 +1,4 @@
-# Copyright 2009-2021 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2022 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Tests for publisher class."""
@@ -16,8 +16,6 @@ from collections import OrderedDict, defaultdict
 from datetime import datetime, timedelta
 from fnmatch import fnmatch
 from functools import partial
-from itertools import product
-from operator import attrgetter
 from textwrap import dedent
 from unittest import mock
 
@@ -36,7 +34,6 @@ from testtools.matchers import (
     LessThan,
     Matcher,
     MatchesDict,
-    MatchesListwise,
     MatchesSetwise,
     MatchesStructure,
     Not,
@@ -3061,12 +3058,12 @@ class TestUpdateByHash(TestPublisherBase):
             publisher.D_writeReleaseFiles(False)
 
     @classmethod
-    def _makeScheduledDeletionDateMatcher(cls, condemned_at):
-        if condemned_at is None:
+    def _makeScheduledDeletionDateMatcher(cls, superseded_at):
+        if superseded_at is None:
             return Is(None)
         else:
             return Equals(
-                condemned_at + timedelta(days=BY_HASH_STAY_OF_EXECUTION)
+                superseded_at + timedelta(days=BY_HASH_STAY_OF_EXECUTION)
             )
 
     def assertHasSuiteFiles(self, patterns, *properties):
@@ -3084,13 +3081,15 @@ class TestUpdateByHash(TestPublisherBase):
             if is_interesting(archive_file.path)
         ]
         matchers = []
-        for path, condemned_at in properties:
+        for path, created_at, superseded_at in properties:
             scheduled_deletion_date_matcher = (
-                self._makeScheduledDeletionDateMatcher(condemned_at)
+                self._makeScheduledDeletionDateMatcher(superseded_at)
             )
             matchers.append(
                 MatchesStructure(
                     path=Equals("dists/breezy-autotest/%s" % path),
+                    date_created=Equals(created_at),
+                    date_superseded=Equals(superseded_at),
                     scheduled_deletion_date=scheduled_deletion_date_matcher,
                 )
             )
@@ -3281,8 +3280,8 @@ class TestUpdateByHash(TestPublisherBase):
         flush_database_caches()
         self.assertHasSuiteFiles(
             ("Contents-*", "Release"),
-            ("Contents-i386", None),
-            ("Release", None),
+            ("Contents-i386", self.times[0], None),
+            ("Release", self.times[0], None),
         )
         releases = [get_release_contents()]
         self.assertThat(
@@ -3298,10 +3297,10 @@ class TestUpdateByHash(TestPublisherBase):
         flush_database_caches()
         self.assertHasSuiteFiles(
             ("Contents-*", "Release"),
-            ("Contents-i386", None),
-            ("Contents-hppa", None),
-            ("Release", self.times[1]),
-            ("Release", None),
+            ("Contents-i386", self.times[0], None),
+            ("Contents-hppa", self.times[1], None),
+            ("Release", self.times[0], self.times[1]),
+            ("Release", self.times[1], None),
         )
         releases.append(get_release_contents())
         self.assertThat(
@@ -3316,11 +3315,11 @@ class TestUpdateByHash(TestPublisherBase):
         flush_database_caches()
         self.assertHasSuiteFiles(
             ("Contents-*", "Release"),
-            ("Contents-i386", self.times[2]),
-            ("Contents-hppa", None),
-            ("Release", self.times[1]),
-            ("Release", self.times[2]),
-            ("Release", None),
+            ("Contents-i386", self.times[0], self.times[2]),
+            ("Contents-hppa", self.times[1], None),
+            ("Release", self.times[0], self.times[1]),
+            ("Release", self.times[1], self.times[2]),
+            ("Release", self.times[2], None),
         )
         releases.append(get_release_contents())
         self.assertThat(
@@ -3334,12 +3333,12 @@ class TestUpdateByHash(TestPublisherBase):
         flush_database_caches()
         self.assertHasSuiteFiles(
             ("Contents-*", "Release"),
-            ("Contents-i386", self.times[2]),
-            ("Contents-hppa", None),
-            ("Release", self.times[1]),
-            ("Release", self.times[2]),
-            ("Release", self.times[3]),
-            ("Release", None),
+            ("Contents-i386", self.times[0], self.times[2]),
+            ("Contents-hppa", self.times[1], None),
+            ("Release", self.times[0], self.times[1]),
+            ("Release", self.times[1], self.times[2]),
+            ("Release", self.times[2], self.times[3]),
+            ("Release", self.times[3], None),
         )
         releases.append(get_release_contents())
         self.assertThat(
@@ -3366,10 +3365,10 @@ class TestUpdateByHash(TestPublisherBase):
         flush_database_caches()
         self.assertHasSuiteFiles(
             ("Contents-*", "Release"),
-            ("Contents-hppa", self.times[4]),
-            ("Release", self.times[3]),
-            ("Release", self.times[4]),
-            ("Release", None),
+            ("Contents-hppa", self.times[1], self.times[4]),
+            ("Release", self.times[2], self.times[3]),
+            ("Release", self.times[3], self.times[4]),
+            ("Release", self.times[4], None),
         )
         releases.append(get_release_contents())
         self.assertThat(
@@ -3394,8 +3393,8 @@ class TestUpdateByHash(TestPublisherBase):
         flush_database_caches()
         self.assertHasSuiteFiles(
             ("Contents-*", "Release"),
-            ("Release", self.times[5]),
-            ("Release", None),
+            ("Release", self.times[4], self.times[5]),
+            ("Release", self.times[5], None),
         )
         releases.append(get_release_contents())
         self.assertThat(suite_path("by-hash"), ByHashHasContents(releases[4:]))
@@ -3429,9 +3428,14 @@ class TestUpdateByHash(TestPublisherBase):
         for name in ("Release", "Sources"):
             with open(suite_path("main", "source", name), "rb") as f:
                 main_contents.add(f.read())
+        self.assertHasSuiteFiles(
+            ("main/source/Sources",),
+            ("main/source/Sources", self.times[0], None),
+        )
 
         # Add a source package so that Sources is non-empty.
         pub_source = self.getPubSource(filecontent=b"Source: foo\n")
+        self.advanceTime(delta=timedelta(hours=1))
         self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
         transaction.commit()
         with open(suite_path("main", "source", "Sources"), "rb") as f:
@@ -3441,33 +3445,47 @@ class TestUpdateByHash(TestPublisherBase):
             suite_path("main", "source", "by-hash"),
             ByHashHasContents(main_contents),
         )
-
-        # Make the empty Sources file ready to prune.
-        self.advanceTime(
-            delta=timedelta(days=BY_HASH_STAY_OF_EXECUTION, hours=1)
+        self.assertHasSuiteFiles(
+            ("main/source/Sources",),
+            ("main/source/Sources", self.times[0], self.times[1]),
+            ("main/source/Sources", self.times[1], None),
         )
 
         # Delete the source package so that Sources is empty again.  The
-        # empty file is reprieved and the non-empty one is condemned.
+        # empty file is reprieved (by creating a new ArchiveFile referring
+        # to it) and the non-empty one is condemned.
         pub_source.requestDeletion(self.ubuntutest.owner)
+        self.advanceTime(delta=timedelta(hours=1))
         self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
         transaction.commit()
         self.assertThat(
             suite_path("main", "source", "by-hash"),
             ByHashHasContents(main_contents),
         )
-        archive_files = getUtility(IArchiveFileSet).getByArchive(
-            self.ubuntutest.main_archive,
-            path="dists/breezy-autotest/main/source/Sources",
+        self.assertHasSuiteFiles(
+            ("main/source/Sources",),
+            ("main/source/Sources", self.times[0], self.times[1]),
+            ("main/source/Sources", self.times[1], self.times[2]),
+            ("main/source/Sources", self.times[2], None),
+        )
+
+        # Make the first empty Sources file ready to prune.  This doesn't
+        # change the set of files on disk, because there's still a newer
+        # reference to the empty file.
+        self.advanceTime(
+            absolute=self.times[1]
+            + timedelta(days=BY_HASH_STAY_OF_EXECUTION, minutes=30)
         )
+        self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+        transaction.commit()
         self.assertThat(
-            sorted(archive_files, key=attrgetter("id")),
-            MatchesListwise(
-                [
-                    MatchesStructure(scheduled_deletion_date=Is(None)),
-                    MatchesStructure(scheduled_deletion_date=Not(Is(None))),
-                ]
-            ),
+            suite_path("main", "source", "by-hash"),
+            ByHashHasContents(main_contents),
+        )
+        self.assertHasSuiteFiles(
+            ("main/source/Sources",),
+            ("main/source/Sources", self.times[1], self.times[2]),
+            ("main/source/Sources", self.times[2], None),
         )
 
     def setUpPruneableSuite(self):
@@ -3503,18 +3521,18 @@ class TestUpdateByHash(TestPublisherBase):
         # We have two condemned sets of index files and one uncondemned set.
         # main/source/Release contains a small enough amount of information
         # that it doesn't change.
-        expected_suite_files = list(
-            product(
-                (
-                    "main/source/Sources.gz",
-                    "main/source/Sources.bz2",
-                    "Release",
-                ),
-                (self.times[1], self.times[2], None),
-            )
-        ) + [("main/source/Release", None)]
         self.assertHasSuiteFiles(
-            ("main/source/*", "Release"), *expected_suite_files
+            ("main/source/*", "Release"),
+            ("main/source/Sources.gz", self.times[0], self.times[1]),
+            ("main/source/Sources.gz", self.times[1], self.times[2]),
+            ("main/source/Sources.gz", self.times[2], None),
+            ("main/source/Sources.bz2", self.times[0], self.times[1]),
+            ("main/source/Sources.bz2", self.times[1], self.times[2]),
+            ("main/source/Sources.bz2", self.times[2], None),
+            ("main/source/Release", self.times[0], None),
+            ("Release", self.times[0], self.times[1]),
+            ("Release", self.times[1], self.times[2]),
+            ("Release", self.times[2], None),
         )
         self.assertThat(suite_path("by-hash"), ByHashHasContents(top_contents))
         self.assertThat(
@@ -3554,18 +3572,15 @@ class TestUpdateByHash(TestPublisherBase):
         self.assertEqual(set(), publisher.dirty_suites)
         # The condemned index files are removed, and no new Release file is
         # generated.
-        expected_suite_files = list(
-            product(
-                ("main/source/Sources.gz", "main/source/Sources.bz2"),
-                (self.times[2], None),
-            )
-        ) + [
-            ("main/source/Release", None),
-            ("Release", self.times[2]),
-            ("Release", None),
-        ]
         self.assertHasSuiteFiles(
-            ("main/source/*", "Release"), *expected_suite_files
+            ("main/source/*", "Release"),
+            ("main/source/Sources.gz", self.times[1], self.times[2]),
+            ("main/source/Sources.gz", self.times[2], None),
+            ("main/source/Sources.bz2", self.times[1], self.times[2]),
+            ("main/source/Sources.bz2", self.times[2], None),
+            ("main/source/Release", self.times[0], None),
+            ("Release", self.times[1], self.times[2]),
+            ("Release", self.times[2], None),
         )
         self.assertThat(suite_path("by-hash"), ByHashHasContents(top_contents))
         self.assertThat(
@@ -3598,18 +3613,15 @@ class TestUpdateByHash(TestPublisherBase):
         self.assertEqual(release_mtime, os.stat(release_path).st_mtime)
         # The condemned index files are removed, and no new Release file is
         # generated.
-        expected_suite_files = list(
-            product(
-                ("main/source/Sources.gz", "main/source/Sources.bz2"),
-                (self.times[2], None),
-            )
-        ) + [
-            ("main/source/Release", None),
-            ("Release", self.times[2]),
-            ("Release", None),
-        ]
         self.assertHasSuiteFiles(
-            ("main/source/*", "Release"), *expected_suite_files
+            ("main/source/*", "Release"),
+            ("main/source/Sources.gz", self.times[1], self.times[2]),
+            ("main/source/Sources.gz", self.times[2], None),
+            ("main/source/Sources.bz2", self.times[1], self.times[2]),
+            ("main/source/Sources.bz2", self.times[2], None),
+            ("main/source/Release", self.times[0], None),
+            ("Release", self.times[1], self.times[2]),
+            ("Release", self.times[2], None),
         )
         self.assertThat(suite_path("by-hash"), ByHashHasContents(top_contents))
         self.assertThat(
diff --git a/lib/lp/soyuz/interfaces/archivefile.py b/lib/lp/soyuz/interfaces/archivefile.py
index 1078039..fff4db8 100644
--- a/lib/lp/soyuz/interfaces/archivefile.py
+++ b/lib/lp/soyuz/interfaces/archivefile.py
@@ -1,4 +1,4 @@
-# Copyright 2016 Canonical Ltd.  This software is licensed under the
+# Copyright 2016-2022 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Interface for a file in an archive."""
@@ -52,6 +52,20 @@ class IArchiveFile(Interface):
         readonly=True,
     )
 
+    date_created = Datetime(
+        title=_("The date when this file was created."),
+        # XXX cjwatson 2018-04-17: Should be required=True, but we need to
+        # backfill existing rows first.
+        required=False,
+        readonly=False,
+    )
+
+    date_superseded = Datetime(
+        title=_("The date when this file was scheduled for future deletion."),
+        required=False,
+        readonly=False,
+    )
+
     scheduled_deletion_date = Datetime(
         title=_("The date when this file should stop being published."),
         required=False,
@@ -116,19 +130,6 @@ class IArchiveFileSet(Interface):
         :param archive_files: The `IArchiveFile`s to schedule for deletion.
         :param stay_of_execution: A `timedelta`; schedule files for deletion
             this amount of time in the future.
-        :return: An iterable of (container, path, sha256) for files that
-            were scheduled for deletion.
-        """
-
-    def unscheduleDeletion(archive_files):
-        """Unschedule these archive files for deletion.
-
-        This is useful in the case when the new content of a file is
-        identical to a version that was previously condemned.
-
-        :param archive_files: The `IArchiveFile`s to unschedule for deletion.
-        :return: An iterable of (container, path, sha256) for files that
-            were unscheduled for deletion.
         """
 
     def getContainersToReap(archive, container_prefix=None):
diff --git a/lib/lp/soyuz/model/archivefile.py b/lib/lp/soyuz/model/archivefile.py
index 3aecb98..43e27b9 100644
--- a/lib/lp/soyuz/model/archivefile.py
+++ b/lib/lp/soyuz/model/archivefile.py
@@ -1,4 +1,4 @@
-# Copyright 2016-2018 Canonical Ltd.  This software is licensed under the
+# Copyright 2016-2022 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """A file in an archive."""
@@ -12,7 +12,6 @@ import os.path
 import re
 
 import pytz
-from storm.databases.postgres import Returning
 from storm.locals import And, DateTime, Int, Reference, Storm, Unicode
 from zope.component import getUtility
 from zope.interface import implementer
@@ -22,12 +21,21 @@ from lp.services.database.constants import UTC_NOW
 from lp.services.database.decoratedresultset import DecoratedResultSet
 from lp.services.database.interfaces import IPrimaryStore, IStore
 from lp.services.database.sqlbase import convert_storm_clause_to_string
-from lp.services.database.stormexpr import BulkUpdate, RegexpMatch
+from lp.services.database.stormexpr import RegexpMatch
 from lp.services.librarian.interfaces import ILibraryFileAliasSet
 from lp.services.librarian.model import LibraryFileAlias, LibraryFileContent
 from lp.soyuz.interfaces.archivefile import IArchiveFile, IArchiveFileSet
 
 
+def _now():
+    """Get the current transaction timestamp.
+
+    Tests can override this with a Storm expression or a `datetime` to
+    simulate time changes.
+    """
+    return UTC_NOW
+
+
 @implementer(IArchiveFile)
 class ArchiveFile(Storm):
     """See `IArchiveFile`."""
@@ -46,6 +54,18 @@ class ArchiveFile(Storm):
     library_file_id = Int(name="library_file", allow_none=False)
     library_file = Reference(library_file_id, "LibraryFileAlias.id")
 
+    date_created = DateTime(
+        name="date_created",
+        tzinfo=pytz.UTC,
+        # XXX cjwatson 2018-04-17: Should be allow_none=False, but we need
+        # to backfill existing rows first.
+        allow_none=True,
+    )
+
+    date_superseded = DateTime(
+        name="date_superseded", tzinfo=pytz.UTC, allow_none=True
+    )
+
     scheduled_deletion_date = DateTime(
         name="scheduled_deletion_date", tzinfo=pytz.UTC, allow_none=True
     )
@@ -57,18 +77,11 @@ class ArchiveFile(Storm):
         self.container = container
         self.path = path
         self.library_file = library_file
+        self.date_created = _now()
+        self.date_superseded = None
         self.scheduled_deletion_date = None
 
 
-def _now():
-    """Get the current transaction timestamp.
-
-    Tests can override this with a Storm expression or a `datetime` to
-    simulate time changes.
-    """
-    return UTC_NOW
-
-
 @implementer(IArchiveFileSet)
 class ArchiveFileSet:
     """See `IArchiveFileSet`."""
@@ -145,60 +158,15 @@ class ArchiveFileSet:
     @staticmethod
     def scheduleDeletion(archive_files, stay_of_execution):
         """See `IArchiveFileSet`."""
-        clauses = [
+        rows = IPrimaryStore(ArchiveFile).find(
+            ArchiveFile,
             ArchiveFile.id.is_in(
                 {archive_file.id for archive_file in archive_files}
             ),
-            ArchiveFile.library_file == LibraryFileAlias.id,
-            LibraryFileAlias.content == LibraryFileContent.id,
-        ]
-        new_date = _now() + stay_of_execution
-        return_columns = [
-            ArchiveFile.container,
-            ArchiveFile.path,
-            LibraryFileContent.sha256,
-        ]
-        return list(
-            IPrimaryStore(ArchiveFile).execute(
-                Returning(
-                    BulkUpdate(
-                        {ArchiveFile.scheduled_deletion_date: new_date},
-                        table=ArchiveFile,
-                        values=[LibraryFileAlias, LibraryFileContent],
-                        where=And(*clauses),
-                    ),
-                    columns=return_columns,
-                )
-            )
         )
-
-    @staticmethod
-    def unscheduleDeletion(archive_files):
-        """See `IArchiveFileSet`."""
-        clauses = [
-            ArchiveFile.id.is_in(
-                {archive_file.id for archive_file in archive_files}
-            ),
-            ArchiveFile.library_file == LibraryFileAlias.id,
-            LibraryFileAlias.content == LibraryFileContent.id,
-        ]
-        return_columns = [
-            ArchiveFile.container,
-            ArchiveFile.path,
-            LibraryFileContent.sha256,
-        ]
-        return list(
-            IPrimaryStore(ArchiveFile).execute(
-                Returning(
-                    BulkUpdate(
-                        {ArchiveFile.scheduled_deletion_date: None},
-                        table=ArchiveFile,
-                        values=[LibraryFileAlias, LibraryFileContent],
-                        where=And(*clauses),
-                    ),
-                    columns=return_columns,
-                )
-            )
+        rows.set(
+            date_superseded=_now(),
+            scheduled_deletion_date=_now() + stay_of_execution,
         )
 
     @staticmethod
diff --git a/lib/lp/soyuz/tests/test_archivefile.py b/lib/lp/soyuz/tests/test_archivefile.py
index f3febf1..8a7ea20 100644
--- a/lib/lp/soyuz/tests/test_archivefile.py
+++ b/lib/lp/soyuz/tests/test_archivefile.py
@@ -1,4 +1,4 @@
-# Copyright 2016 Canonical Ltd.  This software is licensed under the
+# Copyright 2016-2022 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """ArchiveFile tests."""
@@ -8,6 +8,7 @@ from datetime import timedelta
 
 import transaction
 from storm.store import Store
+from testtools.matchers import AfterPreprocessing, Equals, Is, MatchesStructure
 from zope.component import getUtility
 from zope.security.proxy import removeSecurityProxy
 
@@ -21,6 +22,14 @@ from lp.testing import TestCaseWithFactory
 from lp.testing.layers import LaunchpadZopelessLayer
 
 
+def read_library_file(library_file):
+    library_file.open()
+    try:
+        return library_file.read()
+    finally:
+        library_file.close()
+
+
 class TestArchiveFile(TestCaseWithFactory):
 
     layer = LaunchpadZopelessLayer
@@ -31,11 +40,20 @@ class TestArchiveFile(TestCaseWithFactory):
         archive_file = getUtility(IArchiveFileSet).new(
             archive, "foo", "dists/foo", library_file
         )
-        self.assertEqual(archive, archive_file.archive)
-        self.assertEqual("foo", archive_file.container)
-        self.assertEqual("dists/foo", archive_file.path)
-        self.assertEqual(library_file, archive_file.library_file)
-        self.assertIsNone(archive_file.scheduled_deletion_date)
+        self.assertThat(
+            archive_file,
+            MatchesStructure(
+                archive=Equals(archive),
+                container=Equals("foo"),
+                path=Equals("dists/foo"),
+                library_file=Equals(library_file),
+                date_created=Equals(
+                    get_transaction_timestamp(Store.of(archive_file))
+                ),
+                date_superseded=Is(None),
+                scheduled_deletion_date=Is(None),
+            ),
+        )
 
     def test_newFromFile(self):
         root = self.makeTemporaryDirectory()
@@ -46,16 +64,22 @@ class TestArchiveFile(TestCaseWithFactory):
             archive_file = getUtility(IArchiveFileSet).newFromFile(
                 archive, "foo", "dists/foo", f, 4, "text/plain"
             )
+        now = get_transaction_timestamp(Store.of(archive_file))
         transaction.commit()
-        self.assertEqual(archive, archive_file.archive)
-        self.assertEqual("foo", archive_file.container)
-        self.assertEqual("dists/foo", archive_file.path)
-        archive_file.library_file.open()
-        try:
-            self.assertEqual(b"abc\n", archive_file.library_file.read())
-        finally:
-            archive_file.library_file.close()
-        self.assertIsNone(archive_file.scheduled_deletion_date)
+        self.assertThat(
+            archive_file,
+            MatchesStructure(
+                archive=Equals(archive),
+                container=Equals("foo"),
+                path=Equals("dists/foo"),
+                library_file=AfterPreprocessing(
+                    read_library_file, Equals(b"abc\n")
+                ),
+                date_created=Equals(now),
+                date_superseded=Is(None),
+                scheduled_deletion_date=Is(None),
+            ),
+        )
 
     def test_getByArchive(self):
         archives = [self.factory.makeArchive(), self.factory.makeArchive()]
@@ -164,48 +188,19 @@ class TestArchiveFile(TestCaseWithFactory):
 
     def test_scheduleDeletion(self):
         archive_files = [self.factory.makeArchiveFile() for _ in range(3)]
-        expected_rows = [
-            (
-                archive_file.container,
-                archive_file.path,
-                archive_file.library_file.content.sha256,
-            )
-            for archive_file in archive_files[:2]
-        ]
-        rows = getUtility(IArchiveFileSet).scheduleDeletion(
+        getUtility(IArchiveFileSet).scheduleDeletion(
             archive_files[:2], timedelta(days=1)
         )
-        self.assertContentEqual(expected_rows, rows)
         flush_database_caches()
-        tomorrow = get_transaction_timestamp(
-            Store.of(archive_files[0])
-        ) + timedelta(days=1)
+        now = get_transaction_timestamp(Store.of(archive_files[0]))
+        tomorrow = now + timedelta(days=1)
+        self.assertEqual(now, archive_files[0].date_superseded)
         self.assertEqual(tomorrow, archive_files[0].scheduled_deletion_date)
+        self.assertEqual(now, archive_files[1].date_superseded)
         self.assertEqual(tomorrow, archive_files[1].scheduled_deletion_date)
+        self.assertIsNone(archive_files[2].date_superseded)
         self.assertIsNone(archive_files[2].scheduled_deletion_date)
 
-    def test_unscheduleDeletion(self):
-        archive_files = [self.factory.makeArchiveFile() for _ in range(3)]
-        now = get_transaction_timestamp(Store.of(archive_files[0]))
-        for archive_file in archive_files:
-            removeSecurityProxy(archive_file).scheduled_deletion_date = now
-        expected_rows = [
-            (
-                archive_file.container,
-                archive_file.path,
-                archive_file.library_file.content.sha256,
-            )
-            for archive_file in archive_files[:2]
-        ]
-        rows = getUtility(IArchiveFileSet).unscheduleDeletion(
-            archive_files[:2]
-        )
-        self.assertContentEqual(expected_rows, rows)
-        flush_database_caches()
-        self.assertIsNone(archive_files[0].scheduled_deletion_date)
-        self.assertIsNone(archive_files[1].scheduled_deletion_date)
-        self.assertEqual(now, archive_files[2].scheduled_deletion_date)
-
     def test_getContainersToReap(self):
         archive = self.factory.makeArchive()
         archive_files = []