[Merge] lp:~cjwatson/launchpad/fix-by-hash-unschedule-deletion into lp:launchpad

Colin Watson has proposed merging lp:~cjwatson/launchpad/fix-by-hash-unschedule-deletion into lp:launchpad.

Commit message:
Unschedule ArchiveFile rows for deletion more carefully, checking the path as well as the checksum.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/fix-by-hash-unschedule-deletion/+merge/290865

Unschedule ArchiveFile rows for deletion more carefully, checking the path as well as the checksum.  This avoids spurious unschedule/schedule cycles in the case of multiple identical files where some are condemned and some aren't.  Tested on dogfood and seems to be behaving itself.
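
In outline, the fix replaces the old checksum-only match with an explicit
walk over the condemned rows (a sketch paraphrasing the publishing.py hunk
below; current_files maps each live path to a (size, sha256) tuple, as in
the existing code):

    # Only unschedule a condemned ArchiveFile if a live file still exists
    # at the same path AND has the same SHA-256, rather than matching on
    # checksum alone as before.
    uncondemned_files = set()
    for db_file in archive_file_set.getByArchive(
            self.archive, container=container, only_condemned=True,
            eager_load=True):
        if db_file.path in current_files:
            current_sha256 = current_files[db_file.path][1]
            if db_file.library_file.content.sha256 == current_sha256:
                uncondemned_files.add(db_file)
    if uncondemned_files:
        archive_file_set.unscheduleDeletion(uncondemned_files)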
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/fix-by-hash-unschedule-deletion into lp:launchpad.
=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py	2016-04-02 21:22:50 +0000
+++ lib/lp/archivepublisher/publishing.py	2016-04-04 11:08:43 +0000
@@ -1010,12 +1010,20 @@
             current_files[path] = (
                 current_entry["size"], current_entry["sha256"])
             current_sha256_checksums.add(current_entry["sha256"])
-        for container, path, sha256 in archive_file_set.unscheduleDeletion(
-                self.archive, container=container,
-                sha256_checksums=current_sha256_checksums):
-            self.log.debug(
-                "by-hash: Unscheduled %s for %s in %s for deletion" % (
-                    sha256, path, container))
+        uncondemned_files = set()
+        for db_file in archive_file_set.getByArchive(
+                self.archive, container=container, only_condemned=True,
+                eager_load=True):
+            if db_file.path in current_files:
+                current_sha256 = current_files[db_file.path][1]
+                if db_file.library_file.content.sha256 == current_sha256:
+                    uncondemned_files.add(db_file)
+        if uncondemned_files:
+            for container, path, sha256 in archive_file_set.unscheduleDeletion(
+                    uncondemned_files):
+                self.log.debug(
+                    "by-hash: Unscheduled %s for %s in %s for deletion" % (
+                        sha256, path, container))
 
         # Remove any condemned files from the database whose stay of
         # execution has elapsed.  We ensure that we know about all the
@@ -1040,9 +1048,8 @@
         condemned_files = set()
         for db_file in db_files:
             if db_file.scheduled_deletion_date is None:
-                path = db_file.path
-                if path in current_files:
-                    current_sha256 = current_files[path][1]
+                if db_file.path in current_files:
+                    current_sha256 = current_files[db_file.path][1]
                 else:
                     current_sha256 = None
                 if db_file.library_file.content.sha256 != current_sha256:

=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
--- lib/lp/archivepublisher/tests/test_publisher.py	2016-04-02 21:22:50 +0000
+++ lib/lp/archivepublisher/tests/test_publisher.py	2016-04-04 11:08:43 +0000
@@ -2372,12 +2372,22 @@
         self.assertThat(
             suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
 
+        # A no-op run leaves the scheduled deletion date intact.
+        i386_file = getUtility(IArchiveFileSet).getByArchive(
+            self.ubuntutest.main_archive,
+            path=u'dists/breezy-autotest/Contents-i386').one()
+        i386_date = i386_file.scheduled_deletion_date
+        publisher.D_writeReleaseFiles(False)
+        flush_database_caches()
+        matchers[0] = matchers[0].update(
+            scheduled_deletion_date=Equals(i386_date))
+        self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+        self.assertThat(
+            suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+
         # Arrange for the first file to be pruned, and delete the second
         # file.
         now = datetime.now(pytz.UTC)
-        i386_file = getUtility(IArchiveFileSet).getByArchive(
-            self.ubuntutest.main_archive,
-            path=u'dists/breezy-autotest/Contents-i386').one()
         removeSecurityProxy(i386_file).scheduled_deletion_date = (
             now - timedelta(hours=1))
         os.unlink(suite_path('Contents-hppa'))

=== modified file 'lib/lp/soyuz/interfaces/archivefile.py'
--- lib/lp/soyuz/interfaces/archivefile.py	2016-03-30 08:25:23 +0000
+++ lib/lp/soyuz/interfaces/archivefile.py	2016-04-04 11:08:43 +0000
@@ -79,12 +79,15 @@
         :param content_type: The MIME type of the file.
         """
 
-    def getByArchive(archive, container=None, path=None, eager_load=False):
+    def getByArchive(archive, container=None, path=None, only_condemned=False,
+                     eager_load=False):
         """Get files in an archive.
 
         :param archive: Return files in this `IArchive`.
         :param container: Return only files with this container.
         :param path: Return only files with this path.
+        :param only_condemned: If True, return only files with a
+            scheduled_deletion_date set.
         :param eager_load: If True, preload related `LibraryFileAlias` and
             `LibraryFileContent` rows.
         :return: An iterable of matched files.
@@ -100,19 +103,13 @@
             were scheduled for deletion.
         """
 
-    def unscheduleDeletion(archive, container=None, sha256_checksums=set()):
+    def unscheduleDeletion(archive_files):
         """Unschedule these archive files for deletion.
 
         This is useful in the case when the new content of a file is
-        identical to a version that was previously condemned.  This method's
-        signature does not match that of `scheduleDeletion`; this is more
-        convenient because in such cases we normally do not yet have
-        `ArchiveFile` rows in hand.
+        identical to a version that was previously condemned.
 
-        :param archive: Operate on files in this `IArchive`.
-        :param container: Operate only on files with this container.
-        :param sha256_checksums: Operate only on files with any of these
-            checksums.
+        :param archive_files: The `IArchiveFile`s to unschedule for deletion.
         :return: An iterable of (container, path, sha256) for files that
             were unscheduled for deletion.
         """

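Read together, the two revised methods pair naturally: a caller first
fetches the condemned rows, then passes back the ones it wants to keep.
A hypothetical caller might look like this (still_live and log are
illustrative stand-ins, not part of the interface):

    from zope.component import getUtility

    from lp.soyuz.interfaces.archivefile import IArchiveFileSet

    archive_file_set = getUtility(IArchiveFileSet)
    # Fetch only the rows in this container that are scheduled for
    # deletion, preloading the related library file rows.
    condemned = archive_file_set.getByArchive(
        archive, container=container, only_condemned=True, eager_load=True)
    # Decide which rows to keep, then unschedule them in a single call.
    keep = [f for f in condemned if still_live(f)]
    if keep:
        for container, path, sha256 in archive_file_set.unscheduleDeletion(
                keep):
            log.debug("Kept %s for %s in %s" % (sha256, path, container))
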
=== modified file 'lib/lp/soyuz/model/archivefile.py'
--- lib/lp/soyuz/model/archivefile.py	2016-04-03 02:55:28 +0000
+++ lib/lp/soyuz/model/archivefile.py	2016-04-04 11:08:43 +0000
@@ -96,7 +96,8 @@
         return cls.new(archive, container, path, library_file)
 
     @staticmethod
-    def getByArchive(archive, container=None, path=None, eager_load=False):
+    def getByArchive(archive, container=None, path=None, only_condemned=False,
+                     eager_load=False):
         """See `IArchiveFileSet`."""
         clauses = [ArchiveFile.archive == archive]
         # XXX cjwatson 2016-03-15: We'll need some more sophisticated way to
@@ -105,6 +106,8 @@
             clauses.append(ArchiveFile.container == container)
         if path is not None:
             clauses.append(ArchiveFile.path == path)
+        if only_condemned:
+            clauses.append(ArchiveFile.scheduled_deletion_date != None)
         archive_files = IStore(ArchiveFile).find(ArchiveFile, *clauses)
 
         def eager_load(rows):
@@ -137,17 +140,14 @@
             columns=return_columns)))
 
     @staticmethod
-    def unscheduleDeletion(archive, container=None, sha256_checksums=set()):
+    def unscheduleDeletion(archive_files):
         """See `IArchiveFileSet`."""
         clauses = [
-            ArchiveFile.archive == archive,
-            ArchiveFile.scheduled_deletion_date != None,
+            ArchiveFile.id.is_in(
+                set(archive_file.id for archive_file in archive_files)),
             ArchiveFile.library_file == LibraryFileAlias.id,
             LibraryFileAlias.content == LibraryFileContent.id,
-            LibraryFileContent.sha256.is_in(sha256_checksums),
             ]
-        if container is not None:
-            clauses.append(ArchiveFile.container == container)
         return_columns = [
             ArchiveFile.container, ArchiveFile.path, LibraryFileContent.sha256]
         return list(IMasterStore(ArchiveFile).execute(Returning(

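The rewritten unscheduleDeletion still issues a single bulk UPDATE with a
RETURNING clause; only the WHERE clause changes, from an
archive/container/checksum match to an explicit set of row ids.  Roughly
the SQL it should compile to, written out as a comment since the tail of
the Storm expression falls outside this hunk (a sketch; the SET clause is
inferred from the method's purpose, and the generated SQL may differ in
detail):

    # UPDATE ArchiveFile
    # SET scheduled_deletion_date = NULL
    # FROM LibraryFileAlias, LibraryFileContent
    # WHERE ArchiveFile.id IN (...)
    #   AND ArchiveFile.library_file = LibraryFileAlias.id
    #   AND LibraryFileAlias.content = LibraryFileContent.id
    # RETURNING ArchiveFile.container, ArchiveFile.path,
    #           LibraryFileContent.sha256
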
=== modified file 'lib/lp/soyuz/tests/test_archivefile.py'
--- lib/lp/soyuz/tests/test_archivefile.py	2016-03-30 08:25:23 +0000
+++ lib/lp/soyuz/tests/test_archivefile.py	2016-04-04 11:08:43 +0000
@@ -63,8 +63,10 @@
     def test_getByArchive(self):
         archives = [self.factory.makeArchive(), self.factory.makeArchive()]
         archive_files = []
+        now = datetime.now(pytz.UTC)
         for archive in archives:
-            archive_files.append(self.factory.makeArchiveFile(archive=archive))
+            archive_files.append(self.factory.makeArchiveFile(
+                archive=archive, scheduled_deletion_date=now))
             archive_files.append(self.factory.makeArchiveFile(
                 archive=archive, container="foo"))
         archive_file_set = getUtility(IArchiveFileSet)
@@ -82,6 +84,9 @@
         self.assertContentEqual(
             [], archive_file_set.getByArchive(archives[0], path="other"))
         self.assertContentEqual(
+            [archive_files[0]],
+            archive_file_set.getByArchive(archives[0], only_condemned=True))
+        self.assertContentEqual(
             archive_files[2:], archive_file_set.getByArchive(archives[1]))
         self.assertContentEqual(
             [archive_files[3]],
@@ -94,6 +99,9 @@
                 archives[1], path=archive_files[3].path))
         self.assertContentEqual(
             [], archive_file_set.getByArchive(archives[1], path="other"))
+        self.assertContentEqual(
+            [archive_files[2]],
+            archive_file_set.getByArchive(archives[1], only_condemned=True))
 
     def test_scheduleDeletion(self):
         archive_files = [self.factory.makeArchiveFile() for _ in range(3)]
@@ -116,32 +124,21 @@
         self.assertIsNone(archive_files[2].scheduled_deletion_date)
 
     def test_unscheduleDeletion(self):
-        archives = [self.factory.makeArchive() for _ in range(2)]
-        lfas = [
-            self.factory.makeLibraryFileAlias(db_only=True) for _ in range(3)]
-        archive_files = []
-        for archive in archives:
-            for container in ("foo", "bar"):
-                archive_files.extend([
-                    self.factory.makeArchiveFile(
-                        archive=archive, container=container, library_file=lfa)
-                    for lfa in lfas])
+        archive_files = [self.factory.makeArchiveFile() for _ in range(3)]
         now = datetime.now(pytz.UTC)
         for archive_file in archive_files:
             removeSecurityProxy(archive_file).scheduled_deletion_date = now
         expected_rows = [
-            ("foo", archive_files[0].path, lfas[0].content.sha256),
-            ("foo", archive_files[1].path, lfas[1].content.sha256),
-            ]
+            (archive_file.container, archive_file.path,
+             archive_file.library_file.content.sha256)
+            for archive_file in archive_files[:2]]
         rows = getUtility(IArchiveFileSet).unscheduleDeletion(
-            archive=archives[0], container="foo",
-            sha256_checksums=[lfas[0].content.sha256, lfas[1].content.sha256])
+            archive_files[:2])
         self.assertContentEqual(expected_rows, rows)
         flush_database_caches()
-        self.assertContentEqual(
-            [archive_files[0], archive_files[1]],
-            [archive_file for archive_file in archive_files
-             if archive_file.scheduled_deletion_date is None])
+        self.assertIsNone(archive_files[0].scheduled_deletion_date)
+        self.assertIsNone(archive_files[1].scheduled_deletion_date)
+        self.assertIsNotNone(archive_files[2].scheduled_deletion_date)
 
     def test_getContainersToReap(self):
         archive = self.factory.makeArchive()

=== modified file 'lib/lp/testing/factory.py'
--- lib/lp/testing/factory.py	2016-04-02 00:44:01 +0000
+++ lib/lp/testing/factory.py	2016-04-04 11:08:43 +0000
@@ -2888,7 +2888,7 @@
         return person
 
     def makeArchiveFile(self, archive=None, container=None, path=None,
-                        library_file=None):
+                        library_file=None, scheduled_deletion_date=None):
         if archive is None:
             archive = self.makeArchive()
         if container is None:
@@ -2897,9 +2897,13 @@
             path = self.getUniqueUnicode()
         if library_file is None:
             library_file = self.makeLibraryFileAlias()
-        return getUtility(IArchiveFileSet).new(
+        archive_file = getUtility(IArchiveFileSet).new(
             archive=archive, container=container, path=path,
             library_file=library_file)
+        if scheduled_deletion_date is not None:
+            removeSecurityProxy(archive_file).scheduled_deletion_date = (
+                scheduled_deletion_date)
+        return archive_file
 
     def makeBuilder(self, processors=None, url=None, name=None, title=None,
                     owner=None, active=True, virtualized=True, vm_host=None,


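The new scheduled_deletion_date parameter keeps test setup terse.  A
sketch of how a test might use it (assuming the datetime and pytz imports
already used in the test modules above):

    # Create an ArchiveFile that is already condemned, as the updated
    # tests in test_archivefile.py do.
    now = datetime.now(pytz.UTC)
    condemned = self.factory.makeArchiveFile(scheduled_deletion_date=now)
    self.assertEqual(now, condemned.scheduled_deletion_date)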