[Merge] lp:~cjwatson/launchpad/fix-by-hash-unschedule-deletion into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/fix-by-hash-unschedule-deletion into lp:launchpad.
Commit message:
Unschedule ArchiveFile rows for deletion more carefully, checking the path as well as the checksum.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/fix-by-hash-unschedule-deletion/+merge/290865
Unschedule ArchiveFile rows for deletion more carefully, checking the path as well as the checksum. This avoids spurious unschedule/schedule cycles in the case of multiple identical files where some are condemned and some aren't. Tested on dogfood and seems to be behaving itself.
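In outline, the new code only uncondemns an ArchiveFile row when both its path and its SHA-256 checksum match a file in the current publication set; a checksum match at a different path no longer counts. A minimal sketch of that matching step, in plain Python, with condemned_rows and current_files as stand-ins for the getByArchive result and the publisher's path -> (size, sha256) map in the diff below:

    def pick_uncondemned(condemned_rows, current_files):
        # Return condemned rows whose path is still published with
        # byte-identical content.
        uncondemned = set()
        for row in condemned_rows:
            current = current_files.get(row.path)
            if current is None:
                # The path is gone entirely; leave the row condemned.
                continue
            # Matching on the checksum alone is no longer enough:
            # identical content condemned under another path stays
            # condemned instead of cycling between states.
            if row.library_file.content.sha256 == current[1]:
                uncondemned.add(row)
        return uncondemned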
=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py 2016-04-02 21:22:50 +0000
+++ lib/lp/archivepublisher/publishing.py 2016-04-04 11:08:43 +0000
@@ -1010,12 +1010,20 @@
current_files[path] = (
current_entry["size"], current_entry["sha256"])
current_sha256_checksums.add(current_entry["sha256"])
- for container, path, sha256 in archive_file_set.unscheduleDeletion(
- self.archive, container=container,
- sha256_checksums=current_sha256_checksums):
- self.log.debug(
- "by-hash: Unscheduled %s for %s in %s for deletion" % (
- sha256, path, container))
+ uncondemned_files = set()
+ for db_file in archive_file_set.getByArchive(
+ self.archive, container=container, only_condemned=True,
+ eager_load=True):
+ if db_file.path in current_files:
+ current_sha256 = current_files[db_file.path][1]
+ if db_file.library_file.content.sha256 == current_sha256:
+ uncondemned_files.add(db_file)
+ if uncondemned_files:
+ for container, path, sha256 in archive_file_set.unscheduleDeletion(
+ uncondemned_files):
+ self.log.debug(
+ "by-hash: Unscheduled %s for %s in %s for deletion" % (
+ sha256, path, container))
# Remove any condemned files from the database whose stay of
# execution has elapsed. We ensure that we know about all the
@@ -1040,9 +1048,8 @@
condemned_files = set()
for db_file in db_files:
if db_file.scheduled_deletion_date is None:
- path = db_file.path
- if path in current_files:
- current_sha256 = current_files[path][1]
+ if db_file.path in current_files:
+ current_sha256 = current_files[db_file.path][1]
else:
current_sha256 = None
if db_file.library_file.content.sha256 != current_sha256:
=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
--- lib/lp/archivepublisher/tests/test_publisher.py 2016-04-02 21:22:50 +0000
+++ lib/lp/archivepublisher/tests/test_publisher.py 2016-04-04 11:08:43 +0000
@@ -2372,12 +2372,22 @@
self.assertThat(
suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+ # A no-op run leaves the scheduled deletion date intact.
+ i386_file = getUtility(IArchiveFileSet).getByArchive(
+ self.ubuntutest.main_archive,
+ path=u'dists/breezy-autotest/Contents-i386').one()
+ i386_date = i386_file.scheduled_deletion_date
+ publisher.D_writeReleaseFiles(False)
+ flush_database_caches()
+ matchers[0] = matchers[0].update(
+ scheduled_deletion_date=Equals(i386_date))
+ self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+ self.assertThat(
+ suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+
# Arrange for the first file to be pruned, and delete the second
# file.
now = datetime.now(pytz.UTC)
- i386_file = getUtility(IArchiveFileSet).getByArchive(
- self.ubuntutest.main_archive,
- path=u'dists/breezy-autotest/Contents-i386').one()
removeSecurityProxy(i386_file).scheduled_deletion_date = (
now - timedelta(hours=1))
os.unlink(suite_path('Contents-hppa'))
=== modified file 'lib/lp/soyuz/interfaces/archivefile.py'
--- lib/lp/soyuz/interfaces/archivefile.py 2016-03-30 08:25:23 +0000
+++ lib/lp/soyuz/interfaces/archivefile.py 2016-04-04 11:08:43 +0000
@@ -79,12 +79,15 @@
:param content_type: The MIME type of the file.
"""
- def getByArchive(archive, container=None, path=None, eager_load=False):
+ def getByArchive(archive, container=None, path=None, only_condemned=False,
+ eager_load=False):
"""Get files in an archive.
:param archive: Return files in this `IArchive`.
:param container: Return only files with this container.
:param path: Return only files with this path.
+ :param only_condemned: If True, return only files with a
+ scheduled_deletion_date set.
:param eager_load: If True, preload related `LibraryFileAlias` and
`LibraryFileContent` rows.
:return: An iterable of matched files.
@@ -100,19 +103,13 @@
were scheduled for deletion.
"""
- def unscheduleDeletion(archive, container=None, sha256_checksums=set()):
+ def unscheduleDeletion(archive_files):
"""Unschedule these archive files for deletion.
This is useful in the case when the new content of a file is
- identical to a version that was previously condemned. This method's
- signature does not match that of `scheduleDeletion`; this is more
- convenient because in such cases we normally do not yet have
- `ArchiveFile` rows in hand.
+ identical to a version that was previously condemned.
- :param archive: Operate on files in this `IArchive`.
- :param container: Operate only on files with this container.
- :param sha256_checksums: Operate only on files with any of these
- checksums.
+ :param archive_files: The `IArchiveFile`s to unschedule for deletion.
:return: An iterable of (container, path, sha256) for files that
were unscheduled for deletion.
"""
=== modified file 'lib/lp/soyuz/model/archivefile.py'
--- lib/lp/soyuz/model/archivefile.py 2016-04-03 02:55:28 +0000
+++ lib/lp/soyuz/model/archivefile.py 2016-04-04 11:08:43 +0000
@@ -96,7 +96,8 @@
return cls.new(archive, container, path, library_file)
@staticmethod
- def getByArchive(archive, container=None, path=None, eager_load=False):
+ def getByArchive(archive, container=None, path=None, only_condemned=False,
+ eager_load=False):
"""See `IArchiveFileSet`."""
clauses = [ArchiveFile.archive == archive]
# XXX cjwatson 2016-03-15: We'll need some more sophisticated way to
@@ -105,6 +106,8 @@
clauses.append(ArchiveFile.container == container)
if path is not None:
clauses.append(ArchiveFile.path == path)
+ if only_condemned:
+ clauses.append(ArchiveFile.scheduled_deletion_date != None)
archive_files = IStore(ArchiveFile).find(ArchiveFile, *clauses)
def eager_load(rows):
@@ -137,17 +140,14 @@
columns=return_columns)))
@staticmethod
- def unscheduleDeletion(archive, container=None, sha256_checksums=set()):
+ def unscheduleDeletion(archive_files):
"""See `IArchiveFileSet`."""
clauses = [
- ArchiveFile.archive == archive,
- ArchiveFile.scheduled_deletion_date != None,
+ ArchiveFile.id.is_in(
+ set(archive_file.id for archive_file in archive_files)),
ArchiveFile.library_file == LibraryFileAlias.id,
LibraryFileAlias.content == LibraryFileContent.id,
- LibraryFileContent.sha256.is_in(sha256_checksums),
]
- if container is not None:
- clauses.append(ArchiveFile.container == container)
return_columns = [
ArchiveFile.container, ArchiveFile.path, LibraryFileContent.sha256]
return list(IMasterStore(ArchiveFile).execute(Returning(
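The rewritten query keys on the primary keys of the rows passed in, rather than re-deriving candidates from archive, container, and checksum, and still performs the whole update in a single UPDATE ... RETURNING statement via the Returning helper shown above. In plain Python the intended semantics are roughly this (a sketch only, not the actual implementation):

    def unschedule_deletion_semantics(archive_files):
        # Clear the condemnation on exactly the given rows and report
        # (container, path, sha256) for each, as the SQL does in one
        # round trip.
        results = []
        for archive_file in archive_files:
            archive_file.scheduled_deletion_date = None
            results.append((
                archive_file.container, archive_file.path,
                archive_file.library_file.content.sha256))
        return results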
=== modified file 'lib/lp/soyuz/tests/test_archivefile.py'
--- lib/lp/soyuz/tests/test_archivefile.py 2016-03-30 08:25:23 +0000
+++ lib/lp/soyuz/tests/test_archivefile.py 2016-04-04 11:08:43 +0000
@@ -63,8 +63,10 @@
def test_getByArchive(self):
archives = [self.factory.makeArchive(), self.factory.makeArchive()]
archive_files = []
+ now = datetime.now(pytz.UTC)
for archive in archives:
- archive_files.append(self.factory.makeArchiveFile(archive=archive))
+ archive_files.append(self.factory.makeArchiveFile(
+ archive=archive, scheduled_deletion_date=now))
archive_files.append(self.factory.makeArchiveFile(
archive=archive, container="foo"))
archive_file_set = getUtility(IArchiveFileSet)
@@ -82,6 +84,9 @@
self.assertContentEqual(
[], archive_file_set.getByArchive(archives[0], path="other"))
self.assertContentEqual(
+ [archive_files[0]],
+ archive_file_set.getByArchive(archives[0], only_condemned=True))
+ self.assertContentEqual(
archive_files[2:], archive_file_set.getByArchive(archives[1]))
self.assertContentEqual(
[archive_files[3]],
@@ -94,6 +99,9 @@
archives[1], path=archive_files[3].path))
self.assertContentEqual(
[], archive_file_set.getByArchive(archives[1], path="other"))
+ self.assertContentEqual(
+ [archive_files[2]],
+ archive_file_set.getByArchive(archives[1], only_condemned=True))
def test_scheduleDeletion(self):
archive_files = [self.factory.makeArchiveFile() for _ in range(3)]
@@ -116,32 +124,21 @@
self.assertIsNone(archive_files[2].scheduled_deletion_date)
def test_unscheduleDeletion(self):
- archives = [self.factory.makeArchive() for _ in range(2)]
- lfas = [
- self.factory.makeLibraryFileAlias(db_only=True) for _ in range(3)]
- archive_files = []
- for archive in archives:
- for container in ("foo", "bar"):
- archive_files.extend([
- self.factory.makeArchiveFile(
- archive=archive, container=container, library_file=lfa)
- for lfa in lfas])
+ archive_files = [self.factory.makeArchiveFile() for _ in range(3)]
now = datetime.now(pytz.UTC)
for archive_file in archive_files:
removeSecurityProxy(archive_file).scheduled_deletion_date = now
expected_rows = [
- ("foo", archive_files[0].path, lfas[0].content.sha256),
- ("foo", archive_files[1].path, lfas[1].content.sha256),
- ]
+ (archive_file.container, archive_file.path,
+ archive_file.library_file.content.sha256)
+ for archive_file in archive_files[:2]]
rows = getUtility(IArchiveFileSet).unscheduleDeletion(
- archive=archives[0], container="foo",
- sha256_checksums=[lfas[0].content.sha256, lfas[1].content.sha256])
+ archive_files[:2])
self.assertContentEqual(expected_rows, rows)
flush_database_caches()
- self.assertContentEqual(
- [archive_files[0], archive_files[1]],
- [archive_file for archive_file in archive_files
- if archive_file.scheduled_deletion_date is None])
+ self.assertIsNone(archive_files[0].scheduled_deletion_date)
+ self.assertIsNone(archive_files[1].scheduled_deletion_date)
+ self.assertIsNotNone(archive_files[2].scheduled_deletion_date)
def test_getContainersToReap(self):
archive = self.factory.makeArchive()
=== modified file 'lib/lp/testing/factory.py'
--- lib/lp/testing/factory.py 2016-04-02 00:44:01 +0000
+++ lib/lp/testing/factory.py 2016-04-04 11:08:43 +0000
@@ -2888,7 +2888,7 @@
return person
def makeArchiveFile(self, archive=None, container=None, path=None,
- library_file=None):
+ library_file=None, scheduled_deletion_date=None):
if archive is None:
archive = self.makeArchive()
if container is None:
@@ -2897,9 +2897,13 @@
path = self.getUniqueUnicode()
if library_file is None:
library_file = self.makeLibraryFileAlias()
- return getUtility(IArchiveFileSet).new(
+ archive_file = getUtility(IArchiveFileSet).new(
archive=archive, container=container, path=path,
library_file=library_file)
+ if scheduled_deletion_date is not None:
+ removeSecurityProxy(archive_file).scheduled_deletion_date = (
+ scheduled_deletion_date)
+ return archive_file
def makeBuilder(self, processors=None, url=None, name=None, title=None,
owner=None, active=True, virtualized=True, vm_host=None,
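With this factory change, a test can create an already-condemned ArchiveFile in a single call rather than poking the attribute afterwards. A hypothetical snippet inside a test case:

    from datetime import datetime, timedelta

    import pytz

    # The factory sets scheduled_deletion_date via removeSecurityProxy,
    # so the row comes back already condemned.
    condemned_file = self.factory.makeArchiveFile(
        scheduled_deletion_date=datetime.now(pytz.UTC) + timedelta(days=1))
    self.assertIsNotNone(condemned_file.scheduled_deletion_date)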