launchpad-reviewers team mailing list archive: Message #20206
[Merge] lp:~cjwatson/launchpad/by-hash-dists-in-progress into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/by-hash-dists-in-progress into lp:launchpad.
Commit message:
Always store ArchiveFile.path as if the distsroot were not overridden.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/by-hash-dists-in-progress/+merge/291065
Always store ArchiveFile.path as if the distsroot were not overridden. PublishFTPMaster overrides it by appending ".in-progress", but we want to store the canonical form in the database.
The diff is a bit large, but almost all of it is just moving the various _updateByHash tests out to their own class so that it's easy to run all of them both with and without an overridden distsroot.
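
To make the path convention concrete, here is a minimal standalone sketch (not the patched publisher code, although strip_dists mirrors the helper added in the diff below): ArchiveFile.path is always stored in canonical "dists/..." form, and the publisher translates between that form and paths relative to whatever distsroot is currently in effect. The to_database_path helper and the example distsroot value are illustrative only, not part of this branch.

import os


def strip_dists(path):
    """Map a canonical database path ("dists/...") to a path under distsroot."""
    assert path.startswith("dists/")
    return path[len("dists/"):]


def to_database_path(path_under_distsroot):
    """Map a path relative to distsroot back to its canonical database form."""
    return os.path.join("dists", path_under_distsroot)


# While publish-ftpmaster is running, the on-disk distsroot is overridden by
# appending ".in-progress" (illustrative path below), but the path stored in
# the database is unaffected by that override.
distsroot = "/srv/archive/ubuntutest/dists.in-progress"
db_path = "dists/breezy-autotest/main/source/Sources.gz"
on_disk = os.path.join(distsroot, strip_dists(db_path))
assert to_database_path(strip_dists(db_path)) == db_path
print(on_disk)
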
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/by-hash-dists-in-progress into lp:launchpad.
=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py 2016-04-05 08:55:55 +0000
+++ lib/lp/archivepublisher/publishing.py 2016-04-06 00:46:47 +0000
@@ -1002,12 +1002,16 @@
entries that ceased to be current sufficiently long ago are removed.
"""
archive_file_set = getUtility(IArchiveFileSet)
- by_hashes = ByHashes(self._config.archiveroot, self.log)
+ by_hashes = ByHashes(self._config.distsroot, self.log)
suite_dir = os.path.relpath(
os.path.join(self._config.distsroot, suite),
- self._config.archiveroot)
+ self._config.distsroot)
container = "release:%s" % suite
+ def strip_dists(path):
+ assert path.startswith("dists/")
+ return path[len("dists/"):]
+
# Gather information on entries in the current Release file, and
# make sure nothing there is condemned.
current_files = {}
@@ -1021,8 +1025,9 @@
for db_file in archive_file_set.getByArchive(
self.archive, container=container, only_condemned=True,
eager_load=True):
- if db_file.path in current_files:
- current_sha256 = current_files[db_file.path][1]
+ stripped_path = strip_dists(db_file.path)
+ if stripped_path in current_files:
+ current_sha256 = current_files[stripped_path][1]
if db_file.library_file.content.sha256 == current_sha256:
uncondemned_files.add(db_file)
if uncondemned_files:
@@ -1038,7 +1043,7 @@
# we can prune them properly later.
for db_file in archive_file_set.getByArchive(
self.archive, container=container):
- by_hashes.registerChild(os.path.dirname(db_file.path))
+ by_hashes.registerChild(os.path.dirname(strip_dists(db_file.path)))
for container, path, sha256 in archive_file_set.reap(
self.archive, container=container):
self.log.debug(
@@ -1048,15 +1053,16 @@
db_files = archive_file_set.getByArchive(
self.archive, container=container, eager_load=True)
for db_file in db_files:
- by_hashes.add(db_file.path, db_file.library_file)
+ by_hashes.add(strip_dists(db_file.path), db_file.library_file)
# Condemn any database records that do not correspond to current
# index files.
condemned_files = set()
for db_file in db_files:
if db_file.scheduled_deletion_date is None:
- if db_file.path in current_files:
- current_sha256 = current_files[db_file.path][1]
+ stripped_path = strip_dists(db_file.path)
+ if stripped_path in current_files:
+ current_sha256 = current_files[stripped_path][1]
else:
current_sha256 = None
if db_file.library_file.content.sha256 != current_sha256:
@@ -1075,13 +1081,13 @@
# although we can only avoid about a third of the queries since the
# librarian client has no bulk upload methods.
for path, (size, sha256) in current_files.items():
- full_path = os.path.join(self._config.archiveroot, path)
+ full_path = os.path.join(self._config.distsroot, path)
if (os.path.exists(full_path) and
not by_hashes.known(path, "SHA256", sha256)):
with open(full_path, "rb") as fileobj:
db_file = archive_file_set.newFromFile(
- self.archive, container, path, fileobj,
- size, filenameToContentType(path))
+ self.archive, container, os.path.join("dists", path),
+ fileobj, size, filenameToContentType(path))
by_hashes.add(path, db_file.library_file, copy_from_path=path)
# Finally, remove any files from disk that aren't recorded in the
=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
--- lib/lp/archivepublisher/tests/test_publisher.py 2016-04-05 08:55:55 +0000
+++ lib/lp/archivepublisher/tests/test_publisher.py 2016-04-06 00:46:47 +0000
@@ -2202,359 +2202,6 @@
'Release')
self.assertTrue(file_exists(source_release))
- def testUpdateByHashDisabled(self):
- # The publisher does not create by-hash directories if it is
- # disabled in the series configuration.
- self.assertFalse(self.breezy_autotest.publish_by_hash)
- self.assertFalse(self.breezy_autotest.advertise_by_hash)
- publisher = Publisher(
- self.logger, self.config, self.disk_pool,
- self.ubuntutest.main_archive)
-
- self.getPubSource(filecontent='Source: foo\n')
-
- publisher.A_publish(False)
- publisher.C_doFTPArchive(False)
- publisher.D_writeReleaseFiles(False)
-
- suite_path = partial(
- os.path.join, self.config.distsroot, 'breezy-autotest')
- self.assertThat(
- suite_path('main', 'source', 'by-hash'), Not(PathExists()))
- release = self.parseRelease(suite_path('Release'))
- self.assertNotIn('Acquire-By-Hash', release)
-
- def testUpdateByHashUnadvertised(self):
- # If the series configuration sets publish_by_hash but not
- # advertise_by_hash, then by-hash directories are created but not
- # advertised in Release. This is useful for testing.
- self.breezy_autotest.publish_by_hash = True
- self.assertFalse(self.breezy_autotest.advertise_by_hash)
- publisher = Publisher(
- self.logger, self.config, self.disk_pool,
- self.ubuntutest.main_archive)
-
- self.getPubSource(filecontent='Source: foo\n')
-
- publisher.A_publish(False)
- publisher.C_doFTPArchive(False)
- publisher.D_writeReleaseFiles(False)
-
- suite_path = partial(
- os.path.join, self.config.distsroot, 'breezy-autotest')
- self.assertThat(suite_path('main', 'source', 'by-hash'), PathExists())
- release = self.parseRelease(suite_path('Release'))
- self.assertNotIn('Acquire-By-Hash', release)
-
- def testUpdateByHashInitial(self):
- # An initial publisher run populates by-hash directories and leaves
- # no archive files scheduled for deletion.
- self.breezy_autotest.publish_by_hash = True
- self.breezy_autotest.advertise_by_hash = True
- publisher = Publisher(
- self.logger, self.config, self.disk_pool,
- self.ubuntutest.main_archive)
-
- self.getPubSource(filecontent='Source: foo\n')
-
- publisher.A_publish(False)
- publisher.C_doFTPArchive(False)
- publisher.D_writeReleaseFiles(False)
- flush_database_caches()
-
- suite_path = partial(
- os.path.join, self.config.distsroot, 'breezy-autotest')
- contents = set()
- for name in ('Release', 'Sources.gz', 'Sources.bz2'):
- with open(suite_path('main', 'source', name), 'rb') as f:
- contents.add(f.read())
-
- self.assertThat(
- suite_path('main', 'source', 'by-hash'),
- ByHashHasContents(contents))
-
- archive_files = getUtility(IArchiveFileSet).getByArchive(
- self.ubuntutest.main_archive)
- self.assertNotEqual([], archive_files)
- self.assertEqual([], [
- archive_file for archive_file in archive_files
- if archive_file.scheduled_deletion_date is not None])
-
- def testUpdateByHashSubsequent(self):
- # A subsequent publisher run updates by-hash directories where
- # necessary, and marks inactive index files for later deletion.
- self.breezy_autotest.publish_by_hash = True
- self.breezy_autotest.advertise_by_hash = True
- publisher = Publisher(
- self.logger, self.config, self.disk_pool,
- self.ubuntutest.main_archive)
-
- self.getPubSource(filecontent='Source: foo\n')
-
- publisher.A_publish(False)
- publisher.C_doFTPArchive(False)
- publisher.D_writeReleaseFiles(False)
-
- suite_path = partial(
- os.path.join, self.config.distsroot, 'breezy-autotest')
- main_contents = set()
- universe_contents = set()
- for name in ('Release', 'Sources.gz', 'Sources.bz2'):
- with open(suite_path('main', 'source', name), 'rb') as f:
- main_contents.add(f.read())
- with open(suite_path('universe', 'source', name), 'rb') as f:
- universe_contents.add(f.read())
-
- self.getPubSource(sourcename='baz', filecontent='Source: baz\n')
-
- publisher.A_publish(False)
- publisher.C_doFTPArchive(False)
- publisher.D_writeReleaseFiles(False)
- flush_database_caches()
-
- for name in ('Release', 'Sources.gz', 'Sources.bz2'):
- with open(suite_path('main', 'source', name), 'rb') as f:
- main_contents.add(f.read())
-
- self.assertThat(
- suite_path('main', 'source', 'by-hash'),
- ByHashHasContents(main_contents))
- self.assertThat(
- suite_path('universe', 'source', 'by-hash'),
- ByHashHasContents(universe_contents))
-
- archive_files = getUtility(IArchiveFileSet).getByArchive(
- self.ubuntutest.main_archive)
- self.assertContentEqual(
- ['dists/breezy-autotest/main/source/Sources.bz2',
- 'dists/breezy-autotest/main/source/Sources.gz'],
- [archive_file.path for archive_file in archive_files
- if archive_file.scheduled_deletion_date is not None])
-
- def testUpdateByHashIdenticalFiles(self):
- # Multiple identical files in the same directory receive multiple
- # ArchiveFile rows, even though they share a by-hash entry.
- self.breezy_autotest.publish_by_hash = True
- publisher = Publisher(
- self.logger, self.config, self.disk_pool,
- self.ubuntutest.main_archive)
- suite_path = partial(
- os.path.join, self.config.distsroot, 'breezy-autotest')
- get_contents_files = lambda: [
- archive_file
- for archive_file in getUtility(IArchiveFileSet).getByArchive(
- self.ubuntutest.main_archive)
- if archive_file.path.startswith('dists/breezy-autotest/Contents-')]
-
- # Create the first file.
- with open_for_writing(suite_path('Contents-i386'), 'w') as f:
- f.write('A Contents file\n')
- publisher.markPocketDirty(
- self.breezy_autotest, PackagePublishingPocket.RELEASE)
- publisher.A_publish(False)
- publisher.C_doFTPArchive(False)
- publisher.D_writeReleaseFiles(False)
- flush_database_caches()
- matchers = [
- MatchesStructure(
- path=Equals('dists/breezy-autotest/Contents-i386'),
- scheduled_deletion_date=Is(None))]
- self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
- self.assertThat(
- suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
-
- # Add a second identical file.
- with open_for_writing(suite_path('Contents-hppa'), 'w') as f:
- f.write('A Contents file\n')
- publisher.D_writeReleaseFiles(False)
- flush_database_caches()
- matchers.append(
- MatchesStructure(
- path=Equals('dists/breezy-autotest/Contents-hppa'),
- scheduled_deletion_date=Is(None)))
- self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
- self.assertThat(
- suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
-
- # Delete the first file, but allow it its stay of execution.
- os.unlink(suite_path('Contents-i386'))
- publisher.D_writeReleaseFiles(False)
- flush_database_caches()
- matchers[0] = matchers[0].update(scheduled_deletion_date=Not(Is(None)))
- self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
- self.assertThat(
- suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
-
- # A no-op run leaves the scheduled deletion date intact.
- i386_file = getUtility(IArchiveFileSet).getByArchive(
- self.ubuntutest.main_archive,
- path=u'dists/breezy-autotest/Contents-i386').one()
- i386_date = i386_file.scheduled_deletion_date
- publisher.D_writeReleaseFiles(False)
- flush_database_caches()
- matchers[0] = matchers[0].update(
- scheduled_deletion_date=Equals(i386_date))
- self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
- self.assertThat(
- suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
-
- # Arrange for the first file to be pruned, and delete the second
- # file.
- now = datetime.now(pytz.UTC)
- removeSecurityProxy(i386_file).scheduled_deletion_date = (
- now - timedelta(hours=1))
- os.unlink(suite_path('Contents-hppa'))
- publisher.D_writeReleaseFiles(False)
- flush_database_caches()
- matchers = [matchers[1].update(scheduled_deletion_date=Not(Is(None)))]
- self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
- self.assertThat(
- suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
-
- # Arrange for the second file to be pruned.
- hppa_file = getUtility(IArchiveFileSet).getByArchive(
- self.ubuntutest.main_archive,
- path=u'dists/breezy-autotest/Contents-hppa').one()
- removeSecurityProxy(hppa_file).scheduled_deletion_date = (
- now - timedelta(hours=1))
- publisher.D_writeReleaseFiles(False)
- flush_database_caches()
- self.assertContentEqual([], get_contents_files())
- self.assertThat(suite_path('by-hash'), Not(PathExists()))
-
- def testUpdateByHashReprieve(self):
- # If a newly-modified index file is identical to a
- # previously-condemned one, then it is reprieved and not pruned.
- self.breezy_autotest.publish_by_hash = True
- # Enable uncompressed index files to avoid relying on stable output
- # from compressors in this test.
- self.breezy_autotest.index_compressors = [
- IndexCompressionType.UNCOMPRESSED]
- publisher = Publisher(
- self.logger, self.config, self.disk_pool,
- self.ubuntutest.main_archive)
-
- # Publish empty index files.
- publisher.markPocketDirty(
- self.breezy_autotest, PackagePublishingPocket.RELEASE)
- publisher.A_publish(False)
- publisher.C_doFTPArchive(False)
- publisher.D_writeReleaseFiles(False)
- suite_path = partial(
- os.path.join, self.config.distsroot, 'breezy-autotest')
- main_contents = set()
- for name in ('Release', 'Sources'):
- with open(suite_path('main', 'source', name), 'rb') as f:
- main_contents.add(f.read())
-
- # Add a source package so that Sources is non-empty.
- pub_source = self.getPubSource(filecontent='Source: foo\n')
- publisher.A_publish(False)
- publisher.C_doFTPArchive(False)
- publisher.D_writeReleaseFiles(False)
- transaction.commit()
- with open(suite_path('main', 'source', 'Sources'), 'rb') as f:
- main_contents.add(f.read())
- self.assertEqual(3, len(main_contents))
- self.assertThat(
- suite_path('main', 'source', 'by-hash'),
- ByHashHasContents(main_contents))
-
- # Make the empty Sources file ready to prune.
- old_archive_files = []
- for archive_file in getUtility(IArchiveFileSet).getByArchive(
- self.ubuntutest.main_archive):
- if ('main/source' in archive_file.path and
- archive_file.scheduled_deletion_date is not None):
- old_archive_files.append(archive_file)
- self.assertEqual(1, len(old_archive_files))
- removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
- datetime.now(pytz.UTC) - timedelta(hours=1))
-
- # Delete the source package so that Sources is empty again. The
- # empty file is reprieved and the non-empty one is condemned.
- pub_source.requestDeletion(self.ubuntutest.owner)
- publisher.A_publish(False)
- publisher.C_doFTPArchive(False)
- publisher.D_writeReleaseFiles(False)
- transaction.commit()
- self.assertThat(
- suite_path('main', 'source', 'by-hash'),
- ByHashHasContents(main_contents))
- archive_files = getUtility(IArchiveFileSet).getByArchive(
- self.ubuntutest.main_archive,
- path=u'dists/breezy-autotest/main/source/Sources')
- self.assertThat(
- sorted(archive_files, key=attrgetter('id')),
- MatchesListwise([
- MatchesStructure(scheduled_deletion_date=Is(None)),
- MatchesStructure(scheduled_deletion_date=Not(Is(None))),
- ]))
-
- def testUpdateByHashPrune(self):
- # The publisher prunes files from by-hash that were condemned more
- # than a day ago.
- self.breezy_autotest.publish_by_hash = True
- self.breezy_autotest.advertise_by_hash = True
- publisher = Publisher(
- self.logger, self.config, self.disk_pool,
- self.ubuntutest.main_archive)
-
- suite_path = partial(
- os.path.join, self.config.distsroot, 'breezy-autotest')
- main_contents = set()
- for sourcename in ('foo', 'bar'):
- self.getPubSource(
- sourcename=sourcename, filecontent='Source: %s\n' % sourcename)
- publisher.A_publish(False)
- publisher.C_doFTPArchive(False)
- publisher.D_writeReleaseFiles(False)
- for name in ('Release', 'Sources.gz', 'Sources.bz2'):
- with open(suite_path('main', 'source', name), 'rb') as f:
- main_contents.add(f.read())
- transaction.commit()
-
- self.assertThat(
- suite_path('main', 'source', 'by-hash'),
- ByHashHasContents(main_contents))
- old_archive_files = []
- for archive_file in getUtility(IArchiveFileSet).getByArchive(
- self.ubuntutest.main_archive):
- if ('main/source' in archive_file.path and
- archive_file.scheduled_deletion_date is not None):
- old_archive_files.append(archive_file)
- self.assertEqual(2, len(old_archive_files))
-
- now = datetime.now(pytz.UTC)
- removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
- now + timedelta(hours=12))
- removeSecurityProxy(old_archive_files[1]).scheduled_deletion_date = (
- now - timedelta(hours=12))
- old_archive_files[1].library_file.open()
- try:
- main_contents.remove(old_archive_files[1].library_file.read())
- finally:
- old_archive_files[1].library_file.close()
- self.assertThat(
- suite_path('main', 'source', 'by-hash'),
- Not(ByHashHasContents(main_contents)))
-
- # Use a fresh Publisher instance to ensure that it doesn't have
- # dirty-pocket state left over from the last run.
- publisher = Publisher(
- self.logger, self.config, self.disk_pool,
- self.ubuntutest.main_archive)
- publisher.A2_markPocketsWithDeletionsDirty()
- publisher.C_doFTPArchive(False)
- publisher.D_writeReleaseFiles(False)
- self.assertEqual(set(), publisher.dirty_pockets)
- self.assertContentEqual(
- [('breezy-autotest', PackagePublishingPocket.RELEASE)],
- publisher.release_files_needed)
- self.assertThat(
- suite_path('main', 'source', 'by-hash'),
- ByHashHasContents(main_contents))
-
def testCreateSeriesAliasesNoAlias(self):
"""createSeriesAliases has nothing to do by default."""
publisher = Publisher(
@@ -2851,6 +2498,368 @@
publisher.C_doFTPArchive(False)
+class TestUpdateByHash(TestPublisherBase):
+ """Tests for handling of by-hash files."""
+
+ def runSteps(self, publisher, step_a=False, step_a2=False, step_c=False,
+ step_d=False):
+ """Run publisher steps."""
+ if step_a:
+ publisher.A_publish(False)
+ if step_a2:
+ publisher.A2_markPocketsWithDeletionsDirty()
+ if step_c:
+ publisher.C_doFTPArchive(False)
+ if step_d:
+ publisher.D_writeReleaseFiles(False)
+
+ def test_disabled(self):
+ # The publisher does not create by-hash directories if it is
+ # disabled in the series configuration.
+ self.assertFalse(self.breezy_autotest.publish_by_hash)
+ self.assertFalse(self.breezy_autotest.advertise_by_hash)
+ publisher = Publisher(
+ self.logger, self.config, self.disk_pool,
+ self.ubuntutest.main_archive)
+ self.getPubSource(filecontent='Source: foo\n')
+ self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+
+ suite_path = partial(
+ os.path.join, self.config.distsroot, 'breezy-autotest')
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'), Not(PathExists()))
+ with open(suite_path('Release')) as release_file:
+ release = Release(release_file)
+ self.assertNotIn('Acquire-By-Hash', release)
+
+ def test_unadvertised(self):
+ # If the series configuration sets publish_by_hash but not
+ # advertise_by_hash, then by-hash directories are created but not
+ # advertised in Release. This is useful for testing.
+ self.breezy_autotest.publish_by_hash = True
+ self.assertFalse(self.breezy_autotest.advertise_by_hash)
+ publisher = Publisher(
+ self.logger, self.config, self.disk_pool,
+ self.ubuntutest.main_archive)
+ self.getPubSource(filecontent='Source: foo\n')
+ self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+
+ suite_path = partial(
+ os.path.join, self.config.distsroot, 'breezy-autotest')
+ self.assertThat(suite_path('main', 'source', 'by-hash'), PathExists())
+ with open(suite_path('Release')) as release_file:
+ release = Release(release_file)
+ self.assertNotIn('Acquire-By-Hash', release)
+
+ def test_initial(self):
+ # An initial publisher run populates by-hash directories and leaves
+ # no archive files scheduled for deletion.
+ self.breezy_autotest.publish_by_hash = True
+ self.breezy_autotest.advertise_by_hash = True
+ publisher = Publisher(
+ self.logger, self.config, self.disk_pool,
+ self.ubuntutest.main_archive)
+ self.getPubSource(filecontent='Source: foo\n')
+ self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+ flush_database_caches()
+
+ suite_path = partial(
+ os.path.join, self.config.distsroot, 'breezy-autotest')
+ contents = set()
+ for name in ('Release', 'Sources.gz', 'Sources.bz2'):
+ with open(suite_path('main', 'source', name), 'rb') as f:
+ contents.add(f.read())
+
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ ByHashHasContents(contents))
+
+ archive_files = getUtility(IArchiveFileSet).getByArchive(
+ self.ubuntutest.main_archive)
+ self.assertNotEqual([], archive_files)
+ self.assertEqual([], [
+ archive_file for archive_file in archive_files
+ if archive_file.scheduled_deletion_date is not None])
+
+ def test_subsequent(self):
+ # A subsequent publisher run updates by-hash directories where
+ # necessary, and marks inactive index files for later deletion.
+ self.breezy_autotest.publish_by_hash = True
+ self.breezy_autotest.advertise_by_hash = True
+ publisher = Publisher(
+ self.logger, self.config, self.disk_pool,
+ self.ubuntutest.main_archive)
+ self.getPubSource(filecontent='Source: foo\n')
+ self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+
+ suite_path = partial(
+ os.path.join, self.config.distsroot, 'breezy-autotest')
+ main_contents = set()
+ universe_contents = set()
+ for name in ('Release', 'Sources.gz', 'Sources.bz2'):
+ with open(suite_path('main', 'source', name), 'rb') as f:
+ main_contents.add(f.read())
+ with open(suite_path('universe', 'source', name), 'rb') as f:
+ universe_contents.add(f.read())
+
+ self.getPubSource(sourcename='baz', filecontent='Source: baz\n')
+ self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+ flush_database_caches()
+
+ for name in ('Release', 'Sources.gz', 'Sources.bz2'):
+ with open(suite_path('main', 'source', name), 'rb') as f:
+ main_contents.add(f.read())
+
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ ByHashHasContents(main_contents))
+ self.assertThat(
+ suite_path('universe', 'source', 'by-hash'),
+ ByHashHasContents(universe_contents))
+
+ archive_files = getUtility(IArchiveFileSet).getByArchive(
+ self.ubuntutest.main_archive)
+ self.assertContentEqual(
+ ['dists/breezy-autotest/main/source/Sources.bz2',
+ 'dists/breezy-autotest/main/source/Sources.gz'],
+ [archive_file.path for archive_file in archive_files
+ if archive_file.scheduled_deletion_date is not None])
+
+ def test_identical_files(self):
+ # Multiple identical files in the same directory receive multiple
+ # ArchiveFile rows, even though they share a by-hash entry.
+ self.breezy_autotest.publish_by_hash = True
+ publisher = Publisher(
+ self.logger, self.config, self.disk_pool,
+ self.ubuntutest.main_archive)
+ suite_path = partial(
+ os.path.join, self.config.distsroot, 'breezy-autotest')
+ get_contents_files = lambda: [
+ archive_file
+ for archive_file in getUtility(IArchiveFileSet).getByArchive(
+ self.ubuntutest.main_archive)
+ if archive_file.path.startswith('dists/breezy-autotest/Contents-')]
+
+ # Create the first file.
+ with open_for_writing(suite_path('Contents-i386'), 'w') as f:
+ f.write('A Contents file\n')
+ publisher.markPocketDirty(
+ self.breezy_autotest, PackagePublishingPocket.RELEASE)
+ self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+ flush_database_caches()
+ matchers = [
+ MatchesStructure(
+ path=Equals('dists/breezy-autotest/Contents-i386'),
+ scheduled_deletion_date=Is(None))]
+ self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+ self.assertThat(
+ suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+
+ # Add a second identical file.
+ with open_for_writing(suite_path('Contents-hppa'), 'w') as f:
+ f.write('A Contents file\n')
+ self.runSteps(publisher, step_d=True)
+ flush_database_caches()
+ matchers.append(
+ MatchesStructure(
+ path=Equals('dists/breezy-autotest/Contents-hppa'),
+ scheduled_deletion_date=Is(None)))
+ self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+ self.assertThat(
+ suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+
+ # Delete the first file, but allow it its stay of execution.
+ os.unlink(suite_path('Contents-i386'))
+ self.runSteps(publisher, step_d=True)
+ flush_database_caches()
+ matchers[0] = matchers[0].update(scheduled_deletion_date=Not(Is(None)))
+ self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+ self.assertThat(
+ suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+
+ # A no-op run leaves the scheduled deletion date intact.
+ i386_file = getUtility(IArchiveFileSet).getByArchive(
+ self.ubuntutest.main_archive,
+ path=u'dists/breezy-autotest/Contents-i386').one()
+ i386_date = i386_file.scheduled_deletion_date
+ self.runSteps(publisher, step_d=True)
+ flush_database_caches()
+ matchers[0] = matchers[0].update(
+ scheduled_deletion_date=Equals(i386_date))
+ self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+ self.assertThat(
+ suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+
+ # Arrange for the first file to be pruned, and delete the second
+ # file.
+ now = datetime.now(pytz.UTC)
+ removeSecurityProxy(i386_file).scheduled_deletion_date = (
+ now - timedelta(hours=1))
+ os.unlink(suite_path('Contents-hppa'))
+ self.runSteps(publisher, step_d=True)
+ flush_database_caches()
+ matchers = [matchers[1].update(scheduled_deletion_date=Not(Is(None)))]
+ self.assertThat(get_contents_files(), MatchesSetwise(*matchers))
+ self.assertThat(
+ suite_path('by-hash'), ByHashHasContents(['A Contents file\n']))
+
+ # Arrange for the second file to be pruned.
+ hppa_file = getUtility(IArchiveFileSet).getByArchive(
+ self.ubuntutest.main_archive,
+ path=u'dists/breezy-autotest/Contents-hppa').one()
+ removeSecurityProxy(hppa_file).scheduled_deletion_date = (
+ now - timedelta(hours=1))
+ self.runSteps(publisher, step_d=True)
+ flush_database_caches()
+ self.assertContentEqual([], get_contents_files())
+ self.assertThat(suite_path('by-hash'), Not(PathExists()))
+
+ def test_reprieve(self):
+ # If a newly-modified index file is identical to a
+ # previously-condemned one, then it is reprieved and not pruned.
+ self.breezy_autotest.publish_by_hash = True
+ # Enable uncompressed index files to avoid relying on stable output
+ # from compressors in this test.
+ self.breezy_autotest.index_compressors = [
+ IndexCompressionType.UNCOMPRESSED]
+ publisher = Publisher(
+ self.logger, self.config, self.disk_pool,
+ self.ubuntutest.main_archive)
+
+ # Publish empty index files.
+ publisher.markPocketDirty(
+ self.breezy_autotest, PackagePublishingPocket.RELEASE)
+ self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+ suite_path = partial(
+ os.path.join, self.config.distsroot, 'breezy-autotest')
+ main_contents = set()
+ for name in ('Release', 'Sources'):
+ with open(suite_path('main', 'source', name), 'rb') as f:
+ main_contents.add(f.read())
+
+ # Add a source package so that Sources is non-empty.
+ pub_source = self.getPubSource(filecontent='Source: foo\n')
+ self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+ transaction.commit()
+ with open(suite_path('main', 'source', 'Sources'), 'rb') as f:
+ main_contents.add(f.read())
+ self.assertEqual(3, len(main_contents))
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ ByHashHasContents(main_contents))
+
+ # Make the empty Sources file ready to prune.
+ old_archive_files = []
+ for archive_file in getUtility(IArchiveFileSet).getByArchive(
+ self.ubuntutest.main_archive):
+ if ('main/source' in archive_file.path and
+ archive_file.scheduled_deletion_date is not None):
+ old_archive_files.append(archive_file)
+ self.assertEqual(1, len(old_archive_files))
+ removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
+ datetime.now(pytz.UTC) - timedelta(hours=1))
+
+ # Delete the source package so that Sources is empty again. The
+ # empty file is reprieved and the non-empty one is condemned.
+ pub_source.requestDeletion(self.ubuntutest.owner)
+ self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+ transaction.commit()
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ ByHashHasContents(main_contents))
+ archive_files = getUtility(IArchiveFileSet).getByArchive(
+ self.ubuntutest.main_archive,
+ path=u'dists/breezy-autotest/main/source/Sources')
+ self.assertThat(
+ sorted(archive_files, key=attrgetter('id')),
+ MatchesListwise([
+ MatchesStructure(scheduled_deletion_date=Is(None)),
+ MatchesStructure(scheduled_deletion_date=Not(Is(None))),
+ ]))
+
+ def test_prune(self):
+ # The publisher prunes files from by-hash that were condemned more
+ # than a day ago.
+ self.breezy_autotest.publish_by_hash = True
+ self.breezy_autotest.advertise_by_hash = True
+ publisher = Publisher(
+ self.logger, self.config, self.disk_pool,
+ self.ubuntutest.main_archive)
+
+ suite_path = partial(
+ os.path.join, self.config.distsroot, 'breezy-autotest')
+ main_contents = set()
+ for sourcename in ('foo', 'bar'):
+ self.getPubSource(
+ sourcename=sourcename, filecontent='Source: %s\n' % sourcename)
+ self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+ for name in ('Release', 'Sources.gz', 'Sources.bz2'):
+ with open(suite_path('main', 'source', name), 'rb') as f:
+ main_contents.add(f.read())
+ transaction.commit()
+
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ ByHashHasContents(main_contents))
+ old_archive_files = []
+ for archive_file in getUtility(IArchiveFileSet).getByArchive(
+ self.ubuntutest.main_archive):
+ if ('main/source' in archive_file.path and
+ archive_file.scheduled_deletion_date is not None):
+ old_archive_files.append(archive_file)
+ self.assertEqual(2, len(old_archive_files))
+
+ now = datetime.now(pytz.UTC)
+ removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
+ now + timedelta(hours=12))
+ removeSecurityProxy(old_archive_files[1]).scheduled_deletion_date = (
+ now - timedelta(hours=12))
+ old_archive_files[1].library_file.open()
+ try:
+ main_contents.remove(old_archive_files[1].library_file.read())
+ finally:
+ old_archive_files[1].library_file.close()
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ Not(ByHashHasContents(main_contents)))
+
+ # Use a fresh Publisher instance to ensure that it doesn't have
+ # dirty-pocket state left over from the last run.
+ publisher = Publisher(
+ self.logger, self.config, self.disk_pool,
+ self.ubuntutest.main_archive)
+ self.runSteps(publisher, step_a2=True, step_c=True, step_d=True)
+ self.assertEqual(set(), publisher.dirty_pockets)
+ self.assertContentEqual(
+ [('breezy-autotest', PackagePublishingPocket.RELEASE)],
+ publisher.release_files_needed)
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ ByHashHasContents(main_contents))
+
+
+class TestUpdateByHashOverriddenDistsroot(TestUpdateByHash):
+ """Test by-hash handling with an overridden distsroot.
+
+ This exercises the way that the publisher is used by PublishFTPMaster.
+ """
+
+ def runSteps(self, publisher, **kwargs):
+ """Run publisher steps with an overridden distsroot."""
+ original_dists = self.config.distsroot
+ temporary_dists = original_dists + ".in-progress"
+ if not os.path.exists(original_dists):
+ os.makedirs(original_dists)
+ os.rename(original_dists, temporary_dists)
+ try:
+ self.config.distsroot = temporary_dists
+ super(TestUpdateByHashOverriddenDistsroot, self).runSteps(
+ publisher, **kwargs)
+ finally:
+ self.config.distsroot = original_dists
+ os.rename(temporary_dists, original_dists)
+
+
class TestPublisherRepositorySignatures(TestPublisherBase):
"""Testing `Publisher` signature behaviour."""