launchpad-reviewers team mailing list archive
Message #20168
Re: [Merge] lp:~cjwatson/launchpad/archive-index-by-hash into lp:launchpad
Should be worth another look now.
Diff comments:
>
> === modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
> --- lib/lp/archivepublisher/tests/test_publisher.py 2016-03-11 11:45:56 +0000
> +++ lib/lp/archivepublisher/tests/test_publisher.py 2016-03-22 12:51:52 +0000
> @@ -1908,6 +2164,265 @@
>                  os.stat(os.path.join(dep11_path, name)).st_mtime,
>                  LessThan(now - 59))
>
> +    def testUpdateByHashDisabled(self):
> +        # The publisher does not create by-hash directories if it is
> +        # disabled in the series configuration.
> +        self.assertFalse(self.breezy_autotest.publish_by_hash)
> +        self.assertFalse(self.breezy_autotest.advertise_by_hash)
> +        publisher = Publisher(
> +            self.logger, self.config, self.disk_pool,
> +            self.ubuntutest.main_archive)
> +
> +        self.getPubSource(filecontent='Source: foo\n')
> +
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +
> +        suite_path = partial(
> +            os.path.join, self.config.distsroot, 'breezy-autotest')
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'), Not(PathExists()))
> +        release = self.parseRelease(suite_path('Release'))
> +        self.assertNotIn('Acquire-By-Hash', release)
> +
> +    def testUpdateByHashUnadvertised(self):
> +        # If the series configuration sets publish_by_hash but not
> +        # advertise_by_hash, then by-hash directories are created but not
> +        # advertised in Release. This is useful for testing.
> +        self.breezy_autotest.publish_by_hash = True
> +        self.assertFalse(self.breezy_autotest.advertise_by_hash)
> +        publisher = Publisher(
> +            self.logger, self.config, self.disk_pool,
> +            self.ubuntutest.main_archive)
> +
> +        self.getPubSource(filecontent='Source: foo\n')
> +
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +
> +        suite_path = partial(
> +            os.path.join, self.config.distsroot, 'breezy-autotest')
> +        self.assertThat(suite_path('main', 'source', 'by-hash'), PathExists())
> +        release = self.parseRelease(suite_path('Release'))
> +        self.assertNotIn('Acquire-By-Hash', release)
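For anyone following along: "advertised" here just means the Acquire-By-Hash field in the suite's Release file, which is what clients look at before trying by-hash paths. A minimal sketch of that client-side check (my own illustration, not code from this branch; release_fields stands in for a parsed Release file such as the dict parseRelease returns):

    def client_may_use_by_hash(release_fields):
        # By default apt only fetches indexes via by-hash/ when the
        # Release (or InRelease) file carries "Acquire-By-Hash: yes".
        return release_fields.get('Acquire-By-Hash', 'no') == 'yes'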
> +
> +    def testUpdateByHashInitial(self):
> +        # An initial publisher run populates by-hash directories and leaves
> +        # no archive files scheduled for deletion.
> +        self.breezy_autotest.publish_by_hash = True
> +        self.breezy_autotest.advertise_by_hash = True
> +        publisher = Publisher(
> +            self.logger, self.config, self.disk_pool,
> +            self.ubuntutest.main_archive)
> +
> +        self.getPubSource(filecontent='Source: foo\n')
> +
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +
> +        suite_path = partial(
> +            os.path.join, self.config.distsroot, 'breezy-autotest')
> +        contents = set()
> +        for name in ('Release', 'Sources.gz', 'Sources.bz2'):
> +            with open(suite_path('main', 'source', name), 'rb') as f:
> +                contents.add(f.read())
> +
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            ByHashHasContents(contents))
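As context for ByHashHasContents: the layout being asserted is the standard archive by-hash scheme, where each index file is also reachable under a path derived from its own checksum. Roughly (my sketch rather than this branch's code, showing only SHA256; the real directories may carry entries for other hash algorithms as well):

    import hashlib

    def by_hash_path(index_contents):
        # e.g. main/source/by-hash/SHA256/<hex digest of the Sources.gz bytes>
        return 'by-hash/SHA256/' + hashlib.sha256(index_contents).hexdigest()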
> +
> +        archive_files = getUtility(IArchiveFileSet).getByArchive(
> +            self.ubuntutest.main_archive)
> +        self.assertNotEqual([], archive_files)
> +        self.assertEqual([], [
> +            archive_file for archive_file in archive_files
> +            if archive_file.scheduled_deletion_date is not None])
> +
> +    def testUpdateByHashSubsequent(self):
> +        # A subsequent publisher run updates by-hash directories where
> +        # necessary, and marks inactive index files for later deletion.
> +        self.breezy_autotest.publish_by_hash = True
> +        self.breezy_autotest.advertise_by_hash = True
> +        publisher = Publisher(
> +            self.logger, self.config, self.disk_pool,
> +            self.ubuntutest.main_archive)
> +
> +        self.getPubSource(filecontent='Source: foo\n')
> +
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +
> +        suite_path = partial(
> +            os.path.join, self.config.distsroot, 'breezy-autotest')
> +        main_contents = set()
> +        universe_contents = set()
> +        for name in ('Release', 'Sources.gz', 'Sources.bz2'):
> +            with open(suite_path('main', 'source', name), 'rb') as f:
> +                main_contents.add(f.read())
> +            with open(suite_path('universe', 'source', name), 'rb') as f:
> +                universe_contents.add(f.read())
> +
> +        self.getPubSource(sourcename='baz', filecontent='Source: baz\n')
> +
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +
> +        for name in ('Release', 'Sources.gz', 'Sources.bz2'):
> +            with open(suite_path('main', 'source', name), 'rb') as f:
> +                main_contents.add(f.read())
> +
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            ByHashHasContents(main_contents))
> +        self.assertThat(
> +            suite_path('universe', 'source', 'by-hash'),
> +            ByHashHasContents(universe_contents))
> +
> +        archive_files = getUtility(IArchiveFileSet).getByArchive(
> +            self.ubuntutest.main_archive)
> +        self.assertContentEqual(
> +            ['dists/breezy-autotest/main/source/Sources.bz2',
> +             'dists/breezy-autotest/main/source/Sources.gz'],
> +            [archive_file.path for archive_file in archive_files
> +             if archive_file.scheduled_deletion_date is not None])
> +
> +    def testUpdateByHashReprieve(self):
I find this whole set of metaphors distinctly distasteful, TBH, but this branch is not the place to try to rename everything.
> +        # If a newly-modified index file is identical to a
> +        # previously-condemned one, then it is reprieved and not pruned.
> +        self.breezy_autotest.publish_by_hash = True
> +        # Enable uncompressed index files to avoid relying on stable output
> +        # from compressors in this test.
> +        self.breezy_autotest.index_compressors = [
> +            IndexCompressionType.UNCOMPRESSED]
> +        publisher = Publisher(
> +            self.logger, self.config, self.disk_pool,
> +            self.ubuntutest.main_archive)
> +
> +        # Publish empty index files.
> +        publisher.markPocketDirty(
> +            self.breezy_autotest, PackagePublishingPocket.RELEASE)
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +        suite_path = partial(
> +            os.path.join, self.config.distsroot, 'breezy-autotest')
> +        main_contents = set()
> +        for name in ('Release', 'Sources'):
> +            with open(suite_path('main', 'source', name), 'rb') as f:
> +                main_contents.add(f.read())
> +
> +        # Add a source package so that Sources is non-empty.
> +        pub_source = self.getPubSource(filecontent='Source: foo\n')
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +        transaction.commit()
> +        with open(suite_path('main', 'source', 'Sources'), 'rb') as f:
> +            main_contents.add(f.read())
> +        self.assertEqual(3, len(main_contents))
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            ByHashHasContents(main_contents))
> +
> +        # Make the empty Sources file ready to prune.
> +        old_archive_files = []
> +        for archive_file in getUtility(IArchiveFileSet).getByArchive(
> +                self.ubuntutest.main_archive):
> +            if ('main/source' in archive_file.path and
> +                    archive_file.scheduled_deletion_date is not None):
> +                old_archive_files.append(archive_file)
> +        self.assertEqual(1, len(old_archive_files))
> +        removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
> +            datetime.now(pytz.UTC) - timedelta(hours=1))
> +
> +        # Delete the source package so that Sources is empty again. The
> +        # empty file is reprieved and the non-empty one is condemned.
> +        pub_source.requestDeletion(self.ubuntutest.owner)
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +        transaction.commit()
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            ByHashHasContents(main_contents))
> +        archive_files = [
> +            archive_file
> +            for archive_file in getUtility(IArchiveFileSet).getByArchive(
> +                self.ubuntutest.main_archive)
> +            if archive_file.path == 'dists/breezy-autotest/main/source/Sources'
> +            ]
> +        self.assertThat(
> +            sorted(archive_files, key=attrgetter('id')),
> +            MatchesListwise([
> +                MatchesStructure(scheduled_deletion_date=Is(None)),
> +                MatchesStructure(scheduled_deletion_date=Not(Is(None))),
> +                ]))
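Since the condemned/reprieved terminology takes a moment to decode, here is the per-index lifecycle these assertions exercise as I read it, sketched with my own names rather than the branch's code:

    # Hypothetical sketch, not the branch's implementation.
    def update_index_history(rows_for_path, new_checksum, condemned_until):
        for row in rows_for_path:
            if row.checksum == new_checksum:
                # Identical content has become current again: reprieve it.
                row.scheduled_deletion_date = None
            elif row.scheduled_deletion_date is None:
                # Content is no longer current: condemn it for later pruning.
                row.scheduled_deletion_date = condemned_until
        # Rows whose scheduled_deletion_date has passed are pruned from
        # by-hash in a separate step.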
> +
> +    def testUpdateByHashPrune(self):
> +        # The publisher prunes files from by-hash that were condemned more
> +        # than a day ago.
> +        self.breezy_autotest.publish_by_hash = True
> +        self.breezy_autotest.advertise_by_hash = True
> +        publisher = Publisher(
> +            self.logger, self.config, self.disk_pool,
> +            self.ubuntutest.main_archive)
> +
> +        suite_path = partial(
> +            os.path.join, self.config.distsroot, 'breezy-autotest')
> +        main_contents = set()
> +        for sourcename in ('foo', 'bar'):
> +            self.getPubSource(
> +                sourcename=sourcename, filecontent='Source: %s\n' % sourcename)
> +            publisher.A_publish(False)
> +            publisher.C_doFTPArchive(False)
> +            publisher.D_writeReleaseFiles(False)
> +            for name in ('Release', 'Sources.gz', 'Sources.bz2'):
> +                with open(suite_path('main', 'source', name), 'rb') as f:
> +                    main_contents.add(f.read())
> +        transaction.commit()
> +        # Undo any previous determination that breezy-autotest is dirty, so
> +        # that we can use that to check that future runs don't force index
> +        # regeneration.
> +        publisher.dirty_pockets = set()
> +
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            ByHashHasContents(main_contents))
> +        old_archive_files = []
> +        for archive_file in getUtility(IArchiveFileSet).getByArchive(
> +                self.ubuntutest.main_archive):
> +            if ('main/source' in archive_file.path and
> +                    archive_file.scheduled_deletion_date is not None):
> +                old_archive_files.append(archive_file)
> +        self.assertEqual(2, len(old_archive_files))
> +
> +        now = datetime.now(pytz.UTC)
> +        removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
> +            now + timedelta(hours=12))
> +        removeSecurityProxy(old_archive_files[1]).scheduled_deletion_date = (
> +            now - timedelta(hours=12))
> +        old_archive_files[1].library_file.open()
> +        try:
> +            main_contents.remove(old_archive_files[1].library_file.read())
> +        finally:
> +            old_archive_files[1].library_file.close()
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            Not(ByHashHasContents(main_contents)))
> +
> +        publisher.A2_markPocketsWithDeletionsDirty()
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +        self.assertEqual(set(), publisher.dirty_pockets)
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            ByHashHasContents(main_contents))
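One note that helped me read this test: "condemned more than a day ago" is carried entirely by scheduled_deletion_date, which I read as being set about a day in the future at condemnation time, so the prune pass itself only needs a date comparison, along these lines (my names, not the branch's):

    def is_prunable(archive_file, now):
        # Prune anything whose deletion date has passed; still-current
        # files have no scheduled_deletion_date at all.
        return (archive_file.scheduled_deletion_date is not None
                and archive_file.scheduled_deletion_date < now)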
> +
>      def testCreateSeriesAliasesNoAlias(self):
>          """createSeriesAliases has nothing to do by default."""
>          publisher = Publisher(
>
> === modified file 'lib/lp/registry/model/distribution.py'
> --- lib/lp/registry/model/distribution.py 2015-10-13 13:22:08 +0000
> +++ lib/lp/registry/model/distribution.py 2016-03-22 12:51:52 +0000
> @@ -1283,10 +1283,22 @@
>              bin_query, clauseTables=['BinaryPackagePublishingHistory'],
>              orderBy=['archive.id'], distinct=True)
>
> +        reapable_af_query = """
> +            Archive.purpose = %s AND
> +            Archive.distribution = %s AND
> +            ArchiveFile.archive = archive.id AND
> +            ArchiveFile.scheduled_deletion_date < %s
> +            """ % sqlvalues(ArchivePurpose.PPA, self, UTC_NOW)
> +
> +        reapable_af_archives = Archive.select(
> +            reapable_af_query, clauseTables=['ArchiveFile'],
> +            orderBy=['archive.id'], distinct=True)
May not be definitive since ArchiveFile is empty on dogfood, but it looks OK there:
launchpad_dogfood=# EXPLAIN (ANALYZE ON, BUFFERS ON) SELECT Archive.id FROM Archive, ArchiveFile WHERE Archive.purpose = 2 AND Archive.distribution = 1 AND ArchiveFile.archive = archive.id AND ArchiveFile.scheduled_deletion_date < CURRENT_TIMESTAMP AT TIME ZONE 'UTC';
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Nested Loop  (cost=4.61..1849.59 rows=236 width=4) (actual time=0.018..0.018 rows=0 loops=1)
   Buffers: shared hit=1
   ->  Bitmap Heap Scan on archivefile  (cost=4.20..18.34 rows=237 width=4) (actual time=0.016..0.016 rows=0 loops=1)
         Recheck Cond: (scheduled_deletion_date < timezone('UTC'::text, now()))
         Buffers: shared hit=1
         ->  Bitmap Index Scan on archivefile__archive__scheduled_deletion_date__container__idx  (cost=0.00..4.14 rows=237 width=0) (actual time=0.012..0.012 rows=0 loops=1)
               Index Cond: (scheduled_deletion_date < timezone('UTC'::text, now()))
               Buffers: shared hit=1
   ->  Index Scan using archive_pkey on archive  (cost=0.42..7.72 rows=1 width=4) (never executed)
         Index Cond: (id = archivefile.archive)
         Filter: ((purpose = 2) AND (distribution = 1))
 Total runtime: 8.503 ms
(12 rows)
> +
>          deleting_archives = Archive.selectBy(
>              status=ArchiveStatus.DELETING).orderBy(['archive.id'])
>
> -        return src_archives.union(bin_archives).union(deleting_archives)
> +        return src_archives.union(bin_archives).union(
> +            reapable_af_archives).union(deleting_archives)
>
>      def getArchiveByComponent(self, component_name):
>          """See `IDistribution`."""
--
https://code.launchpad.net/~cjwatson/launchpad/archive-index-by-hash/+merge/289379
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.