launchpad-reviewers team mailing list archive

Re: [Merge] lp:~cjwatson/launchpad/archive-index-by-hash into lp:launchpad

 

Should be worth another look now.

Diff comments:

> 
> === modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
> --- lib/lp/archivepublisher/tests/test_publisher.py	2016-03-11 11:45:56 +0000
> +++ lib/lp/archivepublisher/tests/test_publisher.py	2016-03-22 12:51:52 +0000
> @@ -1908,6 +2164,265 @@
>                  os.stat(os.path.join(dep11_path, name)).st_mtime,
>                  LessThan(now - 59))
>  
> +    def testUpdateByHashDisabled(self):
> +        # The publisher does not create by-hash directories if it is
> +        # disabled in the series configuration.
> +        self.assertFalse(self.breezy_autotest.publish_by_hash)
> +        self.assertFalse(self.breezy_autotest.advertise_by_hash)
> +        publisher = Publisher(
> +            self.logger, self.config, self.disk_pool,
> +            self.ubuntutest.main_archive)
> +
> +        self.getPubSource(filecontent='Source: foo\n')
> +
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +
> +        suite_path = partial(
> +            os.path.join, self.config.distsroot, 'breezy-autotest')
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'), Not(PathExists()))
> +        release = self.parseRelease(suite_path('Release'))
> +        self.assertNotIn('Acquire-By-Hash', release)
> +
> +    def testUpdateByHashUnadvertised(self):
> +        # If the series configuration sets publish_by_hash but not
> +        # advertise_by_hash, then by-hash directories are created but not
> +        # advertised in Release.  This is useful for testing.
> +        self.breezy_autotest.publish_by_hash = True
> +        self.assertFalse(self.breezy_autotest.advertise_by_hash)
> +        publisher = Publisher(
> +            self.logger, self.config, self.disk_pool,
> +            self.ubuntutest.main_archive)
> +
> +        self.getPubSource(filecontent='Source: foo\n')
> +
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +
> +        suite_path = partial(
> +            os.path.join, self.config.distsroot, 'breezy-autotest')
> +        self.assertThat(suite_path('main', 'source', 'by-hash'), PathExists())
> +        release = self.parseRelease(suite_path('Release'))
> +        self.assertNotIn('Acquire-By-Hash', release)
> +
> +    def testUpdateByHashInitial(self):
> +        # An initial publisher run populates by-hash directories and leaves
> +        # no archive files scheduled for deletion.
> +        self.breezy_autotest.publish_by_hash = True
> +        self.breezy_autotest.advertise_by_hash = True
> +        publisher = Publisher(
> +            self.logger, self.config, self.disk_pool,
> +            self.ubuntutest.main_archive)
> +
> +        self.getPubSource(filecontent='Source: foo\n')
> +
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +
> +        suite_path = partial(
> +            os.path.join, self.config.distsroot, 'breezy-autotest')
> +        contents = set()
> +        for name in ('Release', 'Sources.gz', 'Sources.bz2'):
> +            with open(suite_path('main', 'source', name), 'rb') as f:
> +                contents.add(f.read())
> +
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            ByHashHasContents(contents))
> +
> +        archive_files = getUtility(IArchiveFileSet).getByArchive(
> +            self.ubuntutest.main_archive)
> +        self.assertNotEqual([], archive_files)
> +        self.assertEqual([], [
> +            archive_file for archive_file in archive_files
> +            if archive_file.scheduled_deletion_date is not None])
> +
> +    def testUpdateByHashSubsequent(self):
> +        # A subsequent publisher run updates by-hash directories where
> +        # necessary, and marks inactive index files for later deletion.
> +        self.breezy_autotest.publish_by_hash = True
> +        self.breezy_autotest.advertise_by_hash = True
> +        publisher = Publisher(
> +            self.logger, self.config, self.disk_pool,
> +            self.ubuntutest.main_archive)
> +
> +        self.getPubSource(filecontent='Source: foo\n')
> +
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +
> +        suite_path = partial(
> +            os.path.join, self.config.distsroot, 'breezy-autotest')
> +        main_contents = set()
> +        universe_contents = set()
> +        for name in ('Release', 'Sources.gz', 'Sources.bz2'):
> +            with open(suite_path('main', 'source', name), 'rb') as f:
> +                main_contents.add(f.read())
> +            with open(suite_path('universe', 'source', name), 'rb') as f:
> +                universe_contents.add(f.read())
> +
> +        self.getPubSource(sourcename='baz', filecontent='Source: baz\n')
> +
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +
> +        for name in ('Release', 'Sources.gz', 'Sources.bz2'):
> +            with open(suite_path('main', 'source', name), 'rb') as f:
> +                main_contents.add(f.read())
> +
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            ByHashHasContents(main_contents))
> +        self.assertThat(
> +            suite_path('universe', 'source', 'by-hash'),
> +            ByHashHasContents(universe_contents))
> +
> +        archive_files = getUtility(IArchiveFileSet).getByArchive(
> +            self.ubuntutest.main_archive)
> +        self.assertContentEqual(
> +            ['dists/breezy-autotest/main/source/Sources.bz2',
> +             'dists/breezy-autotest/main/source/Sources.gz'],
> +            [archive_file.path for archive_file in archive_files
> +             if archive_file.scheduled_deletion_date is not None])
> +
> +    def testUpdateByHashReprieve(self):

I find this whole set of metaphors distinctly distasteful, TBH, but this branch is not the place to try to rename everything.
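For what it's worth, the lifecycle those terms describe, as the tests below exercise it, is roughly the following. This is a minimal illustrative sketch with made-up names, not code from this branch:

# Illustrative sketch only; names here are hypothetical and this is not the
# publisher's implementation.
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Optional

import pytz

STAY_OF_EXECUTION = timedelta(days=1)  # how long a condemned file lingers


@dataclass
class IndexFile:
    path: str
    checksum: str
    scheduled_deletion_date: Optional[datetime] = None


def update_by_hash(index_files, live_checksums):
    """Condemn/reprieve in place; return the files now due for pruning."""
    now = datetime.now(pytz.UTC)
    prunable = []
    for index_file in index_files:
        if index_file.checksum in live_checksums:
            # Reprieved: identical content is current again, so any pending
            # deletion is cancelled.
            index_file.scheduled_deletion_date = None
        elif index_file.scheduled_deletion_date is None:
            # Condemned: no longer referenced; schedule it for deletion later.
            index_file.scheduled_deletion_date = now + STAY_OF_EXECUTION
        elif index_file.scheduled_deletion_date < now:
            # Pruned: condemned long enough ago; drop it from by-hash.
            prunable.append(index_file)
    return prunable

In the branch itself that state is just ArchiveFile.scheduled_deletion_date, which is what the assertions below check.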

> +        # If a newly-modified index file is identical to a
> +        # previously-condemned one, then it is reprieved and not pruned.
> +        self.breezy_autotest.publish_by_hash = True
> +        # Enable uncompressed index files to avoid relying on stable output
> +        # from compressors in this test.
> +        self.breezy_autotest.index_compressors = [
> +            IndexCompressionType.UNCOMPRESSED]
> +        publisher = Publisher(
> +            self.logger, self.config, self.disk_pool,
> +            self.ubuntutest.main_archive)
> +
> +        # Publish empty index files.
> +        publisher.markPocketDirty(
> +            self.breezy_autotest, PackagePublishingPocket.RELEASE)
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +        suite_path = partial(
> +            os.path.join, self.config.distsroot, 'breezy-autotest')
> +        main_contents = set()
> +        for name in ('Release', 'Sources'):
> +            with open(suite_path('main', 'source', name), 'rb') as f:
> +                main_contents.add(f.read())
> +
> +        # Add a source package so that Sources is non-empty.
> +        pub_source = self.getPubSource(filecontent='Source: foo\n')
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +        transaction.commit()
> +        with open(suite_path('main', 'source', 'Sources'), 'rb') as f:
> +            main_contents.add(f.read())
> +        self.assertEqual(3, len(main_contents))
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            ByHashHasContents(main_contents))
> +
> +        # Make the empty Sources file ready to prune.
> +        old_archive_files = []
> +        for archive_file in getUtility(IArchiveFileSet).getByArchive(
> +                self.ubuntutest.main_archive):
> +            if ('main/source' in archive_file.path and
> +                    archive_file.scheduled_deletion_date is not None):
> +                old_archive_files.append(archive_file)
> +        self.assertEqual(1, len(old_archive_files))
> +        removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
> +            datetime.now(pytz.UTC) - timedelta(hours=1))
> +
> +        # Delete the source package so that Sources is empty again.  The
> +        # empty file is reprieved and the non-empty one is condemned.
> +        pub_source.requestDeletion(self.ubuntutest.owner)
> +        publisher.A_publish(False)
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +        transaction.commit()
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            ByHashHasContents(main_contents))
> +        archive_files = [
> +            archive_file
> +            for archive_file in getUtility(IArchiveFileSet).getByArchive(
> +                self.ubuntutest.main_archive)
> +            if archive_file.path == 'dists/breezy-autotest/main/source/Sources'
> +            ]
> +        self.assertThat(
> +            sorted(archive_files, key=attrgetter('id')),
> +            MatchesListwise([
> +                MatchesStructure(scheduled_deletion_date=Is(None)),
> +                MatchesStructure(scheduled_deletion_date=Not(Is(None))),
> +                ]))
> +
> +    def testUpdateByHashPrune(self):
> +        # The publisher prunes files from by-hash that were condemned more
> +        # than a day ago.
> +        self.breezy_autotest.publish_by_hash = True
> +        self.breezy_autotest.advertise_by_hash = True
> +        publisher = Publisher(
> +            self.logger, self.config, self.disk_pool,
> +            self.ubuntutest.main_archive)
> +
> +        suite_path = partial(
> +            os.path.join, self.config.distsroot, 'breezy-autotest')
> +        main_contents = set()
> +        for sourcename in ('foo', 'bar'):
> +            self.getPubSource(
> +                sourcename=sourcename, filecontent='Source: %s\n' % sourcename)
> +            publisher.A_publish(False)
> +            publisher.C_doFTPArchive(False)
> +            publisher.D_writeReleaseFiles(False)
> +            for name in ('Release', 'Sources.gz', 'Sources.bz2'):
> +                with open(suite_path('main', 'source', name), 'rb') as f:
> +                    main_contents.add(f.read())
> +        transaction.commit()
> +        # Undo any previous determination that breezy-autotest is dirty, so
> +        # that we can use that to check that future runs don't force index
> +        # regeneration.
> +        publisher.dirty_pockets = set()
> +
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            ByHashHasContents(main_contents))
> +        old_archive_files = []
> +        for archive_file in getUtility(IArchiveFileSet).getByArchive(
> +                self.ubuntutest.main_archive):
> +            if ('main/source' in archive_file.path and
> +                    archive_file.scheduled_deletion_date is not None):
> +                old_archive_files.append(archive_file)
> +        self.assertEqual(2, len(old_archive_files))
> +
> +        now = datetime.now(pytz.UTC)
> +        removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
> +            now + timedelta(hours=12))
> +        removeSecurityProxy(old_archive_files[1]).scheduled_deletion_date = (
> +            now - timedelta(hours=12))
> +        old_archive_files[1].library_file.open()
> +        try:
> +            main_contents.remove(old_archive_files[1].library_file.read())
> +        finally:
> +            old_archive_files[1].library_file.close()
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            Not(ByHashHasContents(main_contents)))
> +
> +        publisher.A2_markPocketsWithDeletionsDirty()
> +        publisher.C_doFTPArchive(False)
> +        publisher.D_writeReleaseFiles(False)
> +        self.assertEqual(set(), publisher.dirty_pockets)
> +        self.assertThat(
> +            suite_path('main', 'source', 'by-hash'),
> +            ByHashHasContents(main_contents))
> +
>      def testCreateSeriesAliasesNoAlias(self):
>          """createSeriesAliases has nothing to do by default."""
>          publisher = Publisher(
> 
> === modified file 'lib/lp/registry/model/distribution.py'
> --- lib/lp/registry/model/distribution.py	2015-10-13 13:22:08 +0000
> +++ lib/lp/registry/model/distribution.py	2016-03-22 12:51:52 +0000
> @@ -1283,10 +1283,22 @@
>              bin_query, clauseTables=['BinaryPackagePublishingHistory'],
>              orderBy=['archive.id'], distinct=True)
>  
> +        reapable_af_query = """
> +        Archive.purpose = %s AND
> +        Archive.distribution = %s AND
> +        ArchiveFile.archive = archive.id AND
> +        ArchiveFile.scheduled_deletion_date < %s
> +        """ % sqlvalues(ArchivePurpose.PPA, self, UTC_NOW)
> +
> +        reapable_af_archives = Archive.select(
> +            reapable_af_query, clauseTables=['ArchiveFile'],
> +            orderBy=['archive.id'], distinct=True)

May not be definitive since ArchiveFile is empty on dogfood, but it looks OK there:

launchpad_dogfood=# EXPLAIN (ANALYZE ON, BUFFERS ON) SELECT Archive.id FROM Archive, ArchiveFile WHERE Archive.purpose = 2 AND Archive.distribution = 1 AND ArchiveFile.archive = archive.id AND ArchiveFile.scheduled_deletion_date < CURRENT_TIMESTAMP AT TIME ZONE 'UTC';
                                                                                  QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Nested Loop  (cost=4.61..1849.59 rows=236 width=4) (actual time=0.018..0.018 rows=0 loops=1)
   Buffers: shared hit=1
   ->  Bitmap Heap Scan on archivefile  (cost=4.20..18.34 rows=237 width=4) (actual time=0.016..0.016 rows=0 loops=1)
         Recheck Cond: (scheduled_deletion_date < timezone('UTC'::text, now()))
         Buffers: shared hit=1
         ->  Bitmap Index Scan on archivefile__archive__scheduled_deletion_date__container__idx  (cost=0.00..4.14 rows=237 width=0) (actual time=0.012..0.012 rows=0 loops=1)
               Index Cond: (scheduled_deletion_date < timezone('UTC'::text, now()))
               Buffers: shared hit=1
   ->  Index Scan using archive_pkey on archive  (cost=0.42..7.72 rows=1 width=4) (never executed)
         Index Cond: (id = archivefile.archive)
         Filter: ((purpose = 2) AND (distribution = 1))
 Total runtime: 8.503 ms
(12 rows)

> +
>          deleting_archives = Archive.selectBy(
>              status=ArchiveStatus.DELETING).orderBy(['archive.id'])
>  
> -        return src_archives.union(bin_archives).union(deleting_archives)
> +        return src_archives.union(bin_archives).union(
> +            reapable_af_archives).union(deleting_archives)
>  
>      def getArchiveByComponent(self, component_name):
>          """See `IDistribution`."""


-- 
https://code.launchpad.net/~cjwatson/launchpad/archive-index-by-hash/+merge/289379
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.

