← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/uncompressed-indexes into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/uncompressed-indexes into lp:launchpad.

Commit message:
Consistently generate Release entries for uncompressed versions of files, even if they don't exist on the filesystem.  Don't create uncompressed Packages/Sources files on the filesystem.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/uncompressed-indexes/+merge/285109

Clean up handling of uncompressed archive index files.

apt requires checksums of the uncompressed versions of index files it downloads, even if it doesn't actually download the uncompressed versions.  Our approach to this for Packages, Sources, and Translation-* has historically been to create the uncompressed versions as well.  However, for the primary archive this requires a hack in lp:ubuntu-archive-publishing to remove the uncompressed versions before mirroring, and for PPAs it just leaves lots of unnecessary cruft all over the filesystem.  Contents handling is now moving into apt, as well as DEP-11 metadata; in both those cases it's quite inconvenient to take the same approach, so it's time to clean this up.

We now generate Release entries for uncompressed versions of files regardless of whether they exist on the filesystem, by decompressing one of the other versions.  This means that we can also stop creating uncompressed Packages/Sources files, and progressively clean these up as archives are published.  The hack in ubuntu-archive-publishing can be removed after this is deployed.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/uncompressed-indexes into lp:launchpad.
=== modified file 'lib/lp/archivepublisher/model/ftparchive.py'
--- lib/lp/archivepublisher/model/ftparchive.py	2014-06-27 03:20:30 +0000
+++ lib/lp/archivepublisher/model/ftparchive.py	2016-02-04 19:54:30 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2014 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 from collections import defaultdict
@@ -67,10 +67,10 @@
 
 Default
 {
-    Packages::Compress ". gzip bzip2";
-    Sources::Compress ". gzip bzip2";
+    Packages::Compress "gzip bzip2";
+    Sources::Compress "gzip bzip2";
     Contents::Compress "gzip";
-    Translation::Compress ". gzip bzip2";
+    Translation::Compress "gzip bzip2";
     DeLinkLimit 0;
     MaxContentsChange 12000;
     FileMode 0644;

=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py	2016-01-14 17:00:34 +0000
+++ lib/lp/archivepublisher/publishing.py	2016-02-04 19:54:30 +0000
@@ -11,8 +11,10 @@
 
 __metaclass__ = type
 
+import bz2
 from datetime import datetime
 import errno
+import gzip
 import hashlib
 from itertools import (
     chain,
@@ -110,6 +112,16 @@
     return ordered
 
 
+def remove_suffix(path):
+    """Return `path` but with any compression suffix removed."""
+    if path.endswith('.gz'):
+        return path[:-len('.gz')]
+    elif path.endswith('.bz2'):
+        return path[:-len('.bz2')]
+    else:
+        return path
+
+
 def get_suffixed_indices(path):
     """Return a set of paths to compressed copies of the given index."""
     return set([path + suffix for suffix in ('', '.gz', '.bz2')])
@@ -563,20 +575,26 @@
                 self._writeSuite(distroseries, pocket)
 
     def _allIndexFiles(self, distroseries):
-        """Return all index files on disk for a distroseries."""
+        """Return all index files on disk for a distroseries.
+
+        For each index file, this yields a tuple of (function to open file
+        in uncompressed form, path to file).
+        """
         components = self.archive.getComponentsForSeries(distroseries)
         for pocket in self.archive.getPockets():
             suite_name = distroseries.getSuite(pocket)
             for component in components:
-                yield get_sources_path(self._config, suite_name, component)
+                yield gzip.open, get_sources_path(
+                    self._config, suite_name, component) + ".gz"
                 for arch in distroseries.architectures:
                     if not arch.enabled:
                         continue
-                    yield get_packages_path(
-                        self._config, suite_name, component, arch)
+                    yield gzip.open, get_packages_path(
+                        self._config, suite_name, component, arch) + ".gz"
                     for subcomp in self.subcomponents:
-                        yield get_packages_path(
-                            self._config, suite_name, component, arch, subcomp)
+                        yield gzip.open, get_packages_path(
+                            self._config, suite_name, component, arch,
+                            subcomp) + ".gz"
 
     def _latestNonEmptySeries(self):
         """Find the latest non-empty series in an archive.
@@ -587,11 +605,12 @@
         through what we published on disk.
         """
         for distroseries in self.distro:
-            for index in self._allIndexFiles(distroseries):
+            for open_func, index in self._allIndexFiles(distroseries):
                 try:
-                    if os.path.getsize(index) > 0:
-                        return distroseries
-                except OSError:
+                    with open_func(index) as index_file:
+                        if index_file.read(1):
+                            return distroseries
+                except IOError:
                     pass
 
     def createSeriesAliases(self):
@@ -793,6 +812,7 @@
         """Make sure the timestamps on all files in a suite match."""
         location = os.path.join(self._config.distsroot, suite)
         paths = [os.path.join(location, path) for path in all_files]
+        paths = [path for path in paths if os.path.exists(path)]
         latest_timestamp = max(os.stat(path).st_mtime for path in paths)
         for path in paths:
             os.utime(path, (latest_timestamp, latest_timestamp))
@@ -831,17 +851,17 @@
                 for dep11_file in os.listdir(dep11_dir):
                     if (dep11_file.startswith("Components-") or
                             dep11_file.startswith("icons-")):
-                        all_files.add(
-                            os.path.join(component, "dep11", dep11_file))
+                        dep11_path = os.path.join(
+                            component, "dep11", dep11_file)
+                        all_files.add(remove_suffix(dep11_path))
+                        all_files.add(dep11_path)
             except OSError as e:
                 if e.errno != errno.ENOENT:
                     raise
         for architecture in all_architectures:
             for contents_path in get_suffixed_indices(
                     'Contents-' + architecture):
-                if os.path.exists(os.path.join(
-                        self._config.distsroot, suite, contents_path)):
-                    all_files.add(contents_path)
+                all_files.add(contents_path)
 
         drsummary = "%s %s " % (self.distro.displayname,
                                 distroseries.displayname)
@@ -960,12 +980,13 @@
 
         i18n_subpath = os.path.join(component, "i18n")
         i18n_dir = os.path.join(self._config.distsroot, suite, i18n_subpath)
-        i18n_files = []
+        i18n_files = set()
         try:
             for i18n_file in os.listdir(i18n_dir):
                 if not i18n_file.startswith('Translation-'):
                     continue
-                i18n_files.append(i18n_file)
+                i18n_files.add(remove_suffix(i18n_file))
+                i18n_files.add(i18n_file)
         except OSError as e:
             if e.errno != errno.ENOENT:
                 raise
@@ -1000,16 +1021,24 @@
         :param file_name: Filename relative to the parent container directory.
         :return: File contents, or None if the file could not be found.
         """
+        open_func = open
         full_name = os.path.join(self._config.distsroot,
                                  distroseries_name, file_name)
         if not os.path.exists(full_name):
-            # The file we were asked to write out doesn't exist.
-            # Most likely we have an incomplete archive (E.g. no sources
-            # for a given distroseries). This is a non-fatal issue
-            self.log.debug("Failed to find " + full_name)
-            return None
+            if os.path.exists(full_name + '.gz'):
+                open_func = gzip.open
+                full_name = full_name + '.gz'
+            elif os.path.exists(full_name + '.bz2'):
+                open_func = bz2.BZ2File
+                full_name = full_name + '.bz2'
+            else:
+                # The file we were asked to write out doesn't exist.
+                # Most likely we have an incomplete archive (e.g. no sources
+                # for a given distroseries). This is a non-fatal issue.
+                self.log.debug("Failed to find " + full_name)
+                return None
 
-        with open(full_name, 'r') as in_file:
+        with open_func(full_name) as in_file:
             return in_file.read()
 
     def deleteArchive(self):

=== modified file 'lib/lp/archivepublisher/tests/apt-data/apt.conf'
--- lib/lp/archivepublisher/tests/apt-data/apt.conf	2014-06-11 08:23:35 +0000
+++ lib/lp/archivepublisher/tests/apt-data/apt.conf	2016-02-04 19:54:30 +0000
@@ -8,10 +8,10 @@
 
 Default
 {
-    Packages::Compress ". gzip bzip2";
-    Sources::Compress ". gzip bzip2";
+    Packages::Compress "gzip bzip2";
+    Sources::Compress "gzip bzip2";
     Contents::Compress "gzip";
-    Translation::Compress ". gzip bzip2";
+    Translation::Compress "gzip bzip2";
     DeLinkLimit 0;
     MaxContentsChange 12000;
     FileMode 0644;

=== modified file 'lib/lp/archivepublisher/tests/apt-data/apt_conf_single_empty_suite_test'
--- lib/lp/archivepublisher/tests/apt-data/apt_conf_single_empty_suite_test	2014-06-11 08:23:35 +0000
+++ lib/lp/archivepublisher/tests/apt-data/apt_conf_single_empty_suite_test	2016-02-04 19:54:30 +0000
@@ -8,10 +8,10 @@
 
 Default
 {
-    Packages::Compress ". gzip bzip2";
-    Sources::Compress ". gzip bzip2";
+    Packages::Compress "gzip bzip2";
+    Sources::Compress "gzip bzip2";
     Contents::Compress "gzip";
-    Translation::Compress ". gzip bzip2";
+    Translation::Compress "gzip bzip2";
     DeLinkLimit 0;
     MaxContentsChange 12000;
     FileMode 0644;

=== modified file 'lib/lp/archivepublisher/tests/test_ftparchive.py'
--- lib/lp/archivepublisher/tests/test_ftparchive.py	2015-04-09 05:16:37 +0000
+++ lib/lp/archivepublisher/tests/test_ftparchive.py	2016-02-04 19:54:30 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2015 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Tests for ftparchive.py"""
@@ -6,6 +6,7 @@
 __metaclass__ = type
 
 import difflib
+import gzip
 import os
 import re
 import shutil
@@ -98,15 +99,18 @@
         super(TestFTPArchive, self).tearDown()
         shutil.rmtree(self._config.distroroot)
 
-    def _verifyFile(self, filename, directory):
+    def _verifyFile(self, filename, directory,
+                    result_suffix="", result_open_func=open):
         """Compare byte-to-byte the given file and the respective sample.
 
         It's a poor way of testing files generated by apt-ftparchive.
         """
-        result_path = os.path.join(directory, filename)
-        result_text = open(result_path).read()
+        result_path = os.path.join(directory, filename) + result_suffix
+        with result_open_func(result_path) as result_file:
+            result_text = result_file.read()
         sample_path = os.path.join(self._sampledir, filename)
-        sample_text = open(sample_path).read()
+        with open(sample_path) as sample_file:
+            sample_text = sample_file.read()
         # When the comparison between the sample text and the generated text
         # differ, just printing the strings will be less than optimal.  Use
         # difflib to get a line-by-line comparison that makes it much more
@@ -115,9 +119,9 @@
             sample_text.splitlines(), result_text.splitlines())
         self.assertEqual(sample_text, result_text, '\n'.join(diff_lines))
 
-    def _verifyEmpty(self, path):
+    def _verifyEmpty(self, path, open_func=open):
         """Assert that the given file is empty."""
-        with open(path) as result_file:
+        with open_func(path) as result_file:
             self.assertEqual("", result_file.read())
 
     def _addRepositoryFile(self, component, sourcename, leafname,
@@ -436,14 +440,19 @@
         # check'. Although they should remain active in PQM to avoid possible
         # regressions.
         fa.runApt(apt_conf)
-        self._verifyFile("Packages",
-            os.path.join(self._distsdir, "hoary-test", "main", "binary-i386"))
+        self._verifyFile(
+            "Packages",
+            os.path.join(self._distsdir, "hoary-test", "main", "binary-i386"),
+            result_suffix=".gz", result_open_func=gzip.open)
         self._verifyEmpty(
             os.path.join(
                 self._distsdir, "hoary-test", "main", "debian-installer",
-                "binary-i386", "Packages"))
-        self._verifyFile("Sources",
-            os.path.join(self._distsdir, "hoary-test", "main", "source"))
+                "binary-i386", "Packages.gz"),
+            open_func=gzip.open)
+        self._verifyFile(
+            "Sources",
+            os.path.join(self._distsdir, "hoary-test", "main", "source"),
+            result_suffix=".gz", result_open_func=gzip.open)
 
         # XXX cprov 2007-03-21: see above, byte-to-byte configuration
         # comparing is weak.
@@ -509,23 +518,23 @@
         fa.runApt(apt_conf)
         self.assertTrue(os.path.exists(
             os.path.join(self._distsdir, "hoary-test-updates", "main",
-                         "binary-i386", "Packages")))
-        self.assertTrue(os.path.exists(
-            os.path.join(self._distsdir, "hoary-test-updates", "main",
-                         "debian-installer", "binary-i386", "Packages")))
-        self.assertTrue(os.path.exists(
-            os.path.join(self._distsdir, "hoary-test-updates", "main",
-                         "source", "Sources")))
+                         "binary-i386", "Packages.gz")))
+        self.assertTrue(os.path.exists(
+            os.path.join(self._distsdir, "hoary-test-updates", "main",
+                         "debian-installer", "binary-i386", "Packages.gz")))
+        self.assertTrue(os.path.exists(
+            os.path.join(self._distsdir, "hoary-test-updates", "main",
+                         "source", "Sources.gz")))
 
         self.assertFalse(os.path.exists(
             os.path.join(self._distsdir, "hoary-test", "main",
-                         "binary-i386", "Packages")))
-        self.assertFalse(os.path.exists(
-            os.path.join(self._distsdir, "hoary-test", "main",
-                         "debian-installer", "binary-i386", "Packages")))
-        self.assertFalse(os.path.exists(
-            os.path.join(self._distsdir, "hoary-test", "main",
-                         "source", "Sources")))
+                         "binary-i386", "Packages.gz")))
+        self.assertFalse(os.path.exists(
+            os.path.join(self._distsdir, "hoary-test", "main",
+                         "debian-installer", "binary-i386", "Packages.gz")))
+        self.assertFalse(os.path.exists(
+            os.path.join(self._distsdir, "hoary-test", "main",
+                         "source", "Sources.gz")))
 
     def test_cleanCaches_noop_if_recent(self):
         # cleanCaches does nothing if it was run recently.

=== modified file 'lib/lp/archivepublisher/tests/test_publish_ftpmaster.py'
--- lib/lp/archivepublisher/tests/test_publish_ftpmaster.py	2016-01-13 17:54:05 +0000
+++ lib/lp/archivepublisher/tests/test_publish_ftpmaster.py	2016-02-04 19:54:30 +0000
@@ -1309,5 +1309,5 @@
         self.assertEqual([], script.listSuitesNeedingIndexes(series))
         sources = os.path.join(
             getPubConfig(series.main_archive).distsroot,
-            series.name, "main", "source", "Sources")
+            series.name, "main", "source", "Sources.gz")
         self.assertTrue(file_exists(sources))

=== modified file 'lib/lp/archivepublisher/tests/test_publishdistro.py'
--- lib/lp/archivepublisher/tests/test_publishdistro.py	2015-08-28 08:38:54 +0000
+++ lib/lp/archivepublisher/tests/test_publishdistro.py	2016-02-04 19:54:30 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2015 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Functional tests for publish-distro.py script."""
@@ -376,17 +376,18 @@
 
         # Check some index files
         index_path = (
-            "%s/hoary-test-updates/main/binary-i386/Packages"
-            % self.config.distsroot)
-        self.assertExists(index_path)
-
-        index_path = (
-            "%s/hoary-test-backports/main/binary-i386/Packages"
-            % self.config.distsroot)
-        self.assertExists(index_path)
-
-        index_path = (
-            "%s/hoary-test/main/binary-i386/Packages" % self.config.distsroot)
+            "%s/hoary-test-updates/main/binary-i386/Packages.gz"
+            % self.config.distsroot)
+        self.assertExists(index_path)
+
+        index_path = (
+            "%s/hoary-test-backports/main/binary-i386/Packages.gz"
+            % self.config.distsroot)
+        self.assertExists(index_path)
+
+        index_path = (
+            "%s/hoary-test/main/binary-i386/Packages.gz" %
+            self.config.distsroot)
         self.assertNotExists(index_path)
 
 

=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
--- lib/lp/archivepublisher/tests/test_publisher.py	2016-01-14 17:00:34 +0000
+++ lib/lp/archivepublisher/tests/test_publisher.py	2016-02-04 19:54:30 +0000
@@ -1009,42 +1009,39 @@
         self.assertEqual(
             1 + old_num_pending_archives, new_num_pending_archives)
 
-    def _checkCompressedFile(self, archive_publisher, compressed_file_path,
-                             uncompressed_file_path):
-        """Assert that a compressed file is equal to its uncompressed version.
+    def _checkCompressedFiles(self, archive_publisher, base_file_path,
+                              suffixes):
+        """Assert that the various compressed versions of a file are equal.
 
-        Check that a compressed file, such as Packages.gz and Sources.gz,
-        and bz2 variations, matches its uncompressed partner.  The file
-        paths are relative to breezy-autotest/main under the
-        archive_publisher's configured dist root. 'breezy-autotest' is
-        our test distroseries name.
+        Check that the various versions of a compressed file, such as
+        Packages.gz/Packages.bz2 and Sources.gz/Sources.bz2, all have
+        identical contents.  The file paths are
+        relative to breezy-autotest/main under the archive_publisher's
+        configured dist root.  'breezy-autotest' is our test distroseries
+        name.
 
         The contents of the uncompressed file is returned as a list of lines
         in the file.
         """
-        index_compressed_path = os.path.join(
-            archive_publisher._config.distsroot, 'breezy-autotest', 'main',
-            compressed_file_path)
-        index_path = os.path.join(
-            archive_publisher._config.distsroot, 'breezy-autotest', 'main',
-            uncompressed_file_path)
-
-        if index_compressed_path.endswith('.gz'):
-            index_compressed_contents = gzip.GzipFile(
-                filename=index_compressed_path).read().splitlines()
-        elif index_compressed_path.endswith('.bz2'):
-            index_compressed_contents = bz2.BZ2File(
-                filename=index_compressed_path).read().splitlines()
-        else:
-            raise AssertionError(
-                'Unsupported compression: %s' % compressed_file_path)
-
-        with open(index_path, 'r') as index_file:
-            index_contents = index_file.read().splitlines()
-
-        self.assertEqual(index_contents, index_compressed_contents)
-
-        return index_contents
+        index_base_path = os.path.join(
+            archive_publisher._config.distsroot, 'breezy-autotest', 'main',
+            base_file_path)
+
+        all_contents = []
+        for suffix in suffixes:
+            if suffix == '.gz':
+                open_func = gzip.open
+            elif suffix == '.bz2':
+                open_func = bz2.BZ2File
+            else:
+                open_func = open
+            with open_func(index_base_path + suffix) as index_file:
+                all_contents.append(index_file.read().splitlines())
+
+        for contents in all_contents[1:]:
+            self.assertEqual(all_contents[0], contents)
+
+        return all_contents[0]
 
     def setupPPAArchiveIndexTest(self, long_descriptions=True,
                                  feature_flag=False):
@@ -1100,16 +1097,11 @@
         """Building Archive Indexes from PPA publications."""
         archive_publisher = self.setupPPAArchiveIndexTest()
 
-        # A compressed and uncompressed Sources file are written;
-        # ensure that they are the same after uncompressing the former.
-        index_contents = self._checkCompressedFile(
-            archive_publisher, os.path.join('source', 'Sources.bz2'),
-            os.path.join('source', 'Sources'))
-
-        index_contents = self._checkCompressedFile(
-            archive_publisher, os.path.join('source', 'Sources.gz'),
-            os.path.join('source', 'Sources'))
-
+        # Various compressed Sources files are written; ensure that they are
+        # the same after decompression.
+        index_contents = self._checkCompressedFiles(
+            archive_publisher, os.path.join('source', 'Sources'),
+            ['.gz', '.bz2'])
         self.assertEqual(
             ['Package: foo',
              'Binary: foo-bin',
@@ -1131,16 +1123,11 @@
              ''],
             index_contents)
 
-        # A compressed and an uncompressed Packages file are written;
-        # ensure that they are the same after uncompressing the former.
-        index_contents = self._checkCompressedFile(
-            archive_publisher, os.path.join('binary-i386', 'Packages.bz2'),
-            os.path.join('binary-i386', 'Packages'))
-
-        index_contents = self._checkCompressedFile(
-            archive_publisher, os.path.join('binary-i386', 'Packages.gz'),
-            os.path.join('binary-i386', 'Packages'))
-
+        # Various compressed Packages files are written; ensure that they
+        # are the same after decompression.
+        index_contents = self._checkCompressedFiles(
+            archive_publisher, os.path.join('binary-i386', 'Packages'),
+            ['.gz', '.bz2'])
         self.assertEqual(
             ['Package: foo-bin',
              'Source: foo',
@@ -1163,19 +1150,13 @@
              ''],
             index_contents)
 
-        # A compressed and an uncompressed Packages file are written for
-        # 'debian-installer' section for each architecture. It will list
+        # Various compressed Packages files are written for the
+        # 'debian-installer' section for each architecture.  They will list
         # the 'udeb' files.
-        index_contents = self._checkCompressedFile(
-            archive_publisher,
-            os.path.join('debian-installer', 'binary-i386', 'Packages.bz2'),
-            os.path.join('debian-installer', 'binary-i386', 'Packages'))
-
-        index_contents = self._checkCompressedFile(
-            archive_publisher,
-            os.path.join('debian-installer', 'binary-i386', 'Packages.gz'),
-            os.path.join('debian-installer', 'binary-i386', 'Packages'))
-
+        index_contents = self._checkCompressedFiles(
+            archive_publisher,
+            os.path.join('debian-installer', 'binary-i386', 'Packages'),
+            ['.gz', '.bz2'])
         self.assertEqual(
             ['Package: bingo',
              'Source: foo',
@@ -1197,16 +1178,10 @@
             index_contents)
 
         # 'debug' too, when publish_debug_symbols is enabled.
-        index_contents = self._checkCompressedFile(
-            archive_publisher,
-            os.path.join('debug', 'binary-i386', 'Packages.bz2'),
-            os.path.join('debug', 'binary-i386', 'Packages'))
-
-        index_contents = self._checkCompressedFile(
-            archive_publisher,
-            os.path.join('debug', 'binary-i386', 'Packages.gz'),
-            os.path.join('debug', 'binary-i386', 'Packages'))
-
+        index_contents = self._checkCompressedFiles(
+            archive_publisher,
+            os.path.join('debug', 'binary-i386', 'Packages'),
+            ['.gz', '.bz2'])
         self.assertEqual(
             ['Package: foo-bin-dbgsym',
              'Source: foo',
@@ -1272,16 +1247,11 @@
         archive_publisher = self.setupPPAArchiveIndexTest(
             long_descriptions=False, feature_flag=True)
 
-        # A compressed and uncompressed Sources file are written;
-        # ensure that they are the same after uncompressing the former.
-        index_contents = self._checkCompressedFile(
-            archive_publisher, os.path.join('source', 'Sources.bz2'),
-            os.path.join('source', 'Sources'))
-
-        index_contents = self._checkCompressedFile(
-            archive_publisher, os.path.join('source', 'Sources.gz'),
-            os.path.join('source', 'Sources'))
-
+        # Various compressed Sources files are written; ensure that they are
+        # the same after decompression.
+        index_contents = self._checkCompressedFiles(
+            archive_publisher, os.path.join('source', 'Sources'),
+            ['.gz', '.bz2'])
         self.assertEqual(
             ['Package: foo',
              'Binary: foo-bin',
@@ -1303,16 +1273,11 @@
              ''],
             index_contents)
 
-        # A compressed and an uncompressed Packages file are written;
-        # ensure that they are the same after uncompressing the former.
-        index_contents = self._checkCompressedFile(
-            archive_publisher, os.path.join('binary-i386', 'Packages.bz2'),
-            os.path.join('binary-i386', 'Packages'))
-
-        index_contents = self._checkCompressedFile(
-            archive_publisher, os.path.join('binary-i386', 'Packages.gz'),
-            os.path.join('binary-i386', 'Packages'))
-
+        # Various compressed Packages files are written; ensure that they
+        # are the same after decompression.
+        index_contents = self._checkCompressedFiles(
+            archive_publisher, os.path.join('binary-i386', 'Packages'),
+            ['.gz', '.bz2'])
         self.assertEqual(
             ['Package: foo-bin',
              'Source: foo',
@@ -1333,19 +1298,13 @@
              ''],
             index_contents)
 
-        # A compressed and an uncompressed Packages file are written for
-        # 'debian-installer' section for each architecture. It will list
+        # Various compressed Packages files are written for the
+        # 'debian-installer' section for each architecture.  They will list
         # the 'udeb' files.
-        index_contents = self._checkCompressedFile(
-            archive_publisher,
-            os.path.join('debian-installer', 'binary-i386', 'Packages.bz2'),
-            os.path.join('debian-installer', 'binary-i386', 'Packages'))
-
-        index_contents = self._checkCompressedFile(
-            archive_publisher,
-            os.path.join('debian-installer', 'binary-i386', 'Packages.gz'),
-            os.path.join('debian-installer', 'binary-i386', 'Packages'))
-
+        index_contents = self._checkCompressedFiles(
+            archive_publisher,
+            os.path.join('debian-installer', 'binary-i386', 'Packages'),
+            ['.gz', '.bz2'])
         self.assertEqual(
             ['Package: bingo',
              'Source: foo',
@@ -1367,16 +1326,10 @@
             index_contents)
 
         # 'debug' too, when publish_debug_symbols is enabled.
-        index_contents = self._checkCompressedFile(
-            archive_publisher,
-            os.path.join('debug', 'binary-i386', 'Packages.bz2'),
-            os.path.join('debug', 'binary-i386', 'Packages'))
-
-        index_contents = self._checkCompressedFile(
-            archive_publisher,
-            os.path.join('debug', 'binary-i386', 'Packages.gz'),
-            os.path.join('debug', 'binary-i386', 'Packages'))
-
+        index_contents = self._checkCompressedFiles(
+            archive_publisher,
+            os.path.join('debug', 'binary-i386', 'Packages'),
+            ['.gz', '.bz2'])
         self.assertEqual(
             ['Package: foo-bin-dbgsym',
              'Source: foo',
@@ -1402,16 +1355,11 @@
             ('breezy-autotest', PackagePublishingPocket.RELEASE) in
             archive_publisher.release_files_needed)
 
-        # A compressed and an uncompressed Translation-en file is written.
-        # ensure that they are the same after uncompressing the former.
-        index_contents = self._checkCompressedFile(
-            archive_publisher, os.path.join('i18n', 'Translation-en.gz'),
-            os.path.join('i18n', 'Translation-en'))
-
-        index_contents = self._checkCompressedFile(
-            archive_publisher, os.path.join('i18n', 'Translation-en.bz2'),
-            os.path.join('i18n', 'Translation-en'))
-
+        # Various compressed Translation-en files are written; ensure that
+        # they are the same after decompression.
+        index_contents = self._checkCompressedFiles(
+            archive_publisher, os.path.join('i18n', 'Translation-en'),
+            ['.gz', '.bz2'])
         self.assertEqual(
             ['Package: bingo',
              'Description-md5: 6fecedf187298acb6bc5f15cc5807fb7',
@@ -1656,8 +1604,8 @@
 
         arch_sources_path = os.path.join(
             archive_publisher._config.distsroot, 'breezy-autotest',
-            'main', 'source', 'Sources')
-        with open(arch_sources_path) as arch_sources_file:
+            'main', 'source', 'Sources.gz')
+        with gzip.open(arch_sources_path) as arch_sources_file:
             self.assertReleaseContentsMatch(
                 release, 'main/source/Sources', arch_sources_file.read())
 
@@ -1892,7 +1840,7 @@
 
         suite_path = partial(
             os.path.join, self.config.distsroot, 'breezy-autotest')
-        sources = suite_path('main', 'source', 'Sources')
+        sources = suite_path('main', 'source', 'Sources.gz')
         sources_timestamp = os.stat(sources).st_mtime - 60
         os.utime(sources, (sources_timestamp, sources_timestamp))
 
@@ -1900,7 +1848,9 @@
 
         release = self.parseRelease(suite_path('Release'))
         paths = ['Release'] + [entry['name'] for entry in release['md5sum']]
-        timestamps = set(os.stat(suite_path(path)).st_mtime for path in paths)
+        timestamps = set(
+            os.stat(suite_path(path)).st_mtime
+            for path in paths if os.path.exists(suite_path(path)))
         self.assertEqual(1, len(timestamps))
 
     def testCreateSeriesAliasesNoAlias(self):
@@ -1990,8 +1940,7 @@
         i18n_root = os.path.join(
             self.config.distsroot, 'breezy-autotest', 'main', 'i18n')
 
-        # Write a zero-length Translation-en file and compressed versions of
-        # it.
+        # Write compressed versions of a zero-length Translation-en file.
         translation_en_index = RepositoryIndexFile(
             os.path.join(i18n_root, 'Translation-en'), self.config.temproot)
         translation_en_index.close()
@@ -2014,7 +1963,8 @@
                          i18n_index['sha1'][1]['size'])
 
         # i18n/Index and i18n/Translation-en.bz2 are scheduled for inclusion
-        # in Release.
+        # in Release.  Checksums of the uncompressed version are included
+        # even though it is not actually written to disk.
         self.assertEqual(4, len(all_files))
         self.assertContentEqual(
             ['main/i18n/Index',
@@ -2087,8 +2037,8 @@
             publisher._config.distsroot, series.getSuite(pocket), '%s/%s')
 
         release_template = os.path.join(arch_template, 'Release')
-        packages_template = os.path.join(arch_template, 'Packages')
-        sources_template = os.path.join(arch_template, 'Sources')
+        packages_template = os.path.join(arch_template, 'Packages.gz')
+        sources_template = os.path.join(arch_template, 'Sources.gz')
         release_path = os.path.join(
             publisher._config.distsroot, series.getSuite(pocket), 'Release')
         with open(release_path) as release_file:

=== modified file 'lib/lp/archivepublisher/tests/test_repositoryindexfile.py'
--- lib/lp/archivepublisher/tests/test_repositoryindexfile.py	2013-08-29 10:29:01 +0000
+++ lib/lp/archivepublisher/tests/test_repositoryindexfile.py	2016-02-04 19:54:30 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2010 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Tests for `RepositoryIndexFile`."""
@@ -44,7 +44,7 @@
     def testWorkflow(self):
         """`RepositoryIndexFile` workflow.
 
-        On creation, 3 temporary files are atomically created in the
-        'temp_root' location (mkstemp). One for storing the plain contents
-        and other for the corresponding compressed contents. At this point,
+        On creation, 2 temporary files are atomically created in the
+        'temp_root' location (mkstemp), one for each compressed version
+        (gzip and bzip2) of the contents. At this point,
@@ -58,16 +58,15 @@
         repo_file = self.getRepoFile('boing')
 
         self.assertEqual(0, len(os.listdir(self.root)))
-        self.assertEqual(3, len(os.listdir(self.temp_root)))
+        self.assertEqual(2, len(os.listdir(self.temp_root)))
 
         repo_file.close()
 
-        self.assertEqual(3, len(os.listdir(self.root)))
+        self.assertEqual(2, len(os.listdir(self.root)))
         self.assertEqual(0, len(os.listdir(self.temp_root)))
 
         resulting_files = sorted(os.listdir(self.root))
-        self.assertEqual(
-            ['boing', 'boing.bz2', 'boing.gz'], resulting_files)
+        self.assertEqual(['boing.bz2', 'boing.gz'], resulting_files)
 
         for filename in resulting_files:
             file_path = os.path.join(self.root, filename)
@@ -87,14 +86,12 @@
         repo_file.write('hello')
         repo_file.close()
 
-        plain_content = open(os.path.join(self.root, 'boing')).read()
         gzip_content = gzip.open(os.path.join(self.root, 'boing.gz')).read()
         bz2_content = bz2.decompress(
             open(os.path.join(self.root, 'boing.bz2')).read())
 
-        self.assertEqual(plain_content, bz2_content)
-        self.assertEqual(plain_content, gzip_content)
-        self.assertEqual('hello', plain_content)
+        self.assertEqual(gzip_content, bz2_content)
+        self.assertEqual('hello', gzip_content)
 
     def testUnreferencing(self):
         """`RepositoryIndexFile` unreferencing.
@@ -105,7 +102,7 @@
         repo_file = self.getRepoFile('boing')
 
         self.assertEqual(0, len(os.listdir(self.root)))
-        self.assertEqual(3, len(os.listdir(self.temp_root)))
+        self.assertEqual(2, len(os.listdir(self.temp_root)))
 
         del repo_file
 
@@ -123,7 +120,7 @@
         repo_file.close()
 
         self.assertEqual(
-            ['boing', 'boing.bz2', 'boing.gz'],
+            ['boing.bz2', 'boing.gz'],
             sorted(os.listdir(missing_root)))
 
     def testMissingTempRoot(self):
@@ -132,3 +129,14 @@
         self.assertRaises(
             AssertionError, RepositoryIndexFile,
             os.path.join(self.root, 'boing'), missing_temp_root)
+
+    def testRemoveOld(self):
+        """`RepositoryIndexFile` removes old index files."""
+        old_path = os.path.join(self.root, 'boing')
+        with open(old_path, 'w'):
+            pass
+        self.assertEqual(['boing'], sorted(os.listdir(self.root)))
+        repo_file = self.getRepoFile('boing')
+        repo_file.close()
+        self.assertEqual(
+            ['boing.bz2', 'boing.gz'], sorted(os.listdir(self.root)))

=== modified file 'lib/lp/archivepublisher/utils.py'
--- lib/lp/archivepublisher/utils.py	2015-09-28 17:38:45 +0000
+++ lib/lp/archivepublisher/utils.py	2016-02-04 19:54:30 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Miscellaneous functions for publisher."""
@@ -43,17 +43,21 @@
     # File path built on initialization.
     path = None
 
-    def __init__(self, temp_root, filename):
+    def __init__(self, temp_root, filename, auto_open=True):
+        self.temp_root = temp_root
         self.filename = filename + self.suffix
 
+        if auto_open:
+            self.open()
+
+    def _buildFile(self, fd):
+        return os.fdopen(fd, 'wb')
+
+    def open(self):
         fd, self.path = tempfile.mkstemp(
-            dir=temp_root, prefix='%s_' % filename)
-
+            dir=self.temp_root, prefix='%s_' % self.filename)
         self._fd = self._buildFile(fd)
 
-    def _buildFile(self, fd):
-        return os.fdopen(fd, 'wb')
-
     def write(self, content):
         self._fd.write(content)
 
@@ -102,10 +106,12 @@
         assert os.path.exists(temp_root), 'Temporary root does not exist.'
 
         self.index_files = (
-            PlainTempFile(temp_root, filename),
             GzipTempFile(temp_root, filename),
             Bzip2TempFile(temp_root, filename),
             )
+        self.old_index_files = (
+            PlainTempFile(temp_root, filename, auto_open=False),
+            )
 
     def write(self, content):
         """Write contents to all target medias."""
@@ -138,3 +144,10 @@
             mode = stat.S_IMODE(os.stat(root_path).st_mode)
             os.chmod(root_path,
                      mode | stat.S_IWGRP | stat.S_IRGRP | stat.S_IROTH)
+
+        # Remove files that may have been created by older versions of this
+        # code.
+        for index_file in self.old_index_files:
+            root_path = os.path.join(self.root, index_file.filename)
+            if os.path.exists(root_path):
+                os.remove(root_path)

=== modified file 'lib/lp/soyuz/doc/soyuz-upload.txt'
--- lib/lp/soyuz/doc/soyuz-upload.txt	2014-11-24 16:03:20 +0000
+++ lib/lp/soyuz/doc/soyuz-upload.txt	2016-02-04 19:54:30 +0000
@@ -464,9 +464,11 @@
-component of ubuntutest/breezy-autotest, containing the only the
-required entry for 'etherwake':
+component of ubuntutest/breezy-autotest, containing only the
+required entry for 'etherwake':
 
-    >>> sources = open(
+    >>> import gzip
+
+    >>> sources = gzip.open(
     ...    "/var/tmp/archive/ubuntutest/dists/breezy-autotest/universe/source"
-    ...    "/Sources").read()
+    ...    "/Sources.gz").read()
     >>> print sources + '\nEND'
     Package: etherwake
     Binary: etherwake
@@ -548,16 +550,16 @@
 
 Check if the package was moved properly to the component 'multiverse':
 
-    >>> main_sources = open(
+    >>> main_sources = gzip.open(
     ...     "/var/tmp/archive/ubuntutest/dists/breezy-autotest"
-    ...     "/main/source/Sources").read()
+    ...     "/main/source/Sources.gz").read()
     >>> print main_sources + '\nEND'
     <BLANKLINE>
     END
 
-    >>> multiverse_sources = open(
+    >>> multiverse_sources = gzip.open(
     ...     "/var/tmp/archive/ubuntutest/dists/breezy-autotest"
-    ...     "/multiverse/source/Sources").read()
+    ...     "/multiverse/source/Sources.gz").read()
     >>> print multiverse_sources + '\nEND'
     Package: drdsl
     ...


Follow ups