← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/gina-stronger-checksums into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/gina-stronger-checksums into lp:launchpad.

Commit message:
Make gina accept archives using xz compression and/or using (only) stronger checksums than MD5.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/gina-stronger-checksums/+merge/289505

Make gina accept archives using xz compression and/or using (only) stronger checksums than MD5.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/gina-stronger-checksums into lp:launchpad.
=== modified file 'lib/lp/archiveuploader/dscfile.py'
--- lib/lp/archiveuploader/dscfile.py	2016-03-01 21:20:44 +0000
+++ lib/lp/archiveuploader/dscfile.py	2016-03-18 14:22:06 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2014 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """ DSCFile and related.
@@ -251,6 +251,7 @@
         "Build-Conflicts-Indep",
         "Checksums-Sha1",
         "Checksums-Sha256",
+        "Checksums-Sha512",
         "Format",
         "Homepage",
         "Standards-Version",

=== modified file 'lib/lp/soyuz/scripts/gina/archive.py'
--- lib/lp/soyuz/scripts/gina/archive.py	2013-05-22 09:51:08 +0000
+++ lib/lp/soyuz/scripts/gina/archive.py	2016-03-18 14:22:06 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Archive pool classes.
@@ -37,7 +37,7 @@
 class ArchiveFilesystemInfo:
     """Archive information files holder
 
-    This class gets and holds the Packages.gz and Source.gz files
+    This class gets and holds the Packages and Sources files
     from a Package Archive and holds them as internal attributes
     to be used for other classes.
     """
@@ -62,53 +62,49 @@
             raise MangledArchiveError("No archive directory for %s/%s" %
                                       (distroseries, component))
 
-        # Search and get the files with full path
-        sources_zipped = os.path.join(root, "dists", distroseries,
-                                      component, "source", "Sources.gz")
-        if not os.path.exists(sources_zipped):
-            raise MangledArchiveError("Archive missing Sources.gz at %s"
-                                      % sources_zipped)
-
         # Extract Sources index.
-        srcfd, sources_tagfile = tempfile.mkstemp()
-        call("gzip -dc %s > %s" % (sources_zipped, sources_tagfile))
-        srcfile = os.fdopen(srcfd)
-
-        # Holds the opened files and its names.
-        self.sources_tagfile = sources_tagfile
-        self.srcfile = srcfile
+        sources_prefix = os.path.join(
+            root, "dists", distroseries, component, "source", "Sources")
+        self.srcfile, self.sources_tagfile = self.openTagFile(sources_prefix)
 
         # Detect source-only mode and skip binary index parsing.
         if source_only:
             return
 
-        # Extract Binaries indexes.
+        # Extract binary indexes.
         dist_bin_dir = os.path.join(dist_dir, "binary-%s" % arch)
         if not os.path.exists(dist_bin_dir):
             raise NoBinaryArchive
 
-        binaries_zipped = os.path.join(dist_bin_dir, "Packages.gz")
-        if not os.path.exists(binaries_zipped):
-            raise MangledArchiveError("Archive mising Packages.gz at %s"
-                                      % binaries_zipped)
-        di_zipped = os.path.join(root, "dists", distroseries, component,
-                                 "debian-installer", "binary-%s" % arch,
-                                 "Packages.gz")
-        # Extract Binary indexes.
-        binfd, binaries_tagfile = tempfile.mkstemp()
-        call("gzip -dc %s > %s" % (binaries_zipped, binaries_tagfile))
-        binfile = os.fdopen(binfd)
-
-        difd, di_tagfile = tempfile.mkstemp()
-        if os.path.exists(di_zipped):
-            call("gzip -dc %s > %s" % (di_zipped, di_tagfile))
-        difile = os.fdopen(difd)
-
-        # Holds the opened files and its names.
-        self.binaries_tagfile = binaries_tagfile
-        self.binfile = binfile
-        self.di_tagfile = di_tagfile
-        self.difile = difile
+        self.binfile, self.binaries_tagfile = self.openTagFile(
+            os.path.join(dist_bin_dir, "Packages"))
+
+        try:
+            self.difile, self.di_tagfile = self.openTagFile(
+                os.path.join(
+                    root, "dists", distroseries, component,
+                    "debian-installer", "binary-%s" % arch, "Packages"))
+        except MangledArchiveError:
+            # d-i binary indexes may be missing.  Put something empty in
+            # place so that PackagesMap doesn't need to care.
+            difd, self.di_tagfile = tempfile.mkstemp()
+            self.difile = os.fdopen(difd)
+
+    def openTagFile(self, prefix):
+        for suffix in (".xz", ".gz"):
+            if os.path.exists(prefix + suffix):
+                # Extract index.
+                fd, tagfile = tempfile.mkstemp()
+                if suffix == ".xz":
+                    call("xz -dc %s > %s" % (prefix + suffix, tagfile))
+                elif suffix == ".gz":
+                    call("gzip -dc %s > %s" % (prefix + suffix, tagfile))
+                else:
+                    raise AssertionError("Unknown suffix '%s'" % suffix)
+                return os.fdopen(fd), tagfile
+        else:
+            raise MangledArchiveError(
+                "Archive missing any variant of %s" % prefix)
 
     def cleanup(self):
         os.unlink(self.sources_tagfile)

=== modified file 'lib/lp/soyuz/scripts/gina/handlers.py'
--- lib/lp/soyuz/scripts/gina/handlers.py	2016-03-04 14:18:23 +0000
+++ lib/lp/soyuz/scripts/gina/handlers.py	2016-03-18 14:22:06 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2015 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Gina db handlers.
@@ -83,14 +83,7 @@
 
 def check_not_in_librarian(files, archive_root, directory):
     to_upload = []
-    if not isinstance(files, list):
-        # A little bit of ugliness. The source package's files attribute
-        # returns a three-tuple with md5sum, size and name. The binary
-        # package, on the other hand, only really provides a filename.
-        # This is tested through the two codepaths, so it should be safe.
-        files = [(None, files)]
-    for i in files:
-        fname = i[-1]
+    for fname in files:
         path = os.path.join(archive_root, directory)
         if not os.path.exists(os.path.join(path, fname)):
             # XXX kiko 2005-10-22: Untested
@@ -781,7 +774,7 @@
     def createBinaryPackage(self, bin, srcpkg, distroarchseries, archtag):
         """Create a new binarypackage."""
         fdir, fname = os.path.split(bin.filename)
-        to_upload = check_not_in_librarian(fname, bin.archive_root, fdir)
+        to_upload = check_not_in_librarian([fname], bin.archive_root, fdir)
         fname, path = to_upload[0]
 
         componentID = self.distro_handler.getComponentByName(bin.component).id

=== modified file 'lib/lp/soyuz/scripts/gina/packages.py'
--- lib/lp/soyuz/scripts/gina/packages.py	2015-12-07 21:49:16 +0000
+++ lib/lp/soyuz/scripts/gina/packages.py	2016-03-18 14:22:06 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2015 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Package information classes.
@@ -225,8 +225,14 @@
                                       (self.package, self.version))
 
         absent = object()
-        missing = [attr for attr in self._required if
-                   getattr(self, attr, absent) is absent]
+        missing = []
+        for attr in self._required:
+            if isinstance(attr, tuple):
+                if all(getattr(self, oneattr, absent) is absent
+                       for oneattr in attr):
+                    missing.append(attr)
+            elif getattr(self, attr, absent) is absent:
+                missing.append(attr)
         if missing:
             raise MissingRequiredArguments(missing)
 
@@ -249,14 +255,26 @@
         self.date_uploaded = UTC_NOW
         return True
 
-    def set_field(self, key, value):
-        """Record an arbitrary control field."""
-        lowkey = key.lower()
+    def is_field_known(self, lowfield):
+        """Is this field a known one?"""
         # _known_fields contains the fields that archiveuploader recognises
         # from a raw .dsc or .*deb; _required contains a few extra fields
         # that are added to Sources and Packages index files.  If a field is
         # in neither, it counts as user-defined.
-        if lowkey in self._known_fields or lowkey in self._required:
+        if lowfield in self._known_fields:
+            return True
+        for required in self._required:
+            if isinstance(required, tuple):
+                if lowfield in required:
+                    return True
+            elif lowfield == required:
+                return True
+        return False
+
+    def set_field(self, key, value):
+        """Record an arbitrary control field."""
+        lowkey = key.lower()
+        if self.is_field_known(lowkey):
             setattr(self, lowkey.replace("-", "_"), value)
         else:
             if self._user_defined is None:
@@ -294,7 +312,7 @@
         'section',
         'architecture',
         'directory',
-        'files',
+        ('files', 'checksums-sha1', 'checksums-sha256', 'checksums-sha512'),
         'component',
         ]
 
@@ -326,11 +344,12 @@
                 except UnicodeDecodeError:
                     raise DisplayNameDecodingError(
                         "Could not decode name %s" % displayname)
-            elif k == 'Files':
-                self.files = []
-                files = v.split("\n")
-                for f in files:
-                    self.files.append(stripseq(f.split(" ")))
+            elif k == 'Files' or k.startswith('Checksums-'):
+                if not hasattr(self, 'files'):
+                    self.files = []
+                    files = v.split("\n")
+                    for f in files:
+                        self.files.append(stripseq(f.split(" "))[-1])
             else:
                 self.set_field(k, v)
 
@@ -413,7 +432,7 @@
         'filename',
         'component',
         'size',
-        'md5sum',
+        ('md5sum', 'sha1', 'sha256', 'sha512'),
         'description',
         'summary',
         'priority',

=== modified file 'lib/lp/soyuz/scripts/tests/test_gina.py'
--- lib/lp/soyuz/scripts/tests/test_gina.py	2015-12-07 21:49:16 +0000
+++ lib/lp/soyuz/scripts/tests/test_gina.py	2016-03-18 14:22:06 +0000
@@ -1,13 +1,16 @@
-# Copyright 2009-2015 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 from doctest import DocTestSuite
 import hashlib
 import os
+import shutil
+import subprocess
 import tempfile
 from textwrap import dedent
 from unittest import TestLoader
 
+import apt_pkg
 from fixtures import EnvironmentVariableFixture
 import transaction
 
@@ -23,6 +26,7 @@
 from lp.soyuz.scripts.gina import ExecutionError
 from lp.soyuz.scripts.gina.archive import (
     ArchiveComponentItems,
+    ArchiveFilesystemInfo,
     PackagesMap,
     )
 from lp.soyuz.scripts.gina.dominate import dominate_imported_source_packages
@@ -36,10 +40,14 @@
     )
 from lp.soyuz.scripts.gina.packages import (
     BinaryPackageData,
+    MissingRequiredArguments,
     SourcePackageData,
     )
 from lp.soyuz.scripts.gina.runner import import_sourcepackages
-from lp.testing import TestCaseWithFactory
+from lp.testing import (
+    TestCase,
+    TestCaseWithFactory,
+    )
 from lp.testing.faketransaction import FakeTransaction
 from lp.testing.layers import (
     LaunchpadZopelessLayer,
@@ -164,6 +172,60 @@
             PackagePublishingStatus.DELETED, PackagePublishingStatus.DELETED])
 
 
+class TestArchiveFilesystemInfo(TestCase):
+
+    def test_gzip(self):
+        archive_root = self.useTempDir()
+        sampledata_root = os.path.join(
+            os.path.dirname(__file__), "gina_test_archive")
+        sampledata_component_dir = os.path.join(
+            sampledata_root, "dists", "breezy", "main")
+        component_dir = os.path.join(archive_root, "dists", "breezy", "main")
+        os.makedirs(os.path.join(component_dir, "source"))
+        shutil.copy(
+            os.path.join(sampledata_component_dir, "source", "Sources.gz"),
+            os.path.join(component_dir, "source", "Sources.gz"))
+        os.makedirs(os.path.join(component_dir, "binary-i386"))
+        shutil.copy(
+            os.path.join(
+                sampledata_component_dir, "binary-i386", "Packages.gz"),
+            os.path.join(component_dir, "binary-i386", "Packages.gz"))
+
+        archive_info = ArchiveFilesystemInfo(
+            archive_root, "breezy", "main", "i386")
+        sources = apt_pkg.TagFile(archive_info.srcfile)
+        self.assertEqual("archive-copier", next(sources)["Package"])
+        binaries = apt_pkg.TagFile(archive_info.binfile)
+        self.assertEqual("python-pam", next(binaries)["Package"])
+
+    def test_xz(self):
+        archive_root = self.useTempDir()
+        sampledata_root = os.path.join(
+            os.path.dirname(__file__), "gina_test_archive")
+        sampledata_component_dir = os.path.join(
+            sampledata_root, "dists", "breezy", "main")
+        component_dir = os.path.join(archive_root, "dists", "breezy", "main")
+        os.makedirs(os.path.join(component_dir, "source"))
+        shutil.copy(
+            os.path.join(sampledata_component_dir, "source", "Sources"),
+            os.path.join(component_dir, "source", "Sources"))
+        subprocess.check_call(
+            ["xz", os.path.join(component_dir, "source", "Sources")])
+        os.makedirs(os.path.join(component_dir, "binary-i386"))
+        shutil.copy(
+            os.path.join(sampledata_component_dir, "binary-i386", "Packages"),
+            os.path.join(component_dir, "binary-i386", "Packages"))
+        subprocess.check_call(
+            ["xz", os.path.join(component_dir, "binary-i386", "Packages")])
+
+        archive_info = ArchiveFilesystemInfo(
+            archive_root, "breezy", "main", "i386")
+        sources = apt_pkg.TagFile(archive_info.srcfile)
+        self.assertEqual("archive-copier", next(sources)["Package"])
+        binaries = apt_pkg.TagFile(archive_info.binfile)
+        self.assertEqual("python-pam", next(binaries)["Package"])
+
+
 class TestSourcePackageData(TestCaseWithFactory):
 
     layer = ZopelessDatabaseLayer
@@ -287,6 +349,28 @@
                 tempfile.tempdir = None
         self.assertEqual([], os.listdir(unpack_tmpdir))
 
+    def test_checksum_fields(self):
+        # We only need one of Files or Checksums-*.
+        base_dsc_contents = {
+            "Package": "foo",
+            "Binary": "foo",
+            "Version": "1.0-1",
+            "Maintainer": "Foo Bar <foo@xxxxxxxxxxxxx>",
+            "Section": "misc",
+            "Architecture": "all",
+            "Directory": "pool/main/f/foo",
+            "Component": "main",
+            }
+        for field in (
+                "Files", "Checksums-Sha1", "Checksums-Sha256",
+                "Checksums-Sha512"):
+            dsc_contents = dict(base_dsc_contents)
+            dsc_contents[field] = "xxx 000 foo_1.0-1.dsc"
+            sp_data = SourcePackageData(**dsc_contents)
+            self.assertEqual(["foo_1.0-1.dsc"], sp_data.files)
+        self.assertRaises(
+            MissingRequiredArguments, SourcePackageData, **base_dsc_contents)
+
 
 class TestSourcePackageHandler(TestCaseWithFactory):
 
@@ -352,6 +436,33 @@
         self.assertEqual(PackagePublishingStatus.PUBLISHED, spph.status)
 
 
+class TestBinaryPackageData(TestCaseWithFactory):
+
+    layer = ZopelessDatabaseLayer
+
+    def test_checksum_fields(self):
+        # We only need one of MD5sum or SHA*.
+        base_deb_contents = {
+            "Package": "foo",
+            "Installed-Size": "0",
+            "Maintainer": "Foo Bar <foo@xxxxxxxxxxxxx>",
+            "Section": "misc",
+            "Architecture": "all",
+            "Version": "1.0-1",
+            "Filename": "pool/main/f/foo/foo_1.0-1_all.deb",
+            "Component": "main",
+            "Size": "0",
+            "Description": "",
+            "Priority": "extra",
+            }
+        for field in ("MD5sum", "SHA1", "SHA256", "SHA512"):
+            deb_contents = dict(base_deb_contents)
+            deb_contents[field] = "0"
+            BinaryPackageData(**deb_contents)
+        self.assertRaises(
+            MissingRequiredArguments, BinaryPackageData, **base_deb_contents)
+
+
 class TestBinaryPackageHandler(TestCaseWithFactory):
 
     layer = LaunchpadZopelessLayer


Follow ups