← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~wgrant/launchpad/sha256-archiveuploader-pre into lp:launchpad

 

William Grant has proposed merging lp:~wgrant/launchpad/sha256-archiveuploader-pre into lp:launchpad.

Commit message:
Start generalising archiveuploader to cope with more than just MD5 hashes.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1190885 in Launchpad itself: "nascentupload doesn't verify SHA-256 hashes"
  https://bugs.launchpad.net/launchpad/+bug/1190885

For more details, see:
https://code.launchpad.net/~wgrant/launchpad/sha256-archiveuploader-pre/+merge/171464

Start generalising archiveuploader to cope with more than just MD5 hashes. NascentUploadFile knows how to verify them all, but its callsites don't yet give it anything more than MD5.
-- 
https://code.launchpad.net/~wgrant/launchpad/sha256-archiveuploader-pre/+merge/171464
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/sha256-archiveuploader-pre into lp:launchpad.
=== modified file 'lib/lp/archiveuploader/changesfile.py'
--- lib/lp/archiveuploader/changesfile.py	2012-09-24 01:45:49 +0000
+++ lib/lp/archiveuploader/changesfile.py	2013-06-26 06:19:30 +0000
@@ -178,9 +178,9 @@
             # files lines from a changes file are always of the form:
             # CHECKSUM SIZE [COMPONENT/]SECTION PRIORITY FILENAME
             try:
-                digest, size, component_and_section, priority_name, filename = (
+                md5, size, component_and_section, priority_name, filename = (
                     fileline.strip().split())
-            except ValueError as e:
+            except ValueError:
                 yield UploadError(
                     "Wrong number of fields in Files line in .changes.")
                 continue
@@ -191,7 +191,7 @@
                     # otherwise the tarballs in custom uploads match
                     # with source_match.
                     file_instance = CustomUploadFile(
-                        filepath, digest, size, component_and_section,
+                        filepath, dict(MD5=md5), size, component_and_section,
                         priority_name, self.policy, self.logger)
                 else:
                     try:
@@ -203,7 +203,7 @@
                         continue
 
                     file_instance = cls(
-                        filepath, digest, size, component_and_section,
+                        filepath, dict(MD5=md5), size, component_and_section,
                         priority_name, package, self.version, self,
                         self.policy, self.logger)
 

=== modified file 'lib/lp/archiveuploader/dscfile.py'
--- lib/lp/archiveuploader/dscfile.py	2013-06-18 05:12:56 +0000
+++ lib/lp/archiveuploader/dscfile.py	2013-06-26 06:19:30 +0000
@@ -263,7 +263,7 @@
     copyright = None
     changelog = None
 
-    def __init__(self, filepath, digest, size, component_and_section,
+    def __init__(self, filepath, checksums, size, component_and_section,
                  priority, package, version, changes, policy, logger):
         """Construct a DSCFile instance.
 
@@ -276,7 +276,7 @@
         from lp.archiveuploader.nascentupload import EarlyReturnUploadError
 
         SourceUploadFile.__init__(
-            self, filepath, digest, size, component_and_section, priority,
+            self, filepath, checksums, size, component_and_section, priority,
             package, version, changes, policy, logger)
         self.parse(verify_signature=not policy.unsigned_dsc_ok)
 
@@ -353,7 +353,7 @@
         files = []
         for fileline in self._dict['Files'].strip().split("\n"):
             # DSC lines are always of the form: CHECKSUM SIZE FILENAME
-            digest, size, filename = fileline.strip().split()
+            md5, size, filename = fileline.strip().split()
             if not re_issource.match(filename):
                 # DSC files only really hold on references to source
                 # files; they are essentially a description of a source
@@ -364,7 +364,7 @@
             filepath = os.path.join(self.dirname, filename)
             try:
                 file_instance = DSCUploadedFile(
-                    filepath, digest, size, self.policy, self.logger)
+                    filepath, dict(MD5=md5), size, self.policy, self.logger)
             except UploadError as error:
                 yield error
             else:
@@ -521,7 +521,7 @@
                 # dismiss. It prevents us from having scary duplicated
                 # filenames in Librarian and misapplied files in archive,
                 # fixes bug # 38636 and friends.
-                if sub_dsc_file.digest != library_file.content.md5:
+                if sub_dsc_file.checksums['MD5'] != library_file.content.md5:
                     yield UploadError(
                         "File %s already exists in %s, but uploaded version "
                         "has different contents. See more information about "
@@ -721,10 +721,10 @@
           store_in_database() method.
     """
 
-    def __init__(self, filepath, digest, size, policy, logger):
+    def __init__(self, filepath, checksums, size, policy, logger):
         component_and_section = priority = "--no-value--"
         NascentUploadFile.__init__(
-            self, filepath, digest, size, component_and_section,
+            self, filepath, checksums, size, component_and_section,
             priority, policy, logger)
 
     def verify(self):

=== modified file 'lib/lp/archiveuploader/nascentuploadfile.py'
--- lib/lp/archiveuploader/nascentuploadfile.py	2013-01-22 01:47:26 +0000
+++ lib/lp/archiveuploader/nascentuploadfile.py	2013-06-26 06:19:30 +0000
@@ -122,7 +122,6 @@
     The filename, along with information about it, is kept here.
     """
     new = False
-    sha_digest = None
 
     # Files need their content type for creating in the librarian.
     # This maps endings of filenames onto content types we may encounter
@@ -135,10 +134,10 @@
         ".tar.gz": "application/gzipped-tar",
         }
 
-    def __init__(self, filepath, digest, size, component_and_section,
+    def __init__(self, filepath, checksums, size, component_and_section,
                  priority_name, policy, logger):
         self.filepath = filepath
-        self.digest = digest
+        self.checksums = checksums
         self.priority_name = priority_name
         self.policy = policy
         self.logger = logger
@@ -207,13 +206,10 @@
                 "Invalid character(s) in filename: '%s'." % self.filename)
 
     def checkSizeAndCheckSum(self):
-        """Check the md5sum and size of the nascent file.
+        """Check the size and checksums of the nascent file.
 
-        Raise UploadError if the digest or size does not match or if the
+        Raise UploadError if the size or checksums do not match or if the
         file is not found on the disk.
-
-        Populate self.sha_digest with the calculated sha1 digest of the
-        file on disk.
         """
         if not self.exists_on_disk:
             raise UploadError(
@@ -222,31 +218,28 @@
 
         # Read in the file and compute its md5 and sha1 checksums and remember
         # the size of the file as read-in.
-        digest = hashlib.md5()
-        sha_cksum = hashlib.sha1()
+        digesters = dict((n, hashlib.new(n)) for n in self.checksums.keys())
         ckfile = open(self.filepath, "r")
         size = 0
         for chunk in filechunks(ckfile):
-            digest.update(chunk)
-            sha_cksum.update(chunk)
+            for digester in digesters.itervalues():
+                digester.update(chunk)
             size += len(chunk)
         ckfile.close()
 
         # Check the size and checksum match what we were told in __init__
-        if digest.hexdigest() != self.digest:
-            raise UploadError(
-                "File %s mentioned in the changes has a checksum mismatch. "
-                "%s != %s" % (self.filename, digest.hexdigest(), self.digest))
+        for n in sorted(self.checksums.keys()):
+            if digesters[n].hexdigest() != self.checksums[n]:
+                raise UploadError(
+                    "File %s mentioned in the changes has a %s mismatch. "
+                    "%s != %s" % (
+                        self.filename, n, digesters[n].hexdigest(),
+                        self.checksums[n]))
         if size != self.size:
             raise UploadError(
                 "File %s mentioned in the changes has a size mismatch. "
                 "%s != %s" % (self.filename, size, self.size))
 
-        # The sha_digest is used later when verifying packages mentioned
-        # in the DSC file; it's used to compare versus files in the
-        # Librarian.
-        self.sha_digest = sha_cksum.hexdigest()
-
 
 class CustomUploadFile(NascentUploadFile):
     """NascentUpload file for Custom uploads.
@@ -327,7 +320,7 @@
 class PackageUploadFile(NascentUploadFile):
     """Base class to model sources and binary files contained in a upload. """
 
-    def __init__(self, filepath, digest, size, component_and_section,
+    def __init__(self, filepath, md5, size, component_and_section,
                  priority_name, package, version, changes, policy, logger):
         """Check presence of the component and section from an uploaded_file.
 
@@ -336,7 +329,7 @@
         Even if they might be overridden in the future.
         """
         super(PackageUploadFile, self).__init__(
-            filepath, digest, size, component_and_section, priority_name,
+            filepath, md5, size, component_and_section, priority_name,
             policy, logger)
         self.package = package
         self.version = version
@@ -480,11 +473,11 @@
     source_name = None
     source_version = None
 
-    def __init__(self, filepath, digest, size, component_and_section,
+    def __init__(self, filepath, md5, size, component_and_section,
                  priority_name, package, version, changes, policy, logger):
 
         PackageUploadFile.__init__(
-            self, filepath, digest, size, component_and_section,
+            self, filepath, md5, size, component_and_section,
             priority_name, package, version, changes, policy, logger)
 
         if self.priority_name not in self.priority_map:

=== modified file 'lib/lp/archiveuploader/tests/meta-data-custom-files.txt'
--- lib/lp/archiveuploader/tests/meta-data-custom-files.txt	2010-06-25 16:26:20 +0000
+++ lib/lp/archiveuploader/tests/meta-data-custom-files.txt	2013-06-26 06:19:30 +0000
@@ -13,7 +13,7 @@
 
     >>> from lp.archiveuploader.nascentuploadfile import CustomUploadFile
     >>> custom_upload_file = CustomUploadFile(
-    ...     filepath="", digest="", size=1, priority_name="", policy=None,
+    ...     filepath="", checksums={}, size=1, priority_name="", policy=None,
     ...     component_and_section="main/raw-meta-data", logger=None)
 
     >>> print custom_upload_file.custom_type.name

=== modified file 'lib/lp/archiveuploader/tests/nascentuploadfile.txt'
--- lib/lp/archiveuploader/tests/nascentuploadfile.txt	2013-01-02 04:03:05 +0000
+++ lib/lp/archiveuploader/tests/nascentuploadfile.txt	2013-06-26 06:19:30 +0000
@@ -343,7 +343,7 @@
 
     >>> ed_source_dsc = DSCFile(
     ...     datadir('ed_0.2-20.dsc'),
-    ...     'de8b206f8fc57bd931f6226feac6644a', 578, 'editors',
+    ...     dict(MD5='de8b206f8fc57bd931f6226feac6644a'), 578, 'editors',
     ...     'important', 'ed', '0.2-20', ed_source_changes,
     ...     modified_insecure_policy, DevNullLogger())
 
@@ -401,20 +401,20 @@
 
     >>> ed_broken_dsc = DSCFile(
     ...     datadir('ed_0.2-20.dsc'),
-    ...     'e31eeb0b6b3b87e1ea79378df864ffff', 500, 'editors',
+    ...     dict(MD5='e31eeb0b6b3b87e1ea79378df864ffff'), 500, 'editors',
     ...     'important', 'ed', '0.2-20', ed_source_changes,
     ...     modified_insecure_policy, DevNullLogger())
 
     >>> errors = ed_broken_dsc.verify()
     >>> [str(err) for err in errors]
-    ['File ed_0.2-20.dsc mentioned in the changes has a checksum mismatch.
+    ['File ed_0.2-20.dsc mentioned in the changes has a MD5 mismatch.
     de8b206f8fc57bd931f6226feac6644a != e31eeb0b6b3b87e1ea79378df864ffff']
 
 It also verifies the file size when the checksum matches.
 
     >>> ed_broken_dsc = DSCFile(
     ...     datadir('ed_0.2-20.dsc'),
-    ...     'de8b206f8fc57bd931f6226feac6644a', 500, 'editors',
+    ...     dict(MD5='de8b206f8fc57bd931f6226feac6644a'), 500, 'editors',
     ...     'important', 'ed', '0.2-20', ed_source_changes,
     ...     modified_insecure_policy, DevNullLogger())
 
@@ -443,11 +443,11 @@
 
     >>> ed_broken_dsc_file = DSCUploadedFile(
     ...     datadir('ed_0.2-20.diff.gz'),
-    ...     'f9e1e5f13725f581919e9bfd6227ffff', 500,
+    ...     dict(MD5='f9e1e5f13725f581919e9bfd6227ffff'), 500,
     ...     modified_insecure_policy, DevNullLogger())
     >>> errors = ed_broken_dsc_file.verify()
     >>> [str(err) for err in errors]
-    ['File ed_0.2-20.diff.gz mentioned in the changes has a checksum mismatch.
+    ['File ed_0.2-20.diff.gz mentioned in the changes has a MD5 mismatch.
     8343836094fb01ee9b9a1067b23365f1 != f9e1e5f13725f581919e9bfd6227ffff']
 
 
@@ -460,7 +460,7 @@
     ...    DebBinaryUploadFile)
     >>> ed_deb_path = datadir('ed_0.2-20_i386.deb')
     >>> ed_binary_deb = DebBinaryUploadFile(
-    ...     ed_deb_path, 'e31eeb0b6b3b87e1ea79378df864ffff', 15,
+    ...     ed_deb_path, dict(MD5='e31eeb0b6b3b87e1ea79378df864ffff'), 15,
     ...     'main/editors', 'important', 'foo', '1.2', ed_binary_changes,
     ...     modified_insecure_policy, DevNullLogger())
 
@@ -473,8 +473,8 @@
 changes file:
 
     >>> ed_binary_deb = DebBinaryUploadFile(
-    ...     ed_deb_path, 'e31eeb0b6b3b87e1ea79378df864ffff', 15, 'main/net',
-    ...     'important', 'foo', '1.2', ed_binary_changes,
+    ...     ed_deb_path, dict(MD5='e31eeb0b6b3b87e1ea79378df864ffff'), 15,
+    ...     'main/net', 'important', 'foo', '1.2', ed_binary_changes,
     ...     modified_insecure_policy, DevNullLogger())
     >>> list(ed_binary_deb.verify())
     [UploadError('ed_0.2-20_i386.deb
@@ -484,7 +484,7 @@
 It also checks the priority against the changes file:
 
     >>> ed_binary_deb = DebBinaryUploadFile(
-    ...     ed_deb_path, 'e31eeb0b6b3b87e1ea79378df864ffff', 15,
+    ...     ed_deb_path, dict(MD5='e31eeb0b6b3b87e1ea79378df864ffff'), 15,
     ...     'main/editors', 'extra', 'foo', '1.2', ed_binary_changes,
     ...     modified_insecure_policy, DevNullLogger())
     >>> list(ed_binary_deb.verify())
@@ -501,7 +501,7 @@
     >>> old_only_policy.future_time_grace = -20 * 365 * 24 * 60 * 60
 
     >>> ed_binary_deb = DebBinaryUploadFile(
-    ...     ed_deb_path, 'e31eeb0b6b3b87e1ea79378df864ffff', 15,
+    ...     ed_deb_path, dict(MD5='e31eeb0b6b3b87e1ea79378df864ffff'), 15,
     ...     'main/editors', 'important', 'foo', '1.2', ed_binary_changes,
     ...     old_only_policy, DevNullLogger())
     >>> list(ed_binary_deb.verifyDebTimestamp())
@@ -516,7 +516,7 @@
     >>> new_only_policy.accepted_type = ArchiveUploadType.BINARY_ONLY
     >>> new_only_policy.earliest_year = 2010
     >>> ed_binary_deb = DebBinaryUploadFile(
-    ...     ed_deb_path, 'e31eeb0b6b3b87e1ea79378df864ffff', 15,
+    ...     ed_deb_path, dict(MD5='e31eeb0b6b3b87e1ea79378df864ffff'), 15,
     ...     'main/editors', 'important', 'foo', '1.2', ed_binary_changes,
     ...     new_only_policy, DevNullLogger())
     >>> list(ed_binary_deb.verify())

=== modified file 'lib/lp/archiveuploader/tests/static-translations.txt'
--- lib/lp/archiveuploader/tests/static-translations.txt	2010-12-22 20:46:21 +0000
+++ lib/lp/archiveuploader/tests/static-translations.txt	2013-06-26 06:19:30 +0000
@@ -11,7 +11,7 @@
 
     >>> from lp.archiveuploader.nascentuploadfile import CustomUploadFile
     >>> custom_upload_file = CustomUploadFile(
-    ...     filepath="", digest="", size=1, priority_name="", policy=None,
+    ...     filepath="", checksums={}, size=1, priority_name="", policy=None,
     ...     component_and_section="main/raw-translations-static", logger=None)
 
     >>> print custom_upload_file.custom_type.name

=== modified file 'lib/lp/archiveuploader/tests/test_changesfile.py'
--- lib/lp/archiveuploader/tests/test_changesfile.py	2012-01-01 02:58:52 +0000
+++ lib/lp/archiveuploader/tests/test_changesfile.py	2013-06-26 06:19:30 +0000
@@ -8,6 +8,7 @@
 import os
 
 from debian.deb822 import Changes
+from testtools.matchers import MatchesStructure
 from zope.component import getUtility
 
 from lp.archiveuploader.changesfile import (
@@ -115,7 +116,7 @@
             "size": "1791",
             "section": "python",
             "priority": "optional",
-            "name": "dulwich_0.4.1-1.dsc"}]
+            "name": "dulwich_0.4.1-1_i386.deb"}]
         return contents
 
     def test_newline_in_Binary_field(self):
@@ -201,6 +202,21 @@
             UploadError,
             self.createChangesFile, "mypkg_0.1_i386.changes", contents)
 
+    def test_processFiles(self):
+        # processFiles sets self.files to a list of NascentUploadFiles.
+        contents = self.getBaseChanges()
+        changes = self.createChangesFile("mypkg_0.1_i386.changes", contents)
+        self.assertEqual([], list(changes.processFiles()))
+        [file] = changes.files
+        self.assertEqual(DebBinaryUploadFile, type(file))
+        self.assertThat(
+            file,
+            MatchesStructure.byEquality(
+                filepath=changes.dirname + "/dulwich_0.4.1-1_i386.deb",
+                checksums=dict(MD5="d2bd347b3fed184fe28e112695be491c"),
+                size=1791, priority_name="optional",
+                component_name="main", section_name="python"))
+
 
 class TestSignatureVerification(TestCase):
 

=== modified file 'lib/lp/archiveuploader/tests/test_nascentuploadfile.py'
--- lib/lp/archiveuploader/tests/test_nascentuploadfile.py	2012-06-27 14:07:43 +0000
+++ lib/lp/archiveuploader/tests/test_nascentuploadfile.py	2013-06-26 06:19:30 +0000
@@ -19,6 +19,7 @@
 from lp.archiveuploader.nascentuploadfile import (
     CustomUploadFile,
     DebBinaryUploadFile,
+    NascentUploadFile,
     UploadError,
     )
 from lp.archiveuploader.tests import AbsolutelyAnythingGoesUploadPolicy
@@ -31,7 +32,10 @@
     PackageUploadCustomFormat,
     )
 from lp.testing import TestCaseWithFactory
-from lp.testing.layers import LaunchpadZopelessLayer
+from lp.testing.layers import (
+    LaunchpadZopelessLayer,
+    ZopelessDatabaseLayer,
+    )
 
 
 class NascentUploadFileTestCase(TestCaseWithFactory):
@@ -51,15 +55,49 @@
 
         :param filename: Filename to use
         :param contents: Contents of the file
-        :return: Tuple with path, digest and size
+        :return: Tuple with path, md5 and size
         """
         path = os.path.join(self.makeTemporaryDirectory(), filename)
-        f = open(path, 'w')
-        try:
+        with open(path, 'w') as f:
             f.write(contents)
-        finally:
-            f.close()
-        return (path, hashlib.sha1(contents), len(contents))
+        return (
+            path, hashlib.md5(contents).hexdigest(),
+            hashlib.sha1(contents).hexdigest(), len(contents))
+
+
+class TestNascentUploadFile(NascentUploadFileTestCase):
+
+    layer = ZopelessDatabaseLayer
+
+    def test_checkSizeAndCheckSum_validates_size(self):
+        (path, md5, sha1, size) = self.writeUploadFile('foo', 'bar')
+        nuf = NascentUploadFile(
+            path, dict(MD5=md5), size - 1, 'main/devel', None, None, None)
+        self.assertRaisesWithContent(
+            UploadError,
+            'File foo mentioned in the changes has a size mismatch. 3 != 2',
+            nuf.checkSizeAndCheckSum)
+
+    def test_checkSizeAndCheckSum_validates_md5(self):
+        (path, md5, sha1, size) = self.writeUploadFile('foo', 'bar')
+        nuf = NascentUploadFile(
+            path, dict(MD5='deadbeef'), size, 'main/devel', None, None, None)
+        self.assertRaisesWithContent(
+            UploadError,
+            'File foo mentioned in the changes has a MD5 mismatch. '
+            '37b51d194a7513e45b56f6524f2d51f2 != deadbeef',
+            nuf.checkSizeAndCheckSum)
+
+    def test_checkSizeAndCheckSum_validates_sha1(self):
+        (path, md5, sha1, size) = self.writeUploadFile('foo', 'bar')
+        nuf = NascentUploadFile(
+            path, dict(MD5=md5, SHA1='foobar'), size, 'main/devel', None,
+            None, None)
+        self.assertRaisesWithContent(
+            UploadError,
+            'File foo mentioned in the changes has a SHA1 mismatch. '
+            '62cdb7020ff920e5aa642c3d4066950dd1f01f4d != foobar',
+            nuf.checkSizeAndCheckSum)
 
 
 class CustomUploadFileTests(NascentUploadFileTestCase):
@@ -70,9 +108,9 @@
     def createCustomUploadFile(self, filename, contents,
                                component_and_section, priority_name):
         """Simple wrapper to create a CustomUploadFile."""
-        (path, digest, size) = self.writeUploadFile(filename, contents)
+        (path, md5, sha1, size) = self.writeUploadFile(filename, contents)
         uploadfile = CustomUploadFile(
-            path, digest, size, component_and_section, priority_name,
+            path, dict(MD5=md5), size, component_and_section, priority_name,
             self.policy, self.logger)
         return uploadfile
 
@@ -156,11 +194,8 @@
     def createChangesFile(self, filename, changes):
         tempdir = self.makeTemporaryDirectory()
         path = os.path.join(tempdir, filename)
-        changes_fd = open(path, "w")
-        try:
+        with open(path, "w") as changes_fd:
             changes.dump(changes_fd)
-        finally:
-            changes_fd.close()
         return ChangesFile(path, self.policy, self.logger)
 
 
@@ -185,12 +220,12 @@
 
     def createDSCFile(self, filename, dsc, component_and_section,
                       priority_name, package, version, changes):
-        (path, digest, size) = self.writeUploadFile(filename, dsc.dump())
+        (path, md5, sha1, size) = self.writeUploadFile(filename, dsc.dump())
         if changes:
             self.assertEquals([], list(changes.processAddresses()))
         return DSCFile(
-            path, digest, size, component_and_section, priority_name, package,
-            version, changes, self.policy, self.logger)
+            path, dict(MD5=md5), size, component_and_section, priority_name,
+            package, version, changes, self.policy, self.logger)
 
     def test_filetype(self):
         # The filetype attribute is set based on the file extension.
@@ -332,10 +367,10 @@
             data = self.createDeb(filename, data_format)
         else:
             data = "DUMMY DATA"
-        (path, digest, size) = self.writeUploadFile(filename, data)
+        (path, md5, sha1, size) = self.writeUploadFile(filename, data)
         return DebBinaryUploadFile(
-            path, digest, size, component_and_section, priority_name, package,
-            version, changes, self.policy, self.logger)
+            path, dict(MD5=md5), size, component_and_section, priority_name,
+            package, version, changes, self.policy, self.logger)
 
     def test_unknown_priority(self):
         # Unknown priorities automatically get changed to 'extra'.