launchpad-reviewers team mailing list archive

[Merge] lp:~wgrant/launchpad/sha256-archiveuploader into lp:launchpad

 

William Grant has proposed merging lp:~wgrant/launchpad/sha256-archiveuploader into lp:launchpad with lp:~wgrant/launchpad/sha256-archiveuploader-pre as a prerequisite.

Commit message:
Verify SHA-1 and SHA-256 checksums in uploaded .changes and .dsc when present.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1190885 in Launchpad itself: "nascentupload doesn't verify SHA-256 hashes"
  https://bugs.launchpad.net/launchpad/+bug/1190885

For more details, see:
https://code.launchpad.net/~wgrant/launchpad/sha256-archiveuploader/+merge/171465

Verify SHA-1 and SHA-256 checksums in uploaded .changes and .dsc files when present. File list parsing is now shared between DSCFile and ChangesFile, with improved internal consistency checks: duplicate filenames are rejected, the sets of files must match across the Files, Checksums-Sha1 and Checksums-Sha256 fields, and the declared file sizes must agree.
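
For reviewers, here is a minimal usage sketch of the new helpers, based on the implementation and tests in the diff below. It assumes a Launchpad tree with this branch applied, so that parse_file_list and merge_file_lists are importable from lp.archiveuploader.utils; the filenames and hash values are placeholders.

    from lp.archiveuploader.utils import merge_file_lists, parse_file_list

    # Parse the raw field bodies: Files lines carry five fields in a
    # .changes file, while Checksums-Sha1/Sha256 lines always carry three.
    files = parse_file_list(
        'a 1 main/devel optional foo.deb\n'
        'b 2 main/devel optional bar.dsc', 'Files', 5)
    sha1s = parse_file_list('aa 1 foo.deb\nbb 2 bar.dsc', 'Checksums-Sha1', 3)

    # Merge into (filename, {algo: hash}, size, section, priority) tuples.
    # Duplicate filenames, or Checksums-* entries whose filenames or sizes
    # disagree with the Files field, raise UploadError instead.
    merged = merge_file_lists(files, sha1s, None)
    # => [('foo.deb', {'MD5': 'a', 'SHA1': 'aa'}, '1', 'main/devel', 'optional'),
    #     ('bar.dsc', {'MD5': 'b', 'SHA1': 'bb'}, '2', 'main/devel', 'optional')]
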
=== modified file 'lib/lp/archiveuploader/changesfile.py'
--- lib/lp/archiveuploader/changesfile.py	2013-06-26 06:24:32 +0000
+++ lib/lp/archiveuploader/changesfile.py	2013-06-26 06:24:33 +0000
@@ -29,15 +29,17 @@
     SourceUploadFile,
     splitComponentAndSection,
     UdebBinaryUploadFile,
-    UploadError,
-    UploadWarning,
     )
 from lp.archiveuploader.utils import (
     determine_binary_file_type,
     determine_source_file_type,
+    merge_file_lists,
+    parse_file_list,
     re_changes_file_name,
     re_isadeb,
     re_issource,
+    UploadError,
+    UploadWarning,
     )
 from lp.registry.interfaces.sourcepackage import (
     SourcePackageFileType,
@@ -173,17 +175,22 @@
         all exceptions that are generated while processing all mentioned
         files.
         """
+        sha1_lines = None
+        sha256_lines = None
+        try:
+            files_lines = parse_file_list(self._dict['Files'], 'Files', 5)
+            sha1_lines = parse_file_list(
+                self._dict.get('Checksums-Sha1'), 'Checksums-Sha1', 3)
+            sha256_lines = parse_file_list(
+                self._dict.get('Checksums-Sha256'), 'Checksums-Sha256', 3)
+            raw_files = merge_file_lists(files_lines, sha1_lines, sha256_lines)
+        except UploadError as e:
+            yield e
+            return
+
         files = []
-        for fileline in self._dict['Files'].strip().split("\n"):
-            # files lines from a changes file are always of the form:
-            # CHECKSUM SIZE [COMPONENT/]SECTION PRIORITY FILENAME
-            try:
-                md5, size, component_and_section, priority_name, filename = (
-                    fileline.strip().split())
-            except ValueError:
-                yield UploadError(
-                    "Wrong number of fields in Files line in .changes.")
-                continue
+        for attr in raw_files:
+            filename, hashes, size, component_and_section, priority_name = attr
             filepath = os.path.join(self.dirname, filename)
             try:
                 if self.isCustom(component_and_section):
@@ -191,7 +198,7 @@
                     # otherwise the tarballs in custom uploads match
                     # with source_match.
                     file_instance = CustomUploadFile(
-                        filepath, dict(MD5=md5), size, component_and_section,
+                        filepath, hashes, size, component_and_section,
                         priority_name, self.policy, self.logger)
                 else:
                     try:
@@ -203,7 +210,7 @@
                         continue
 
                     file_instance = cls(
-                        filepath, dict(MD5=md5), size, component_and_section,
+                        filepath, hashes, size, component_and_section,
                         priority_name, package, self.version, self,
                         self.policy, self.logger)
 

=== modified file 'lib/lp/archiveuploader/dscfile.py'
--- lib/lp/archiveuploader/dscfile.py	2013-06-26 06:24:32 +0000
+++ lib/lp/archiveuploader/dscfile.py	2013-06-26 06:24:33 +0000
@@ -1,5 +1,6 @@
-# Copyright 2009-2012 Canonical Ltd.  This software is licensed under the
-# GNU Affero General Public License version 3 (see the file LICENSE).
+# Copyright 2009-2012 Canonical Ltd.  This software is licensed under
+# the GNU Affero General Public License version 3 (see the file
+# LICENSE).
 
 """ DSCFile and related.
 
@@ -33,8 +34,6 @@
 from lp.archiveuploader.nascentuploadfile import (
     NascentUploadFile,
     SourceUploadFile,
-    UploadError,
-    UploadWarning,
     )
 from lp.archiveuploader.tagfiles import (
     parse_tagfile_content,
@@ -45,12 +44,16 @@
     DpkgSourceError,
     extract_dpkg_source,
     get_source_file_extension,
+    merge_file_lists,
+    parse_file_list,
     ParseMaintError,
     re_is_component_orig_tar_ext,
     re_issource,
     re_valid_pkg_name,
     re_valid_version,
     safe_fix_maintainer,
+    UploadError,
+    UploadWarning,
     )
 from lp.registry.interfaces.gpg import IGPGKeySet
 from lp.registry.interfaces.person import (
@@ -350,10 +353,23 @@
         except UploadError as error:
             yield error
 
+        sha1_lines = None
+        sha256_lines = None
+        try:
+            files_lines = parse_file_list(self._dict['Files'], 'Files', 3)
+            sha1_lines = parse_file_list(
+                self._dict.get('Checksums-Sha1'), 'Checksums-Sha1', 3)
+            sha256_lines = parse_file_list(
+                self._dict.get('Checksums-Sha256'), 'Checksums-Sha256', 3)
+            raw_files = merge_file_lists(
+                files_lines, sha1_lines, sha256_lines, changes=False)
+        except UploadError as e:
+            yield e
+            return
+
         files = []
-        for fileline in self._dict['Files'].strip().split("\n"):
-            # DSC lines are always of the form: CHECKSUM SIZE FILENAME
-            md5, size, filename = fileline.strip().split()
+        for attr in raw_files:
+            filename, hashes, size = attr
             if not re_issource.match(filename):
                 # DSC files only really hold on references to source
                 # files; they are essentially a description of a source
@@ -364,7 +380,7 @@
             filepath = os.path.join(self.dirname, filename)
             try:
                 file_instance = DSCUploadedFile(
-                    filepath, dict(MD5=md5), size, self.policy, self.logger)
+                    filepath, hashes, size, self.policy, self.logger)
             except UploadError as error:
                 yield error
             else:

=== modified file 'lib/lp/archiveuploader/nascentupload.py'
--- lib/lp/archiveuploader/nascentupload.py	2012-10-31 23:44:22 +0000
+++ lib/lp/archiveuploader/nascentupload.py	2013-06-26 06:24:33 +0000
@@ -33,10 +33,12 @@
     DebBinaryUploadFile,
     SourceUploadFile,
     UdebBinaryUploadFile,
+    )
+from lp.archiveuploader.utils import (
+    determine_source_file_type,
     UploadError,
     UploadWarning,
     )
-from lp.archiveuploader.utils import determine_source_file_type
 from lp.registry.interfaces.distribution import IDistributionSet
 from lp.registry.interfaces.pocket import PackagePublishingPocket
 from lp.registry.interfaces.sourcepackage import SourcePackageFileType

=== modified file 'lib/lp/archiveuploader/nascentuploadfile.py'
--- lib/lp/archiveuploader/nascentuploadfile.py	2013-06-26 06:24:32 +0000
+++ lib/lp/archiveuploader/nascentuploadfile.py	2013-06-26 06:24:33 +0000
@@ -14,8 +14,6 @@
     'PackageUploadFile',
     'SourceUploadFile',
     'UdebBinaryUploadFile',
-    'UploadError',
-    'UploadWarning',
     'splitComponentAndSection',
     ]
 
@@ -46,6 +44,7 @@
     re_taint_free,
     re_valid_pkg_name,
     re_valid_version,
+    UploadError,
     )
 from lp.buildmaster.enums import BuildStatus
 from lp.services.encoding import guess as guess_encoding
@@ -65,14 +64,6 @@
 apt_pkg.init_system()
 
 
-class UploadError(Exception):
-    """All upload errors are returned in this form."""
-
-
-class UploadWarning(Warning):
-    """All upload warnings are returned in this form."""
-
-
 class TarFileDateChecker:
     """Verify all files in a tar in a deb are within a given date range.
 

=== added directory 'lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1'
=== added file 'lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1/badhash_1.0-1_i386.changes'
--- lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1/badhash_1.0-1_i386.changes	1970-01-01 00:00:00 +0000
+++ lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1/badhash_1.0-1_i386.changes	2013-06-26 06:24:33 +0000
@@ -0,0 +1,22 @@
+Format: 1.8
+Date: Thu, 15 Apr 2009 15:14:09 +0100
+Source: badhash
+Binary: badhash
+Architecture: i386
+Version: 1.0-1
+Distribution: hoary
+Urgency: low
+Maintainer: Launchpad team <launchpad@xxxxxxxxxxxxxxxxxxx>
+Changed-By: Celso Providelo <cprov@xxxxxxxxxxxxx>
+Description: 
+ badhash  - Stuff for testing
+Changes: 
+ badhash (1.0-1) hoary; urgency=low
+ .
+   * Testing hash mismatches
+Checksums-Sha1: 
+ 91556113ad38eb35d2fe03d27ae646e0ed487a3d 668 badhash_1.0-1_i386.deb
+Checksums-Sha256: 
+ 5ca0ce3d3bfde3cc698d9ad4b027663abecb958bea641d29ac3bf8333fd3ebad 668 badhash_1.0-1_i386.deb
+Files: 
+ 16d23e5c69723e511aabc64a3ac89afa 668 devel optional badhash_1.0-1_i386.deb

=== added file 'lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1/badhash_1.0-1_i386.deb'
Binary files lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1/badhash_1.0-1_i386.deb	1970-01-01 00:00:00 +0000 and lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1/badhash_1.0-1_i386.deb	2013-06-26 06:24:33 +0000 differ
=== added directory 'lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1_broken_dsc'
=== added file 'lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1_broken_dsc/badhash_1.0-1.dsc'
--- lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1_broken_dsc/badhash_1.0-1.dsc	1970-01-01 00:00:00 +0000
+++ lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1_broken_dsc/badhash_1.0-1.dsc	2013-06-26 06:24:33 +0000
@@ -0,0 +1,13 @@
+Format: 1.0
+Source: badhash
+Binary: badhash-bin
+Architecture: any
+Version: 1.0-1
+Maintainer: Launchpad team <launchpad@xxxxxxxxxxxxxxxxxxx>
+Standards-Version: 3.6.2
+Checksums-Sha1: 
+ e7ba6a0bf1f18fd4e9c5bc98d68e4b15278545a0 743 badhash_1.0-1.tar.gz
+Checksums-Sha256: 
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 743 badhash_1.0-1.tar.gz
+Files: 
+ 91cf79aaaf959dbc9d77588e98509784 743 badhash_1.0-1.tar.gz

=== added file 'lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1_broken_dsc/badhash_1.0-1.tar.gz'
Binary files lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1_broken_dsc/badhash_1.0-1.tar.gz	1970-01-01 00:00:00 +0000 and lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1_broken_dsc/badhash_1.0-1.tar.gz	2013-06-26 06:24:33 +0000 differ
=== added file 'lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1_broken_dsc/badhash_1.0-1_source.changes'
--- lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1_broken_dsc/badhash_1.0-1_source.changes	1970-01-01 00:00:00 +0000
+++ lib/lp/archiveuploader/tests/data/suite/badhash_1.0-1_broken_dsc/badhash_1.0-1_source.changes	2013-06-26 06:24:33 +0000
@@ -0,0 +1,25 @@
+Format: 1.8
+Date: Thu, 15 Apr 2009 15:14:09 +0100
+Source: badhash
+Binary: badhash-bin
+Architecture: source
+Version: 1.0-1
+Distribution: hoary
+Urgency: low
+Maintainer: Launchpad team <launchpad@xxxxxxxxxxxxxxxxxxx>
+Changed-By: Celso Providelo <cprov@xxxxxxxxxxxxx>
+Description: 
+ badhash-bin - Stuff for testing
+Changes: 
+ badhash (1.0-1) hoary; urgency=low
+ .
+   * Testing bad hashes in the DSC.
+Checksums-Sha1: 
+ fcbb6549b6d85d3011706e2c6f8950914fc87867 426 badhash_1.0-1.dsc
+ e7ba6a0bf1f18fd4e9c5bc98d68e4b15278545a0 743 badhash_1.0-1.tar.gz
+Checksums-Sha256: 
+ 4fe14f7e87c85825151832a1efa9c5308ec4d1bbf47742083ef6c11f865b72c3 426 badhash_1.0-1.dsc
+ a29ec2370df83193c3fb2cc9e1287dbfe9feba04108ccfa490bbe20ea66f3d08 743 badhash_1.0-1.tar.gz
+Files: 
+ d336c49d4d16225b7108f7cf91d904b2 426 devel optional badhash_1.0-1.dsc
+ 91cf79aaaf959dbc9d77588e98509784 743 devel optional badhash_1.0-1.tar.gz

=== modified file 'lib/lp/archiveuploader/tests/test_changesfile.py'
--- lib/lp/archiveuploader/tests/test_changesfile.py	2013-06-26 06:24:32 +0000
+++ lib/lp/archiveuploader/tests/test_changesfile.py	2013-06-26 06:24:33 +0000
@@ -15,6 +15,7 @@
     CannotDetermineFileTypeError,
     ChangesFile,
     determine_file_class_and_name,
+    merge_file_lists,
     )
 from lp.archiveuploader.dscfile import DSCFile
 from lp.archiveuploader.nascentuploadfile import (
@@ -79,6 +80,79 @@
             'foo')
 
 
+class TestMergeFileLists(TestCase):
+
+    def test_all_hashes(self):
+        # merge_file_lists returns a list of
+        # (filename, {algo: hash}, size, component_and_section, priority).
+        files = [
+            ('a', '1', 'd', 'e', 'foo.deb'), ('b', '2', 's', 'o', 'bar.dsc')]
+        checksums_sha1 = [('aa', '1', 'foo.deb'), ('bb', '2', 'bar.dsc')]
+        checksums_sha256 = [('aaa', '1', 'foo.deb'), ('bbb', '2', 'bar.dsc')]
+        self.assertEqual(
+            [("foo.deb",
+              {'MD5': 'a', 'SHA1': 'aa', 'SHA256': 'aaa'}, '1', 'd', 'e'),
+             ("bar.dsc",
+              {'MD5': 'b', 'SHA1': 'bb', 'SHA256': 'bbb'}, '2', 's', 'o')],
+             merge_file_lists(files, checksums_sha1, checksums_sha256))
+
+    def test_all_hashes_for_dsc(self):
+        # merge_file_lists in DSC mode returns a list of
+        # (filename, {algo: hash}, size).
+        files = [
+            ('a', '1', 'foo.deb'), ('b', '2', 'bar.dsc')]
+        checksums_sha1 = [('aa', '1', 'foo.deb'), ('bb', '2', 'bar.dsc')]
+        checksums_sha256 = [('aaa', '1', 'foo.deb'), ('bbb', '2', 'bar.dsc')]
+        self.assertEqual(
+            [("foo.deb", {'MD5': 'a', 'SHA1': 'aa', 'SHA256': 'aaa'}, '1'),
+             ("bar.dsc", {'MD5': 'b', 'SHA1': 'bb', 'SHA256': 'bbb'}, '2')],
+             merge_file_lists(
+                 files, checksums_sha1, checksums_sha256, changes=False))
+
+    def test_just_md5(self):
+        # merge_file_lists copes with the omission of SHA1 or SHA256
+        # hashes.
+        files = [
+            ('a', '1', 'd', 'e', 'foo.deb'), ('b', '2', 's', 'o', 'bar.dsc')]
+        self.assertEqual(
+            [("foo.deb", {'MD5': 'a'}, '1', 'd', 'e'),
+             ("bar.dsc", {'MD5': 'b'}, '2', 's', 'o')],
+             merge_file_lists(files, None, None))
+
+    def test_duplicate_filename_is_rejected(self):
+        # merge_file_lists rejects fields with duplicated filenames.
+        files = [
+            ('a', '1', 'd', 'e', 'foo.deb'), ('b', '2', 's', 'o', 'foo.deb')]
+        self.assertRaisesWithContent(
+            UploadError, "Duplicate filenames in Files field.",
+            merge_file_lists, files, None, None)
+
+    def test_differing_file_lists_are_rejected(self):
+        # merge_file_lists rejects Checksums-* fields which are present
+        # but have a different set of filenames.
+        files = [
+            ('a', '1', 'd', 'e', 'foo.deb'), ('b', '2', 's', 'o', 'bar.dsc')]
+        sha1s = [('aa', '1', 'foo.deb')]
+        sha256s = [('aaa', '1', 'foo.deb')]
+        self.assertRaisesWithContent(
+            UploadError, "Mismatch between Checksums-Sha1 and Files fields.",
+            merge_file_lists, files, sha1s, None)
+        self.assertRaisesWithContent(
+            UploadError, "Mismatch between Checksums-Sha256 and Files fields.",
+            merge_file_lists, files, None, sha256s)
+
+    def test_differing_file_sizes_are_rejected(self):
+        # merge_file_lists rejects Checksums-* fields which are present
+        # but declare a different size for some file.
+        files = [('a', '1', 'd', 'e', 'foo.deb')]
+        sha1s = [('aa', '1', 'foo.deb')]
+        sha1s_bad_size = [('aa', '2', 'foo.deb')]
+        self.assertEqual(1, len(merge_file_lists(files, sha1s, None)))
+        self.assertRaisesWithContent(
+            UploadError, "Mismatch between Checksums-Sha1 and Files fields.",
+            merge_file_lists, files, sha1s_bad_size, None)
+
+
 class ChangesFileTests(TestCase):
     """Tests for ChangesFile."""
 
@@ -217,6 +291,52 @@
                 size=1791, priority_name="optional",
                 component_name="main", section_name="python"))
 
+    def test_processFiles_additional_checksums(self):
+        # processFiles parses the Checksums-Sha1 and Checksums-Sha256
+        # fields if present.
+        contents = self.getBaseChanges()
+        md5 = "d2bd347b3fed184fe28e112695be491c"
+        sha1 = "378b3498ead213d35a82033a6e9196014a5ef25c"
+        sha256 = (
+            "39bb3bad01bf931b34f3983536c0f331e4b4e3e38fb78abfc75e5b09"
+            "efd6507f")
+        contents["Checksums-Sha1"] = [{
+            "sha1": sha1, "size": "1791",
+            "name": "dulwich_0.4.1-1_i386.deb"}]
+        contents["Checksums-Sha256"] = [{
+            "sha256": sha256, "size": "1791",
+            "name": "dulwich_0.4.1-1_i386.deb"}]
+        changes = self.createChangesFile("mypkg_0.1_i386.changes", contents)
+        self.assertEqual([], list(changes.processFiles()))
+        [file] = changes.files
+        self.assertEqual(DebBinaryUploadFile, type(file))
+        self.assertThat(
+            file,
+            MatchesStructure.byEquality(
+                filepath=changes.dirname + "/dulwich_0.4.1-1_i386.deb",
+                checksums=dict(MD5=md5, SHA1=sha1, SHA256=sha256),
+                size=1791, priority_name="optional",
+                component_name="main", section_name="python"))
+
+    def test_processFiles_additional_checksums_must_match(self):
+        # processFiles ensures that Files, Checksums-Sha1 and
+        # Checksums-Sha256 all list the same files.
+        contents = self.getBaseChanges()
+        contents["Checksums-Sha1"] = [{
+            "sha1": "aaa", "size": "1791", "name": "doesnotexist.deb"}]
+        changes = self.createChangesFile("mypkg_0.1_i386.changes", contents)
+        [error] = list(changes.processFiles())
+        self.assertEqual(
+            "Mismatch between Checksums-Sha1 and Files fields.", error[0])
+
+    def test_processFiles_rejects_duplicate_filenames(self):
+        # processFiles ensures that Files lists each file only once.
+        contents = self.getBaseChanges()
+        contents['Files'].append(contents['Files'][0])
+        changes = self.createChangesFile("mypkg_0.1_i386.changes", contents)
+        [error] = list(changes.processFiles())
+        self.assertEqual("Duplicate filenames in Files field.", error[0])
+
 
 class TestSignatureVerification(TestCase):
 

=== modified file 'lib/lp/archiveuploader/tests/test_dscfile.py'
--- lib/lp/archiveuploader/tests/test_dscfile.py	2012-12-05 17:26:03 +0000
+++ lib/lp/archiveuploader/tests/test_dscfile.py	2013-06-26 06:24:33 +0000
@@ -18,7 +18,10 @@
     unpack_source,
     )
 from lp.archiveuploader.nascentuploadfile import UploadError
-from lp.archiveuploader.tests import datadir
+from lp.archiveuploader.tests import (
+    datadir,
+    getPolicy,
+    )
 from lp.archiveuploader.uploadpolicy import BuildDaemonUploadPolicy
 from lp.registry.interfaces.sourcepackage import SourcePackageFileType
 from lp.registry.model.person import Person
@@ -120,6 +123,34 @@
             error.args[0], "debian/changelog file too large, 10MiB max")
 
 
+class FakeChangesFile:
+    architectures = ['source']
+
+
+class TestDSCFileWithDatabase(TestCaseWithFactory):
+
+    layer = ZopelessDatabaseLayer
+
+    def test_checkFiles_verifies_additional_hashes(self):
+        """Test that checkFiles detects SHA1 and SHA256 mismatches."""
+        policy = getPolicy(
+            name="sync", distro="ubuntu", distroseries="hoary")
+        path = datadir(os.path.join(
+            'suite', 'badhash_1.0-1_broken_dsc', 'badhash_1.0-1.dsc'))
+        dsc = DSCFile(
+            path, {}, 426, 'main/editors', 'priority',
+            'badhash', '1.0-1', FakeChangesFile(), policy, DevNullLogger())
+        errors = [e[0] for e in dsc.verify()]
+        self.assertEqual(
+            ['File badhash_1.0-1.tar.gz mentioned in the changes has a SHA256'
+             ' mismatch. a29ec2370df83193c3fb2cc9e1287dbfe9feba04108ccfa490bb'
+             'e20ea66f3d08 != aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
+             'aaaaaaaaaaaaaaaaa',
+             'Files specified in DSC are broken or missing, skipping package '
+             'unpack verification.'],
+            errors)
+
+
 class TestSignableTagFile(TestCaseWithFactory):
     """Test `SignableTagFile`, a helper mixin."""
 

=== modified file 'lib/lp/archiveuploader/tests/test_nascentupload.py'
--- lib/lp/archiveuploader/tests/test_nascentupload.py	2012-01-01 02:58:52 +0000
+++ lib/lp/archiveuploader/tests/test_nascentupload.py	2013-06-26 06:24:33 +0000
@@ -9,9 +9,20 @@
 from testtools.matchers import MatchesStructure
 
 from lp.archiveuploader.changesfile import determine_file_class_and_name
-from lp.archiveuploader.nascentupload import NascentUpload
+from lp.archiveuploader.nascentupload import (
+    EarlyReturnUploadError,
+    NascentUpload,
+    )
+from lp.archiveuploader.tests import (
+    datadir,
+    getPolicy,
+    )
+from lp.archiveuploader.uploadpolicy import ArchiveUploadType
 from lp.services.log.logger import DevNullLogger
-from lp.testing.layers import LaunchpadZopelessLayer
+from lp.testing.layers import (
+    LaunchpadZopelessLayer,
+    ZopelessDatabaseLayer,
+    )
 
 
 class FakeChangesFile:
@@ -105,3 +116,25 @@
             ddeb,
             MatchesStructure.fromExample(
                 deb, "component_name", "section_name", "priority_name"))
+
+
+class TestNascentUpload(TestCase):
+
+    layer = ZopelessDatabaseLayer
+
+    def test_hash_mismatch_rejects(self):
+        # A hash mismatch for any uploaded file will cause the upload to
+        # be rejected.
+        policy = getPolicy(
+            name="sync", distro="ubuntu", distroseries="hoary")
+        policy.accepted_type = ArchiveUploadType.BINARY_ONLY
+        upload = NascentUpload.from_changesfile_path(
+            datadir("suite/badhash_1.0-1/badhash_1.0-1_i386.changes"),
+            policy, DevNullLogger())
+        upload.process()
+        self.assertTrue(upload.is_rejected)
+        self.assertEqual(
+            'File badhash_1.0-1_i386.deb mentioned in the changes has a SHA1 '
+            'mismatch. 2ca33cf32a45852c62b465aaf9063fb7deb31725 != '
+            '91556113ad38eb35d2fe03d27ae646e0ed487a3d',
+            upload.rejection_message)

=== modified file 'lib/lp/archiveuploader/tests/test_uploadprocessor.py'
--- lib/lp/archiveuploader/tests/test_uploadprocessor.py	2013-02-14 01:10:48 +0000
+++ lib/lp/archiveuploader/tests/test_uploadprocessor.py	2013-06-26 06:24:33 +0000
@@ -1286,10 +1286,12 @@
         uploadprocessor = self.setupBreezyAndGetUploadProcessor()
         upload_dir = self.queueUpload("bar_1.0-1_malformed_section")
         self.processUpload(uploadprocessor, upload_dir)
-        self.assertRejectionMessage(
-            uploadprocessor,
-            'Wrong number of fields in Files line in .changes.',
-            with_file=False)
+        expected = (
+            'Wrong number of fields in Files field line.\n'
+            'Further error processing not possible because of a '
+            'critical previous error.')
+        self.assertEqual(
+            expected, uploadprocessor.last_processed_upload.rejection_message)
 
     def testUploadWithUnknownComponentIsRejected(self):
         uploadprocessor = self.setupBreezyAndGetUploadProcessor()

=== modified file 'lib/lp/archiveuploader/uploadprocessor.py'
--- lib/lp/archiveuploader/uploadprocessor.py	2013-01-22 00:33:13 +0000
+++ lib/lp/archiveuploader/uploadprocessor.py	2013-06-26 06:24:33 +0000
@@ -60,12 +60,12 @@
 from lp.archiveuploader.nascentupload import (
     EarlyReturnUploadError,
     NascentUpload,
-    UploadError,
     )
 from lp.archiveuploader.uploadpolicy import (
     BuildDaemonUploadPolicy,
     UploadPolicyError,
     )
+from lp.archiveuploader.utils import UploadError
 from lp.buildmaster.enums import BuildStatus
 from lp.buildmaster.interfaces.buildfarmjob import ISpecificBuildFarmJobSource
 from lp.code.interfaces.sourcepackagerecipebuild import (

=== modified file 'lib/lp/archiveuploader/utils.py'
--- lib/lp/archiveuploader/utils.py	2012-06-19 22:53:13 +0000
+++ lib/lp/archiveuploader/utils.py	2013-06-26 06:24:33 +0000
@@ -6,8 +6,15 @@
 __metaclass__ = type
 
 __all__ = [
+    'determine_binary_file_type',
+    'determine_source_file_type',
     'DpkgSourceError',
     'extract_dpkg_source',
+    'get_source_file_extension',
+    'merge_file_lists',
+    'parse_file_list',
+    'ParseMaintError',
+    'prefix_multi_line_string',
     're_taint_free',
     're_isadeb',
     're_issource',
@@ -18,15 +25,13 @@
     're_valid_pkg_name',
     're_changes_file_name',
     're_extract_src_version',
-    'get_source_file_extension',
-    'determine_binary_file_type',
-    'determine_source_file_type',
-    'prefix_multi_line_string',
     'safe_fix_maintainer',
-    'ParseMaintError',
+    'UploadError',
+    'UploadWarning',
     ]
 
 
+from collections import defaultdict
 import email.Header
 import os
 import re
@@ -40,6 +45,14 @@
 from lp.soyuz.enums import BinaryPackageFileType
 
 
+class UploadError(Exception):
+    """All upload errors are returned in this form."""
+
+
+class UploadWarning(Warning):
+    """All upload warnings are returned in this form."""
+
+
 class DpkgSourceError(Exception):
 
     _fmt = "Unable to unpack source package (%(result)s): %(output)s"
@@ -293,3 +306,76 @@
     if result != 0:
         dpkg_output = prefix_multi_line_string(output, "  ")
         raise DpkgSourceError(result=result, output=dpkg_output, command=args)
+
+
+def parse_file_list(s, field_name, count):
+    if s is None:
+        return None
+    processed = []
+    for line in s.strip().split('\n'):
+        split = line.strip().split()
+        if len(split) != count:
+            raise UploadError(
+                "Wrong number of fields in %s field line." % field_name)
+        processed.append(split)
+    return processed
+
+
+def merge_file_lists(files, checksums_sha1, checksums_sha256, changes=True):
+    """Merge Files, Checksums-Sha1 and Checksums-Sha256 fields.
+
+    Turns lists of (MD5, size, [extras, ...,] filename),
+    (SHA1, size, filename) and (SHA256, size, filename) into a list of
+    (filename, {algo: hash}, size[, extras, ...]).
+
+    Duplicate filenames, size conflicts, and files with missing hashes
+    will cause an UploadError.
+
+    'extras' is (section, priority) if changes=True, otherwise it is omitted.
+    """
+    # Preprocess the additional hashes, counting each (filename, size)
+    # that we see.
+    file_hashes = defaultdict(dict)
+    hash_files = defaultdict(lambda: defaultdict(int))
+    for (algo, checksums) in [
+            ('SHA1', checksums_sha1), ('SHA256', checksums_sha256)]:
+        if checksums is None:
+            continue
+        for hash, size, filename in checksums:
+            file_hashes[filename][algo] = hash
+            hash_files[algo][(filename, size)] += 1
+
+    # Produce a file list containing all of the present hashes, counting
+    # each filename and (filename, size) that we see. We'll throw away
+    # the complete list later if we discover that there are duplicates
+    # or mismatches with the Checksums-* fields.
+    complete_files = []
+    file_counter = defaultdict(int)
+    for attrs in files:
+        if changes:
+            md5, size, section, priority, filename = attrs
+        else:
+            md5, size, filename = attrs
+        file_hashes[filename]['MD5'] = md5
+        file_counter[filename] += 1
+        hash_files['MD5'][(filename, size)] += 1
+        if changes:
+            complete_files.append(
+                (filename, file_hashes[filename], size, section, priority))
+        else:
+            complete_files.append(
+                (filename, file_hashes[filename], size))
+
+    # Ensure that each filename was only listed in Files once.
+    if set(file_counter.itervalues()) - set([1]):
+        raise UploadError("Duplicate filenames in Files field.")
+
+    # Ensure that the Checksums-Sha1 and Checksums-Sha256 fields, if
+    # present, list the same filenames and sizes as the Files field.
+    for field, algo in [
+            ('Checksums-Sha1', 'SHA1'), ('Checksums-Sha256', 'SHA256')]:
+        if algo in hash_files and hash_files[algo] != hash_files['MD5']:
+            raise UploadError("Mismatch between %s and Files fields." % field)
+    return complete_files
+
+