launchpad-reviewers team mailing list archive
  
  - 
     launchpad-reviewers team launchpad-reviewers team
- 
    Mailing list archive
  
- 
    Message #20585
  
Re:  [Merge] lp:~apw/launchpad/signing-add-sha256-checksums into lp:launchpad
  
Review: Approve
I much prefer the DirectoryHash abstraction, thanks.  Here's a boring review full of mostly style nits, after which this should be good to land.
Diff comments:
> === modified file 'lib/lp/archivepublisher/publishing.py'
> --- lib/lp/archivepublisher/publishing.py	2016-04-29 12:57:58 +0000
> +++ lib/lp/archivepublisher/publishing.py	2016-06-06 15:41:47 +0000
> @@ -1462,3 +1472,58 @@
>              count += 1
>          self.archive.name = new_name
>          self.log.info("Renamed deleted archive '%s'.", self.archive.reference)
> +
> +
> +class DirectoryHash:
> +    """Represents a directory hierachy for hashing."""
hierarchy
> +
> +    def __init__(self, root, tmpdir, log):
> +        self.root = root
> +        self.tmpdir = tmpdir
> +        self.log = log
> +        self.checksum_hash = []
> +
> +        for usable in self._usable_archive_hashes:
> +            checksum_file = os.path.join(self.root, usable.dh_name)
> +            self.checksum_hash.append(
> +                (RepositoryIndexFile(checksum_file, self.tmpdir), usable))
> +
> +    def __enter__(self):
> +        return self
> +
> +    def __exit__(self, type, value, traceback):
> +        self.close()
> +
> +    @property
> +    def _usable_archive_hashes(self):
> +        usable = []
> +        for archive_hash in archive_hashes:
> +            if archive_hash.write_directory_hash:
> +                usable.append(archive_hash)
> +        return usable
I'd probably write this as a generator:
    @property
    def _usable_archive_hashes(self):
        for archive_hash in archive_hashes:
            if archive_hash.write_directory_hash:
                yield archive_hash
> +
> +    def add(self, path):
> +        """Add a path to be checksummed."""
> +        hashes = [
> +            (checksum_file, archive_hash.hash_factory())
> +            for (checksum_file, archive_hash) in self.checksum_hash]
> +        size = 0
> +        with open(path, 'rb') as in_file:
> +            for chunk in iter(lambda: in_file.read(256 * 1024), ""):
> +                for (checksum_file, hashobj) in hashes:
> +                    hashobj.update(chunk)
> +                size += len(chunk)
You never actually use size.
> +
> +        for (checksum_file, hashobj) in hashes:
> +            checksum_file.write("%s *%s\n" %
> +                (hashobj.hexdigest(), path[len(self.root) + 1:]))
> +
> +    def add_dir(self, path):
> +        """Recursively add a directory path to be checksummed."""
> +        for dirpath, dirnames, filenames in os.walk(path):
> +            for filename in filenames:
> +                self.add(os.path.join(dirpath, filename))
> +
> +    def close(self):
> +        for (checksum_file, archive_hash) in self.checksum_hash:
> +            checksum_file.close()
> 
> === modified file 'lib/lp/archivepublisher/signing.py'
> --- lib/lp/archivepublisher/signing.py	2016-05-31 12:40:38 +0000
> +++ lib/lp/archivepublisher/signing.py	2016-06-06 15:41:47 +0000
> @@ -303,6 +304,13 @@
>          if 'tarball' in self.signing_options:
>              self.convertToTarball()
>  
> +        # Avoid circular import.
> +        from lp.archivepublisher.publishing import DirectoryHash
The circular-import avoidance is fine, but put the import at the top of the method, just below the docstring.
> +
> +        versiondir = os.path.join(self.tmpdir, self.version)
> +        with DirectoryHash(versiondir, self.tmpdir, self.logger) as hasher:
> +            hasher.add_dir(versiondir)
> +
>      def shouldInstall(self, filename):
>          return filename.startswith("%s/" % self.version)
>  
> 
> === modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
> --- lib/lp/archivepublisher/tests/test_publisher.py	2016-05-14 10:17:36 +0000
> +++ lib/lp/archivepublisher/tests/test_publisher.py	2016-06-06 15:41:47 +0000
> @@ -24,6 +24,8 @@
>  import time
>  
>  from debian.deb822 import Release
> +
> +
No need for these; this is all in the "third-party libraries" block, so no intervening blank lines.  It's possible that format-imports is slightly buggy and doesn't recognise try/except ImportError blocks.
>  try:
>      import lzma
>  except ImportError:
> @@ -92,7 +96,10 @@
>  from lp.soyuz.interfaces.archive import IArchiveSet
>  from lp.soyuz.interfaces.archivefile import IArchiveFileSet
>  from lp.soyuz.tests.test_publishing import TestNativePublishingBase
> -from lp.testing import TestCaseWithFactory
> +from lp.testing import (
> +    TestCase,
> +    TestCaseWithFactory
Trailing comma.
> +    )
>  from lp.testing.fakemethod import FakeMethod
>  from lp.testing.gpgkeys import gpgkeysdir
>  from lp.testing.keyserver import KeyServerTac
> @@ -3160,3 +3167,99 @@
>  
>          partner = self.factory.makeArchive(purpose=ArchivePurpose.PARTNER)
>          self.assertEqual([], self.makePublisher(partner).subcomponents)
> +
> +
> +class TestDirectoryHash(TestCase):
> +    """Unit tests for DirectoryHash object."""
> +
> +    def createTestFile(self, path, content):
> +        with open(path, "w") as tfd:
> +            tfd.write(content)
> +        return hashlib.sha256(content).hexdigest()
> +
> +    @property
> +    def allHashFiles(self):
> +        return [hash.dh_name for hash in archive_hashes]
> +
> +    @property
> +    def expectedHashFiles(self):
> +        return [hash.dh_name for hash in archive_hashes
> +                if hash.write_directory_hash]
https://dev.launchpad.net/PythonStyleGuide#Naming says all_hash_files and expected_hash_files for these.
I can see the argument either way, but I think here I would tend to write out the specific expected hash algorithms in test code rather than getting it from archive_hashes.  createTestFile is SHA-256-specific anyway, so this is a slightly misleading generalisation.
> +
> +    def fetchSums(self, rootdir):
> +        result = []
> +        for dh_file in self.allHashFiles:
> +            checksum_file = os.path.join(rootdir, dh_file)
> +            if os.path.exists(checksum_file):
> +                with open(checksum_file, "r") as sfd:
> +                    for line in sfd:
> +                        result.append(line.strip().split(' '))
> +        return result
This will be quite weird if more than one checksum file starts existing.  Shouldn't it be a dictionary mapping "SHA256SUMS" etc. to processed lines?
> +
> +    def test_checksum_files_created(self):
> +        tmpdir = unicode(self.makeTemporaryDirectory())
> +        rootdir = unicode(self.makeTemporaryDirectory())
> +
> +        for dh_file in self.allHashFiles:
> +            checksum_file = os.path.join(rootdir, dh_file)
> +            self.assertFalse(os.path.exists(checksum_file))
> +
> +        with DirectoryHash(rootdir, tmpdir, None) as dh:
> +            pass
> +
> +        for dh_file in self.allHashFiles:
> +            checksum_file = os.path.join(rootdir, dh_file)
> +            if dh_file in self.expectedHashFiles:
> +                self.assertTrue(os.path.exists(checksum_file))
> +            else:
> +                self.assertFalse(os.path.exists(checksum_file))
> +
> +    def test_basic_file_add(self):
> +        tmpdir = unicode(self.makeTemporaryDirectory())
> +        rootdir = unicode(self.makeTemporaryDirectory())
> +        test1_file = os.path.join(rootdir, "test1")
> +        test1_hash = self.createTestFile(test1_file, "test1")
> +
> +        test2_file = os.path.join(rootdir, "test2")
> +        test2_hash = self.createTestFile(test2_file, "test2")
> +
> +        os.mkdir(os.path.join(rootdir, "subdir1"))
> +
> +        test3_file = os.path.join(rootdir, "subdir1", "test3")
> +        test3_hash = self.createTestFile(test3_file, "test3")
> +
> +        with DirectoryHash(rootdir, tmpdir, None) as dh:
> +            dh.add(test1_file)
> +            dh.add(test2_file)
> +            dh.add(test3_file)
> +
> +        expected = [
> +            [test1_hash, "*test1"],
> +            [test2_hash, "*test2"],
> +            [test3_hash, "*subdir1/test3"],
> +            ]
> +        self.assertContentEqual(expected, self.fetchSums(rootdir))
> +
> +    def test_basic_directory_add(self):
> +        tmpdir = unicode(self.makeTemporaryDirectory())
> +        rootdir = unicode(self.makeTemporaryDirectory())
> +        test1_file = os.path.join(rootdir, "test1")
> +        test1_hash = self.createTestFile(test1_file, "test1 dir")
> +
> +        test2_file = os.path.join(rootdir, "test2")
> +        test2_hash = self.createTestFile(test2_file, "test2 dir")
> +
> +        os.mkdir(os.path.join(rootdir, "subdir1"))
> +
> +        test3_file = os.path.join(rootdir, "subdir1", "test3")
> +        test3_hash = self.createTestFile(test3_file, "test3 dir")
> +
> +        with DirectoryHash(rootdir, tmpdir, None) as dh:
> +            dh.add_dir(rootdir)
> +
> +        expected = [
> +            [test1_hash, "*test1"],
> +            [test2_hash, "*test2"],
> +            [test3_hash, "*subdir1/test3"],
> +            ]
> +        self.assertContentEqual(expected, self.fetchSums(rootdir))
-- 
https://code.launchpad.net/~apw/launchpad/signing-add-sha256-checksums/+merge/295615
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.
References