launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #20585
Re: [Merge] lp:~apw/launchpad/signing-add-sha256-checksums into lp:launchpad
Review: Approve
I much prefer the DirectoryHash abstraction, thanks. Here's a boring review full of mostly style nits, after which this should be good to land.
Diff comments:
> === modified file 'lib/lp/archivepublisher/publishing.py'
> --- lib/lp/archivepublisher/publishing.py 2016-04-29 12:57:58 +0000
> +++ lib/lp/archivepublisher/publishing.py 2016-06-06 15:41:47 +0000
> @@ -1462,3 +1472,58 @@
> count += 1
> self.archive.name = new_name
> self.log.info("Renamed deleted archive '%s'.", self.archive.reference)
> +
> +
> +class DirectoryHash:
> + """Represents a directory hierachy for hashing."""
hierarchy
> +
> + def __init__(self, root, tmpdir, log):
> + self.root = root
> + self.tmpdir = tmpdir
> + self.log = log
> + self.checksum_hash = []
> +
> + for usable in self._usable_archive_hashes:
> + checksum_file = os.path.join(self.root, usable.dh_name)
> + self.checksum_hash.append(
> + (RepositoryIndexFile(checksum_file, self.tmpdir), usable))
> +
> + def __enter__(self):
> + return self
> +
> + def __exit__(self, type, value, traceback):
> + self.close()
> +
> + @property
> + def _usable_archive_hashes(self):
> + usable = []
> + for archive_hash in archive_hashes:
> + if archive_hash.write_directory_hash:
> + usable.append(archive_hash)
> + return usable
I'd probably write this as a generator:
@property
def _usable_archive_hashes(self):
for archive_hash in archive_hashes:
if archive_hash.write_directory_hash:
yield archive_hash
> +
> + def add(self, path):
> + """Add a path to be checksummed."""
> + hashes = [
> + (checksum_file, archive_hash.hash_factory())
> + for (checksum_file, archive_hash) in self.checksum_hash]
> + size = 0
> + with open(path, 'rb') as in_file:
> + for chunk in iter(lambda: in_file.read(256 * 1024), ""):
> + for (checksum_file, hashobj) in hashes:
> + hashobj.update(chunk)
> + size += len(chunk)
You never actually use size.
> +
> + for (checksum_file, hashobj) in hashes:
> + checksum_file.write("%s *%s\n" %
> + (hashobj.hexdigest(), path[len(self.root) + 1:]))
> +
> + def add_dir(self, path):
> + """Recursively add a directory path to be checksummed."""
> + for dirpath, dirnames, filenames in os.walk(path):
> + for filename in filenames:
> + self.add(os.path.join(dirpath, filename))
> +
> + def close(self):
> + for (checksum_file, archive_hash) in self.checksum_hash:
> + checksum_file.close()
>
> === modified file 'lib/lp/archivepublisher/signing.py'
> --- lib/lp/archivepublisher/signing.py 2016-05-31 12:40:38 +0000
> +++ lib/lp/archivepublisher/signing.py 2016-06-06 15:41:47 +0000
> @@ -303,6 +304,13 @@
> if 'tarball' in self.signing_options:
> self.convertToTarball()
>
> + # Avoid circular import.
> + from lp.archivepublisher.publishing import DirectoryHash
The circular-import avoidance is fine, but put the import at the top of the method, just below the docstring.
> +
> + versiondir = os.path.join(self.tmpdir, self.version)
> + with DirectoryHash(versiondir, self.tmpdir, self.logger) as hasher:
> + hasher.add_dir(versiondir)
> +
> def shouldInstall(self, filename):
> return filename.startswith("%s/" % self.version)
>
>
> === modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
> --- lib/lp/archivepublisher/tests/test_publisher.py 2016-05-14 10:17:36 +0000
> +++ lib/lp/archivepublisher/tests/test_publisher.py 2016-06-06 15:41:47 +0000
> @@ -24,6 +24,8 @@
> import time
>
> from debian.deb822 import Release
> +
> +
No need for these; this is all in the "third-party libraries" block, so no intervening blank lines. It's possible that format-imports is slightly buggy and doesn't recognise try/except ImportError blocks.
> try:
> import lzma
> except ImportError:
> @@ -92,7 +96,10 @@
> from lp.soyuz.interfaces.archive import IArchiveSet
> from lp.soyuz.interfaces.archivefile import IArchiveFileSet
> from lp.soyuz.tests.test_publishing import TestNativePublishingBase
> -from lp.testing import TestCaseWithFactory
> +from lp.testing import (
> + TestCase,
> + TestCaseWithFactory
Trailing comma.
> + )
> from lp.testing.fakemethod import FakeMethod
> from lp.testing.gpgkeys import gpgkeysdir
> from lp.testing.keyserver import KeyServerTac
> @@ -3160,3 +3167,99 @@
>
> partner = self.factory.makeArchive(purpose=ArchivePurpose.PARTNER)
> self.assertEqual([], self.makePublisher(partner).subcomponents)
> +
> +
> +class TestDirectoryHash(TestCase):
> + """Unit tests for DirectoryHash object."""
> +
> + def createTestFile(self, path, content):
> + with open(path, "w") as tfd:
> + tfd.write(content)
> + return hashlib.sha256(content).hexdigest()
> +
> + @property
> + def allHashFiles(self):
> + return [hash.dh_name for hash in archive_hashes]
> +
> + @property
> + def expectedHashFiles(self):
> + return [hash.dh_name for hash in archive_hashes
> + if hash.write_directory_hash]
https://dev.launchpad.net/PythonStyleGuide#Naming says all_hash_files and expected_hash_files for these.
I can see the argument either way, but I think here I would tend to write out the specific expected hash algorithms in test code rather than getting it from archive_hashes. createTestFile is SHA-256-specific anyway, so this is a slightly misleading generalisation.
> +
> + def fetchSums(self, rootdir):
> + result = []
> + for dh_file in self.allHashFiles:
> + checksum_file = os.path.join(rootdir, dh_file)
> + if os.path.exists(checksum_file):
> + with open(checksum_file, "r") as sfd:
> + for line in sfd:
> + result.append(line.strip().split(' '))
> + return result
This will be quite weird if more than one checksum file starts existing. Shouldn't it be a dictionary mapping "SHA256SUMS" etc. to processed lines?
> +
> + def test_checksum_files_created(self):
> + tmpdir = unicode(self.makeTemporaryDirectory())
> + rootdir = unicode(self.makeTemporaryDirectory())
> +
> + for dh_file in self.allHashFiles:
> + checksum_file = os.path.join(rootdir, dh_file)
> + self.assertFalse(os.path.exists(checksum_file))
> +
> + with DirectoryHash(rootdir, tmpdir, None) as dh:
> + pass
> +
> + for dh_file in self.allHashFiles:
> + checksum_file = os.path.join(rootdir, dh_file)
> + if dh_file in self.expectedHashFiles:
> + self.assertTrue(os.path.exists(checksum_file))
> + else:
> + self.assertFalse(os.path.exists(checksum_file))
> +
> + def test_basic_file_add(self):
> + tmpdir = unicode(self.makeTemporaryDirectory())
> + rootdir = unicode(self.makeTemporaryDirectory())
> + test1_file = os.path.join(rootdir, "test1")
> + test1_hash = self.createTestFile(test1_file, "test1")
> +
> + test2_file = os.path.join(rootdir, "test2")
> + test2_hash = self.createTestFile(test2_file, "test2")
> +
> + os.mkdir(os.path.join(rootdir, "subdir1"))
> +
> + test3_file = os.path.join(rootdir, "subdir1", "test3")
> + test3_hash = self.createTestFile(test3_file, "test3")
> +
> + with DirectoryHash(rootdir, tmpdir, None) as dh:
> + dh.add(test1_file)
> + dh.add(test2_file)
> + dh.add(test3_file)
> +
> + expected = [
> + [test1_hash, "*test1"],
> + [test2_hash, "*test2"],
> + [test3_hash, "*subdir1/test3"],
> + ]
> + self.assertContentEqual(expected, self.fetchSums(rootdir))
> +
> + def test_basic_directory_add(self):
> + tmpdir = unicode(self.makeTemporaryDirectory())
> + rootdir = unicode(self.makeTemporaryDirectory())
> + test1_file = os.path.join(rootdir, "test1")
> + test1_hash = self.createTestFile(test1_file, "test1 dir")
> +
> + test2_file = os.path.join(rootdir, "test2")
> + test2_hash = self.createTestFile(test2_file, "test2 dir")
> +
> + os.mkdir(os.path.join(rootdir, "subdir1"))
> +
> + test3_file = os.path.join(rootdir, "subdir1", "test3")
> + test3_hash = self.createTestFile(test3_file, "test3 dir")
> +
> + with DirectoryHash(rootdir, tmpdir, None) as dh:
> + dh.add_dir(rootdir)
> +
> + expected = [
> + [test1_hash, "*test1"],
> + [test2_hash, "*test2"],
> + [test3_hash, "*subdir1/test3"],
> + ]
> + self.assertContentEqual(expected, self.fetchSums(rootdir))
--
https://code.launchpad.net/~apw/launchpad/signing-add-sha256-checksums/+merge/295615
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.
References