← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/by-hash-symlinks into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/by-hash-symlinks into lp:launchpad.

Commit message:
Symlink weaker by-hash entries to the strongest one.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/by-hash-symlinks/+merge/290872

Symlink weaker by-hash entries to the strongest one.  This saves a bit of disk space, and it doesn't significantly complicate state tracking since the tracking is mostly done at the ArchiveFile level anyway.  We'll have trouble if there's ever a collision in one of the weaker hashes, but it would have to be a collision within a single by-hash directory, and those only ever contain a handful of entries in practice.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/by-hash-symlinks into lp:launchpad.
=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py	2016-04-02 21:22:50 +0000
+++ lib/lp/archivepublisher/publishing.py	2016-04-04 12:00:41 +0000
@@ -313,16 +313,23 @@
             for newly-added files to avoid needing to commit the transaction
             before calling this method.
         """
-        for archive_hash in archive_hashes:
+        best_hash = archive_hashes[-1]
+        best_digest = getattr(lfa.content, best_hash.lfc_name)
+        for archive_hash in reversed(archive_hashes):
             digest = getattr(lfa.content, archive_hash.lfc_name)
             digest_path = os.path.join(
                 self.path, archive_hash.apt_name, digest)
             self.known_digests[archive_hash.apt_name][digest].add(name)
-            if not os.path.exists(digest_path):
+            if not os.path.lexists(digest_path):
                 self.log.debug(
                     "by-hash: Creating %s for %s" % (digest_path, name))
                 ensure_directory_exists(os.path.dirname(digest_path))
-                if copy_from_path is not None:
+                if archive_hash != best_hash:
+                    os.symlink(
+                        os.path.join(
+                            os.pardir, best_hash.apt_name, best_digest),
+                        digest_path)
+                elif copy_from_path is not None:
                     os.link(
                         os.path.join(self.root, copy_from_path), digest_path)
                 else:

=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
--- lib/lp/archivepublisher/tests/test_publisher.py	2016-04-02 21:22:50 +0000
+++ lib/lp/archivepublisher/tests/test_publisher.py	2016-04-04 12:00:41 +0000
@@ -41,6 +41,7 @@
     MatchesStructure,
     Not,
     PathExists,
+    SamePath,
     )
 import transaction
 from zope.component import getUtility
@@ -468,10 +469,18 @@
             if mismatch is not None:
                 return mismatch
             for digest, content in digests.items():
-                mismatch = FileContains(content).match(
-                    os.path.join(path, digest))
-                if mismatch is not None:
-                    return mismatch
+                full_path = os.path.join(path, digest)
+                if hashname == "SHA256":
+                    mismatch = FileContains(content).match(full_path)
+                    if mismatch is not None:
+                        return mismatch
+                else:
+                    best_path = os.path.join(
+                        by_hash_path, "SHA256",
+                        hashlib.sha256(content).hexdigest())
+                    mismatch = SamePath(best_path).match(full_path)
+                    if mismatch is not None:
+                        return mismatch
 
 
 class ByHashesHaveContents(Matcher):
@@ -532,12 +541,16 @@
         content = "abc\n"
         lfa = self.factory.makeLibraryFileAlias(content=content)
         by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash")
-        for hashname, hashattr in (
-                ("MD5Sum", "md5"), ("SHA1", "sha1"), ("SHA256", "sha256")):
+        sha256_digest = hashlib.sha256(content).hexdigest()
+        with open_for_writing(
+                os.path.join(by_hash_path, "SHA256", sha256_digest), "w") as f:
+            f.write(content)
+        for hashname, hashattr in (("MD5Sum", "md5"), ("SHA1", "sha1")):
             digest = getattr(hashlib, hashattr)(content).hexdigest()
-            with open_for_writing(
-                    os.path.join(by_hash_path, hashname, digest), "w") as f:
-                f.write(content)
+            os.makedirs(os.path.join(by_hash_path, hashname))
+            os.symlink(
+                os.path.join(os.pardir, "SHA256", sha256_digest),
+                os.path.join(by_hash_path, hashname, digest))
         by_hash = ByHash(root, "dists/foo/main/source", DevNullLogger())
         self.assertThat(by_hash_path, ByHashHasContents([content]))
         by_hash.add("Sources", lfa)


Follow ups