[Merge] lp:~cjwatson/launchpad/by-hash-symlinks into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/by-hash-symlinks into lp:launchpad.
Commit message:
Symlink weaker by-hash entries to the strongest one.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/by-hash-symlinks/+merge/290872
Symlink weaker by-hash entries to the strongest one. This saves a bit of disk space, and it doesn't significantly complicate state tracking since the tracking is mostly done at the ArchiveFile level anyway. We'll have trouble if there's ever a collision in one of the weaker hashes, but it would have to be a collision within a single by-hash directory, and those only ever contain a handful of entries in practice.
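For illustration only, here is a minimal standalone sketch of the scheme (the name add_by_hash is hypothetical, and the real code reads precomputed digests from the LibraryFileAlias rather than hashing content itself):

    import hashlib
    import os

    # Ordered weakest to strongest, mirroring archive_hashes.
    HASHES = [("MD5Sum", hashlib.md5), ("SHA1", hashlib.sha1),
              ("SHA256", hashlib.sha256)]

    def add_by_hash(by_hash_path, content):
        # Write the strongest entry first, then symlink the weaker
        # entries to it, so the link target always already exists.
        best_name, best_hashlib = HASHES[-1]
        best_digest = best_hashlib(content).hexdigest()
        for apt_name, hashlib_func in reversed(HASHES):
            digest = hashlib_func(content).hexdigest()
            digest_path = os.path.join(by_hash_path, apt_name, digest)
            if os.path.lexists(digest_path):
                continue
            if not os.path.isdir(os.path.dirname(digest_path)):
                os.makedirs(os.path.dirname(digest_path))
            if apt_name != best_name:
                # e.g. MD5Sum/<md5> -> ../SHA256/<sha256>
                os.symlink(
                    os.path.join(os.pardir, best_name, best_digest),
                    digest_path)
            else:
                with open(digest_path, "wb") as f:
                    f.write(content)

After add_by_hash("dists/foo/main/source/by-hash", b"abc\n"), only the SHA256 entry is a regular file; the MD5Sum and SHA1 entries are relative symlinks to it.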
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/by-hash-symlinks into lp:launchpad.
=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py 2016-04-02 21:22:50 +0000
+++ lib/lp/archivepublisher/publishing.py 2016-04-04 12:00:41 +0000
@@ -313,16 +313,23 @@
             for newly-added files to avoid needing to commit the transaction
             before calling this method.
         """
-        for archive_hash in archive_hashes:
+        best_hash = archive_hashes[-1]
+        best_digest = getattr(lfa.content, best_hash.lfc_name)
+        for archive_hash in reversed(archive_hashes):
             digest = getattr(lfa.content, archive_hash.lfc_name)
             digest_path = os.path.join(
                 self.path, archive_hash.apt_name, digest)
             self.known_digests[archive_hash.apt_name][digest].add(name)
-            if not os.path.exists(digest_path):
+            if not os.path.lexists(digest_path):
                 self.log.debug(
                     "by-hash: Creating %s for %s" % (digest_path, name))
                 ensure_directory_exists(os.path.dirname(digest_path))
-                if copy_from_path is not None:
+                if archive_hash != best_hash:
+                    os.symlink(
+                        os.path.join(
+                            os.pardir, best_hash.apt_name, best_digest),
+                        digest_path)
+                elif copy_from_path is not None:
                     os.link(
                         os.path.join(self.root, copy_from_path), digest_path)
                 else:
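A note on the exists() -> lexists() switch above: os.path.exists follows symlinks, so a dangling by-hash symlink (presumably possible if its SHA256 target were pruned first) would report False from exists() even though the path is occupied and os.symlink would fail. A quick illustration:

    import os

    os.symlink("no-such-target", "dangling")
    os.path.exists("dangling")   # False: follows the (broken) link
    os.path.lexists("dangling")  # True: the link itself is present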
=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
--- lib/lp/archivepublisher/tests/test_publisher.py 2016-04-02 21:22:50 +0000
+++ lib/lp/archivepublisher/tests/test_publisher.py 2016-04-04 12:00:41 +0000
@@ -41,6 +41,7 @@
     MatchesStructure,
     Not,
     PathExists,
+    SamePath,
     )
 import transaction
 from zope.component import getUtility
@@ -468,10 +469,18 @@
             if mismatch is not None:
                 return mismatch
             for digest, content in digests.items():
-                mismatch = FileContains(content).match(
-                    os.path.join(path, digest))
-                if mismatch is not None:
-                    return mismatch
+                full_path = os.path.join(path, digest)
+                if hashname == "SHA256":
+                    mismatch = FileContains(content).match(full_path)
+                    if mismatch is not None:
+                        return mismatch
+                else:
+                    best_path = os.path.join(
+                        by_hash_path, "SHA256",
+                        hashlib.sha256(content).hexdigest())
+                    mismatch = SamePath(best_path).match(full_path)
+                    if mismatch is not None:
+                        return mismatch


 class ByHashesHaveContents(Matcher):
@@ -532,12 +541,16 @@
         content = "abc\n"
         lfa = self.factory.makeLibraryFileAlias(content=content)
         by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash")
-        for hashname, hashattr in (
-                ("MD5Sum", "md5"), ("SHA1", "sha1"), ("SHA256", "sha256")):
+        sha256_digest = hashlib.sha256(content).hexdigest()
+        with open_for_writing(
+                os.path.join(by_hash_path, "SHA256", sha256_digest), "w") as f:
+            f.write(content)
+        for hashname, hashattr in (("MD5Sum", "md5"), ("SHA1", "sha1")):
             digest = getattr(hashlib, hashattr)(content).hexdigest()
-            with open_for_writing(
-                    os.path.join(by_hash_path, hashname, digest), "w") as f:
-                f.write(content)
+            os.makedirs(os.path.join(by_hash_path, hashname))
+            os.symlink(
+                os.path.join(os.pardir, "SHA256", sha256_digest),
+                os.path.join(by_hash_path, hashname, digest))
         by_hash = ByHash(root, "dists/foo/main/source", DevNullLogger())
         self.assertThat(by_hash_path, ByHashHasContents([content]))
         by_hash.add("Sources", lfa)
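For reference, SamePath is the testtools matcher that normalises and resolves both paths (via os.path.realpath) before comparing, so the assertion accepts any symlink that ends up at the strongest entry without pinning down the exact link text. Roughly:

    from testtools.matchers import SamePath

    # Passes when full_path resolves to the same file as best_path,
    # e.g. because full_path is a symlink into the SHA256 directory.
    self.assertThat(full_path, SamePath(best_path))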