launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #20147
Re: [Merge] lp:~cjwatson/launchpad/archive-index-by-hash into lp:launchpad
Diff comments:
>
> === modified file 'lib/lp/archivepublisher/publishing.py'
> --- lib/lp/archivepublisher/publishing.py 2016-03-17 17:08:49 +0000
> +++ lib/lp/archivepublisher/publishing.py 2016-03-22 12:51:52 +0000
> @@ -231,6 +247,141 @@
> return max(len(str(item['size'])) for item in self[key])
>
>
> +class IArchiveHash(Interface):
> + """Represents a hash algorithm used for index files."""
> +
> + hash_factory = Attribute("A hashlib class suitable for this algorithm.")
> + deb822_name = Attribute(
> + "Algorithm name expected by debian.deb822.Release.")
> + apt_name = Attribute(
> + "Algorithm name used by apt in Release files and by-hash "
> + "subdirectories.")
> + lfc_name = Attribute(
> + "LibraryFileContent attribute name corresponding to this algorithm.")
> +
> +
> +@implementer(IArchiveHash)
> +class MD5ArchiveHash:
> + hash_factory = hashlib.md5
> + deb822_name = "md5sum"
> + apt_name = "MD5Sum"
> + lfc_name = "md5"
> +
> +
> +@implementer(IArchiveHash)
> +class SHA1ArchiveHash:
> + hash_factory = hashlib.sha1
> + deb822_name = "sha1"
> + apt_name = "SHA1"
> + lfc_name = "sha1"
> +
> +
> +@implementer(IArchiveHash)
> +class SHA256ArchiveHash:
> + hash_factory = hashlib.sha256
> + deb822_name = "sha256"
> + apt_name = "SHA256"
> + lfc_name = "sha256"
> +
> +
> +archive_hashes = [
> + MD5ArchiveHash(),
> + SHA1ArchiveHash(),
> + SHA256ArchiveHash(),
> + ]
> +
> +
> +class ByHash:
> + """Represents a single by-hash directory tree."""
> +
> + def __init__(self, root, key):
> + self.root = root
> + self.path = os.path.join(root, key, "by-hash")
> + self.known_digests = defaultdict(set)
> +
> + def add(self, lfa, copy_from_path=None):
> + """Ensure that by-hash entries for a single file exist.
> +
> + :param lfa: The `ILibraryFileAlias` to add.
> + :param copy_from_path: If not None, copy file content from here
> + rather than fetching it from the librarian. This can be used
> + for newly-added files to avoid needing to commit the transaction
> + before calling this method.
> + """
> + for archive_hash in archive_hashes:
> + digest = getattr(lfa.content, archive_hash.lfc_name)
> + digest_path = os.path.join(
> + self.path, archive_hash.apt_name, digest)
> + self.known_digests[archive_hash.apt_name].add(digest)
> + if not os.path.exists(digest_path):
> + ensure_directory_exists(os.path.dirname(digest_path))
> + if copy_from_path is not None:
> + os.link(
> + os.path.join(self.root, copy_from_path), digest_path)
> + else:
> + with open(digest_path, "wb") as outfile:
> + lfa.open()
> + try:
> + shutil.copyfileobj(lfa, outfile, 4 * 1024 * 1024)
> + finally:
> + lfa.close()
> +
> + def known(self, hashname, digest):
> + """Do we know about a file with this digest?"""
> + return digest in self.known_digests[hashname]
> +
> + def prune(self):
> + """Remove all by-hash entries that we have not been told to add.
> +
> + This also removes the by-hash directory itself if no entries remain.
> + """
> + prune_directory = True
> + for archive_hash in archive_hashes:
> + hash_path = os.path.join(self.path, archive_hash.apt_name)
> + if os.path.exists(hash_path):
> + prune_hash_directory = True
> + for digest in list(os.listdir(hash_path)):
> + if not self.known(archive_hash.apt_name, digest):
> + os.unlink(os.path.join(hash_path, digest))
> + else:
> + prune_hash_directory = False
> + if prune_hash_directory:
> + os.rmdir(hash_path)
> + else:
> + prune_directory = False
> + if prune_directory and os.path.exists(self.path):
> + os.rmdir(self.path)
> +
> +
> +class ByHashes:
> + """Represents all by-hash directory trees in an archive."""
> +
> + def __init__(self, root):
> + self.root = root
> + self.children = {}
> +
> + def registerChild(self, path):
> + """Register a single by-hash directory.
The `path` argument isn't actually the directory itself, but a child of it (the key is taken from `os.path.dirname(path)`).
> +
> + Only directories that have been registered here will be pruned by
> + the `prune` method.
> + """
> + key = os.path.dirname(path)
> + if key not in self.children:
> + self.children[key] = ByHash(self.root, key)
> + return self.children[key]
> +
> + def add(self, path, lfa, copy_from_path=None):
> + self.registerChild(path).add(lfa, copy_from_path=copy_from_path)
> +
> + def known(self, path, hashname, digest):
> + return self.registerChild(path).known(hashname, digest)
> +
> + def prune(self):
> + for child in self.children.values():
> + child.prune()
> +
> +
> class Publisher(object):
> """Publisher is the class used to provide the facility to publish
> files in the pool of a Distribution. The publisher objects will be
--
https://code.launchpad.net/~cjwatson/launchpad/archive-index-by-hash/+merge/289379
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.
References