launchpad-reviewers team mailing list archive
Message #20102
[Merge] lp:~cjwatson/launchpad/archive-index-by-hash into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/archive-index-by-hash into lp:launchpad with lp:~cjwatson/launchpad/ds-publish-by-hash as a prerequisite.
Commit message:
Add files indexed by Release to the librarian and to ArchiveFile. Publish them in by-hash directories, keeping old versions for a day.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #1430011 in Launchpad itself: "support apt by-hash mirrors"
https://bugs.launchpad.net/launchpad/+bug/1430011
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/archive-index-by-hash/+merge/289379
Add files indexed by Release to the librarian and to ArchiveFile. Publish them in by-hash directories, keeping old versions for a day.
DistroSeries.publish_by_hash lets us do this only for series with a version of apt that can make use of it, and it also serves as a circuit breaker in case something goes wrong.
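
For illustration only (not part of this diff): each index file listed in Release, such as dists/<suite>/main/source/Sources.gz, gains sibling by-hash/MD5Sum, by-hash/SHA1 and by-hash/SHA256 entries named by digest, so a client honouring Acquire-By-Hash can fetch content by hash rather than by name. The helper below is a hypothetical sketch that just mirrors that naming scheme:

import hashlib
import os

def by_hash_paths(index_path):
    # Hypothetical helper: compute the by-hash entries that would sit
    # alongside an on-disk index file, matching the subdirectory names
    # used by the ByHash class in this branch.
    with open(index_path, "rb") as f:
        data = f.read()
    directory = os.path.dirname(index_path)
    return [
        os.path.join(directory, "by-hash", hashname,
                     getattr(hashlib, hashattr)(data).hexdigest())
        for hashname, hashattr in (
            ("MD5Sum", "md5"), ("SHA1", "sha1"), ("SHA256", "sha256"))]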
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/archive-index-by-hash into lp:launchpad.
=== modified file 'lib/lp/archivepublisher/model/ftparchive.py'
--- lib/lp/archivepublisher/model/ftparchive.py 2016-02-09 15:51:19 +0000
+++ lib/lp/archivepublisher/model/ftparchive.py 2016-03-17 14:51:01 +0000
@@ -54,10 +54,14 @@
"""Ensure that the path exists and is an empty directory."""
if os.path.isdir(path):
for name in os.listdir(path):
+ if name == "by-hash":
+ # Ignore existing by-hash directories; they will be cleaned
+ # up to match the rest of the directory tree later.
+ continue
child_path = os.path.join(path, name)
# Directories containing index files should never have
- # subdirectories. Guard against expensive mistakes by not
- # recursing here.
+ # subdirectories other than by-hash. Guard against expensive
+ # mistakes by not recursing here.
os.unlink(child_path)
else:
os.makedirs(path, 0o755)
=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py 2016-03-11 11:45:56 +0000
+++ lib/lp/archivepublisher/publishing.py 2016-03-17 14:51:01 +0000
@@ -12,7 +12,11 @@
__metaclass__ = type
import bz2
-from datetime import datetime
+from collections import defaultdict
+from datetime import (
+ datetime,
+ timedelta,
+ )
import errno
import gzip
import hashlib
@@ -32,6 +36,7 @@
from storm.expr import Desc
from zope.component import getUtility
+from lp.app.errors import NotFoundError
from lp.app.interfaces.launchpad import ILaunchpadCelebrities
from lp.archivepublisher import HARDCODED_COMPONENT_ORDER
from lp.archivepublisher.config import getPubConfig
@@ -64,7 +69,9 @@
from lp.services.database.constants import UTC_NOW
from lp.services.database.interfaces import IStore
from lp.services.features import getFeatureFlag
+from lp.services.helpers import filenameToContentType
from lp.services.librarian.client import LibrarianClient
+from lp.services.osutils import open_for_writing
from lp.services.utils import file_exists
from lp.soyuz.enums import (
ArchivePurpose,
@@ -73,6 +80,7 @@
PackagePublishingStatus,
)
from lp.soyuz.interfaces.archive import NoSuchPPA
+from lp.soyuz.interfaces.archivefile import IArchiveFileSet
from lp.soyuz.interfaces.publishing import (
active_publishing_status,
IPublishingSet,
@@ -95,6 +103,10 @@
}
+# Number of days before unreferenced files are removed from by-hash.
+BY_HASH_STAY_OF_EXECUTION = 1
+
+
def reorder_components(components):
"""Return a list of the components provided.
@@ -231,6 +243,93 @@
return max(len(str(item['size'])) for item in self[key])
+class ByHash:
+ """Represents a single by-hash directory tree."""
+
+ # Subdirectory names expected by apt.
+ supported_hashes = ("MD5Sum", "SHA1", "SHA256")
+
+ def __init__(self, root, key):
+ self.root = root
+ self.path = os.path.join(root, key, "by-hash")
+ self.known_digests = defaultdict(set)
+
+ @staticmethod
+ def getHashFromLFA(lfa, name):
+ attr = {
+ "MD5Sum": "md5",
+ "SHA1": "sha1",
+ "SHA256": "sha256",
+ }[name]
+ return getattr(lfa.content, attr)
+
+ def add(self, lfa, copy_from_path=None):
+ """Ensure that by-hash entries for a single file exist.
+
+ :param lfa: The `ILibraryFileAlias` to add.
+ :param copy_from_path: If not None, copy file content from here
+ rather than fetching it from the librarian. This can be used
+ for newly-added files to avoid needing to commit the transaction
+ before calling this method.
+ """
+ for hashname in self.supported_hashes:
+ digest = self.getHashFromLFA(lfa, hashname)
+ digest_path = os.path.join(self.path, hashname, digest)
+ self.known_digests[hashname].add(digest)
+ if not os.path.exists(digest_path):
+ with open_for_writing(digest_path, "wb") as outfile:
+ if copy_from_path is not None:
+ infile = open(
+ os.path.join(self.root, copy_from_path), "rb")
+ else:
+ lfa.open()
+ infile = lfa
+ try:
+ shutil.copyfileobj(infile, outfile, 4 * 1024 * 1024)
+ finally:
+ infile.close()
+
+ def exists(self, hashname, digest):
+ """Do we know about a file with this digest?"""
+ return digest in self.known_digests[hashname]
+
+ def prune(self):
+ """Remove all by-hash entries that we have not been told to add."""
+ if any(self.known_digests.values()):
+ for hashname in self.supported_hashes:
+ hash_path = os.path.join(self.path, hashname)
+ if os.path.exists(hash_path):
+ for digest in list(os.listdir(hash_path)):
+ if not self.exists(hashname, digest):
+ os.unlink(os.path.join(hash_path, digest))
+ elif os.path.exists(self.path):
+ shutil.rmtree(self.path)
+
+
+class ByHashes:
+ """Represents all by-hash directory trees in an archive."""
+
+ def __init__(self, root):
+ self.root = root
+ self.children = {}
+
+ def getChild(self, path):
+ key = os.path.dirname(path)
+ if key not in self.children:
+ self.children[key] = ByHash(self.root, key)
+ return self.children[key]
+
+ def add(self, path, lfa, copy_from_path=None):
+ self.getChild(path).add(lfa, copy_from_path=copy_from_path)
+
+ def exists(self, path, hashname, digest):
+ return self.getChild(path).exists(hashname, digest)
+
+ def prune(self):
+ for child in self.children.values():
+ child.prune()
+
+
class Publisher(object):
"""Publisher is the class used to provide the facility to publish
files in the pool of a Distribution. The publisher objects will be
@@ -501,7 +600,18 @@
*conditions).config(distinct=True).order_by(
DistroSeries.id, BinaryPackagePublishingHistory.pocket)
- for distroseries, pocket in chain(source_suites, binary_suites):
+ archive_file_suites = []
+ for container in getUtility(IArchiveFileSet).getContainersToReap(
+ self.archive, container_prefix=u"release:"):
+ try:
+ distroseries, pocket = self.distro.getDistroSeriesAndPocket(
+ container[len(u"release:"):])
+ archive_file_suites.append((distroseries, pocket))
+ except NotFoundError:
+ pass
+
+ for distroseries, pocket in chain(
+ source_suites, binary_suites, archive_file_suites):
if self.isDirty(distroseries, pocket):
continue
if (cannot_modify_suite(self.archive, distroseries, pocket)
@@ -796,6 +906,69 @@
return self.distro.displayname
return "LP-PPA-%s" % get_ppa_reference(self.archive)
+ def _updateByHash(self, suite, release_data):
+ """Update by-hash files for a suite."""
+ archive_file_set = getUtility(IArchiveFileSet)
+ by_hashes = ByHashes(self._config.archiveroot)
+ suite_dir = os.path.relpath(
+ os.path.join(self._config.distsroot, suite),
+ self._config.archiveroot)
+ container = "release:%s" % suite
+
+ # Remove any condemned files from the database. We ensure that we
+ # know about all the relevant by-hash directory trees before doing
+ # any removals so that we can prune them properly later.
+ for archive_file in archive_file_set.getByArchive(
+ self.archive, container=container):
+ by_hashes.getChild(archive_file.path)
+ archive_file_set.reap(self.archive, container=container)
+
+ # Gather information.
+ archive_files = archive_file_set.getByArchive(
+ self.archive, container=container, eager_load=True)
+ active_files = {}
+ for active_entry in release_data["SHA256"]:
+ path = os.path.join(suite_dir, active_entry["name"])
+ active_files[path] = (active_entry["size"], active_entry["sha256"])
+
+ # Ensure that all files recorded in the database are in by-hash.
+ current_files = {}
+ for archive_file in archive_files:
+ by_hashes.add(archive_file.path, archive_file.library_file)
+ if archive_file.scheduled_deletion_date is None:
+ current_files[archive_file.path] = archive_file
+
+ # Supersede any database records that do not correspond to active
+ # index files.
+ superseded_files = set()
+ for archive_file in archive_files:
+ path = archive_file.path
+ if (path not in active_files or
+ not by_hashes.exists(
+ path, "SHA256", active_files[path][1])):
+ superseded_files.add(archive_file)
+ archive_file_set.scheduleDeletion(
+ superseded_files, timedelta(days=BY_HASH_STAY_OF_EXECUTION))
+
+ # Ensure that all the active index files are in by-hash and have
+ # corresponding database entries.
+ # XXX cjwatson 2016-03-15: This should possibly use bulk creation,
+ # although we can only avoid about a third of the queries since the
+ # librarian client has no bulk upload methods.
+ for path, (size, sha256) in active_files.items():
+ full_path = os.path.join(self._config.archiveroot, path)
+ if (os.path.exists(full_path) and
+ not by_hashes.exists(path, "SHA256", sha256)):
+ archive_file = archive_file_set.newFromFile(
+ self.archive, container, self._config.archiveroot, path,
+ size, filenameToContentType(path))
+ by_hashes.add(
+ path, archive_file.library_file, copy_from_path=path)
+
+ # Finally, remove any files from disk that aren't recorded in the
+ # database and aren't active.
+ by_hashes.prune()
+
def _writeReleaseFile(self, suite, release_data):
"""Write a Release file to the archive.
@@ -907,6 +1080,10 @@
release_file.setdefault("SHA1", []).append(hashes["sha1"])
release_file.setdefault("SHA256", []).append(hashes["sha256"])
+ if distroseries.publish_by_hash:
+ self._updateByHash(suite, release_file)
+ release_file["Acquire-By-Hash"] = "yes"
+
self._writeReleaseFile(suite, release_file)
core_files.add("Release")
@@ -1014,16 +1191,14 @@
# Schedule this for inclusion in the Release file.
all_series_files.add(os.path.join(component, "i18n", "Index"))
- def _readIndexFileHashes(self, distroseries_name, file_name,
- subpath=None):
+ def _readIndexFileHashes(self, suite, file_name, subpath=None):
"""Read an index file and return its hashes.
- :param distroseries_name: Distro series name
+ :param suite: Suite name.
:param file_name: Filename relative to the parent container directory.
- :param subpath: Optional subpath within the distroseries root.
- Generated indexes will not include this path. If omitted,
- filenames are assumed to be relative to the distroseries
- root.
+ :param subpath: Optional subpath within the suite root. Generated
+ indexes will not include this path. If omitted, filenames are
+ assumed to be relative to the suite root.
:return: A dictionary mapping hash field names to dictionaries of
their components as defined by debian.deb822.Release (e.g.
{"md5sum": {"md5sum": ..., "size": ..., "name": ...}}), or None
@@ -1031,8 +1206,7 @@
"""
open_func = open
full_name = os.path.join(
- self._config.distsroot, distroseries_name, subpath or '.',
- file_name)
+ self._config.distsroot, suite, subpath or '.', file_name)
if not os.path.exists(full_name):
if os.path.exists(full_name + '.gz'):
open_func = gzip.open
=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
--- lib/lp/archivepublisher/tests/test_publisher.py 2016-03-11 11:45:56 +0000
+++ lib/lp/archivepublisher/tests/test_publisher.py 2016-03-17 14:51:01 +0000
@@ -7,6 +7,10 @@
import bz2
import crypt
+from datetime import (
+ datetime,
+ timedelta,
+ )
from functools import partial
import gzip
import hashlib
@@ -22,9 +26,17 @@
import lzma
except ImportError:
from backports import lzma
+import pytz
from testtools.matchers import (
ContainsAll,
+ DirContains,
+ Equals,
+ FileContains,
LessThan,
+ Matcher,
+ MatchesSetwise,
+ Not,
+ PathExists,
)
import transaction
from zope.component import getUtility
@@ -36,6 +48,8 @@
IArchiveSigningKey,
)
from lp.archivepublisher.publishing import (
+ ByHash,
+ ByHashes,
getPublisher,
I18nIndex,
Publisher,
@@ -58,6 +72,7 @@
BufferLogger,
DevNullLogger,
)
+from lp.services.osutils import open_for_writing
from lp.services.utils import file_exists
from lp.soyuz.enums import (
ArchivePurpose,
@@ -68,12 +83,16 @@
PackageUploadStatus,
)
from lp.soyuz.interfaces.archive import IArchiveSet
+from lp.soyuz.interfaces.archivefile import IArchiveFileSet
from lp.soyuz.tests.test_publishing import TestNativePublishingBase
from lp.testing import TestCaseWithFactory
from lp.testing.fakemethod import FakeMethod
from lp.testing.gpgkeys import gpgkeysdir
from lp.testing.keyserver import KeyServerTac
-from lp.testing.layers import ZopelessDatabaseLayer
+from lp.testing.layers import (
+ LaunchpadZopelessLayer,
+ ZopelessDatabaseLayer,
+ )
RELEASE = PackagePublishingPocket.RELEASE
@@ -423,6 +442,223 @@
'i386', publications[0].distroarchseries.architecturetag)
+class ByHashHasContents(Matcher):
+ """Matches if a by-hash directory has exactly the specified contents."""
+
+ def __init__(self, contents):
+ self.contents = contents
+
+ def match(self, by_hash_path):
+ mismatch = DirContains(["MD5Sum", "SHA1", "SHA256"]).match(
+ by_hash_path)
+ if mismatch is not None:
+ return mismatch
+ for hashname, hashattr in (
+ ("MD5Sum", "md5"), ("SHA1", "sha1"), ("SHA256", "sha256")):
+ digests = {
+ getattr(hashlib, hashattr)(content).hexdigest(): content
+ for content in self.contents}
+ path = os.path.join(by_hash_path, hashname)
+ mismatch = DirContains(digests.keys()).match(path)
+ if mismatch is not None:
+ return mismatch
+ for digest, content in digests.items():
+ mismatch = FileContains(content).match(
+ os.path.join(path, digest))
+ if mismatch is not None:
+ return mismatch
+
+
+class ByHashesHaveContents(Matcher):
+ """Matches if only these by-hash directories exist with proper contents."""
+
+ def __init__(self, path_contents):
+ self.path_contents = path_contents
+
+ def match(self, root):
+ children = set()
+ for dirpath, dirnames, _ in os.walk(root):
+ if "by-hash" in dirnames:
+ children.add(os.path.relpath(dirpath, root))
+ mismatch = MatchesSetwise(
+ *(Equals(path) for path in self.path_contents)).match(children)
+ if mismatch is not None:
+ return mismatch
+ for path, contents in self.path_contents.items():
+ by_hash_path = os.path.join(root, path, "by-hash")
+ mismatch = ByHashHasContents(contents).match(by_hash_path)
+ if mismatch is not None:
+ return mismatch
+
+
+class TestByHash(TestCaseWithFactory):
+ """Unit tests for details of handling a single by-hash directory tree."""
+
+ layer = LaunchpadZopelessLayer
+
+ def test_add(self):
+ root = self.makeTemporaryDirectory()
+ contents = ["abc\n", "def\n"]
+ lfas = [
+ self.factory.makeLibraryFileAlias(content=content)
+ for content in contents]
+ transaction.commit()
+ by_hash = ByHash(root, "dists/foo/main/source")
+ for lfa in lfas:
+ by_hash.add(lfa)
+ by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash")
+ self.assertThat(by_hash_path, ByHashHasContents(contents))
+
+ def test_add_copy_from_path(self):
+ root = self.makeTemporaryDirectory()
+ content = "abc\n"
+ sources_path = "dists/foo/main/source/Sources"
+ with open_for_writing(
+ os.path.join(root, sources_path), "w") as sources:
+ sources.write(content)
+ lfa = self.factory.makeLibraryFileAlias(content=content, db_only=True)
+ by_hash = ByHash(root, "dists/foo/main/source")
+ by_hash.add(lfa, copy_from_path=sources_path)
+ by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash")
+ self.assertThat(by_hash_path, ByHashHasContents([content]))
+
+ def test_add_existing(self):
+ root = self.makeTemporaryDirectory()
+ content = "abc\n"
+ lfa = self.factory.makeLibraryFileAlias(content=content)
+ by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash")
+ for hashname, hashattr in (
+ ("MD5Sum", "md5"), ("SHA1", "sha1"), ("SHA256", "sha256")):
+ digest = getattr(hashlib, hashattr)(content).hexdigest()
+ with open_for_writing(
+ os.path.join(by_hash_path, hashname, digest), "w") as f:
+ f.write(content)
+ by_hash = ByHash(root, "dists/foo/main/source")
+ self.assertThat(by_hash_path, ByHashHasContents([content]))
+ by_hash.add(lfa)
+ self.assertThat(by_hash_path, ByHashHasContents([content]))
+
+ def test_exists(self):
+ root = self.makeTemporaryDirectory()
+ content = "abc\n"
+ with open_for_writing(os.path.join(root, "abc"), "w") as f:
+ f.write(content)
+ lfa = self.factory.makeLibraryFileAlias(content=content, db_only=True)
+ by_hash = ByHash(root, "")
+ md5 = hashlib.md5(content).hexdigest()
+ sha1 = hashlib.sha1(content).hexdigest()
+ sha256 = hashlib.sha256(content).hexdigest()
+ self.assertFalse(by_hash.exists("MD5Sum", md5))
+ self.assertFalse(by_hash.exists("SHA1", sha1))
+ self.assertFalse(by_hash.exists("SHA256", sha256))
+ by_hash.add(lfa, copy_from_path="abc")
+ self.assertTrue(by_hash.exists("MD5Sum", md5))
+ self.assertTrue(by_hash.exists("SHA1", sha1))
+ self.assertTrue(by_hash.exists("SHA256", sha256))
+
+ def test_prune(self):
+ root = self.makeTemporaryDirectory()
+ content = "abc\n"
+ sources_path = "dists/foo/main/source/Sources"
+ with open_for_writing(os.path.join(root, sources_path), "w") as f:
+ f.write(content)
+ lfa = self.factory.makeLibraryFileAlias(content=content, db_only=True)
+ by_hash = ByHash(root, "dists/foo/main/source")
+ by_hash.add(lfa, copy_from_path=sources_path)
+ by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash")
+ with open_for_writing(os.path.join(by_hash_path, "MD5Sum/0"), "w"):
+ pass
+ self.assertThat(by_hash_path, Not(ByHashHasContents([content])))
+ by_hash.prune()
+ self.assertThat(by_hash_path, ByHashHasContents([content]))
+
+ def test_prune_empty(self):
+ root = self.makeTemporaryDirectory()
+ by_hash = ByHash(root, "dists/foo/main/source")
+ by_hash_path = os.path.join(root, "dists/foo/main/source/by-hash")
+ with open_for_writing(os.path.join(by_hash_path, "MD5Sum/0"), "w"):
+ pass
+ self.assertThat(by_hash_path, PathExists())
+ by_hash.prune()
+ self.assertThat(by_hash_path, Not(PathExists()))
+
+
+class TestByHashes(TestCaseWithFactory):
+ """Unit tests for details of handling a set of by-hash directory trees."""
+
+ layer = LaunchpadZopelessLayer
+
+ def test_add(self):
+ root = self.makeTemporaryDirectory()
+ self.assertThat(root, ByHashesHaveContents({}))
+ path_contents = {
+ "dists/foo/main/source": {"Sources": "abc\n"},
+ "dists/foo/main/binary-amd64": {
+ "Packages.gz": "def\n", "Packages.xz": "ghi\n"},
+ }
+ by_hashes = ByHashes(root)
+ for dirpath, contents in path_contents.items():
+ for name, content in contents.items():
+ path = os.path.join(dirpath, name)
+ with open_for_writing(os.path.join(root, path), "w") as f:
+ f.write(content)
+ lfa = self.factory.makeLibraryFileAlias(
+ content=content, db_only=True)
+ by_hashes.add(path, lfa, copy_from_path=path)
+ self.assertThat(root, ByHashesHaveContents({
+ path: contents.values()
+ for path, contents in path_contents.items()}))
+
+ def test_exists(self):
+ root = self.makeTemporaryDirectory()
+ content = "abc\n"
+ sources_path = "dists/foo/main/source/Sources"
+ with open_for_writing(os.path.join(root, sources_path), "w") as f:
+ f.write(content)
+ lfa = self.factory.makeLibraryFileAlias(content=content, db_only=True)
+ by_hashes = ByHashes(root)
+ md5 = hashlib.md5(content).hexdigest()
+ sha1 = hashlib.sha1(content).hexdigest()
+ sha256 = hashlib.sha256(content).hexdigest()
+ self.assertFalse(by_hashes.exists(sources_path, "MD5Sum", md5))
+ self.assertFalse(by_hashes.exists(sources_path, "SHA1", sha1))
+ self.assertFalse(by_hashes.exists(sources_path, "SHA256", sha256))
+ by_hashes.add(sources_path, lfa, copy_from_path=sources_path)
+ self.assertTrue(by_hashes.exists(sources_path, "MD5Sum", md5))
+ self.assertTrue(by_hashes.exists(sources_path, "SHA1", sha1))
+ self.assertTrue(by_hashes.exists(sources_path, "SHA256", sha256))
+
+ def test_prune(self):
+ root = self.makeTemporaryDirectory()
+ path_contents = {
+ "dists/foo/main/source": {"Sources": "abc\n"},
+ "dists/foo/main/binary-amd64": {
+ "Packages.gz": "def\n", "Packages.xz": "ghi\n"},
+ }
+ by_hashes = ByHashes(root)
+ for dirpath, contents in path_contents.items():
+ for name, content in contents.items():
+ path = os.path.join(dirpath, name)
+ with open_for_writing(os.path.join(root, path), "w") as f:
+ f.write(content)
+ lfa = self.factory.makeLibraryFileAlias(
+ content=content, db_only=True)
+ by_hashes.add(path, lfa, copy_from_path=path)
+ strays = [
+ "dists/foo/main/source/by-hash/MD5Sum/0",
+ "dists/foo/main/binary-amd64/by-hash/MD5Sum/0",
+ ]
+ for stray in strays:
+ with open_for_writing(os.path.join(root, stray), "w"):
+ pass
+ matcher = ByHashesHaveContents({
+ path: contents.values()
+ for path, contents in path_contents.items()})
+ self.assertThat(root, Not(matcher))
+ by_hashes.prune()
+ self.assertThat(root, matcher)
+
+
class TestPublisher(TestPublisherBase):
"""Testing `Publisher` behaviour."""
@@ -1557,6 +1793,34 @@
# are marked as dirty.
self.checkDirtyPockets(publisher, expected=allowed_suites)
+ def testDirtyingPocketsWithReapableArchiveFiles(self):
+ """Pockets are dirty if they contain reapable archive files."""
+ allowed_suites = []
+ publisher = getPublisher(
+ self.ubuntutest.main_archive, allowed_suites, self.logger)
+ publisher.A2_markPocketsWithDeletionsDirty()
+ self.checkDirtyPockets(publisher, expected=[])
+
+ lfa = self.factory.makeLibraryFileAlias()
+ getUtility(IArchiveFileSet).new(
+ self.ubuntutest.main_archive, u"stray", u"foo", lfa)
+ publisher.A2_markPocketsWithDeletionsDirty()
+ self.checkDirtyPockets(publisher, expected=[])
+
+ archive_file = getUtility(IArchiveFileSet).new(
+ self.ubuntutest.main_archive, u"release:breezy-autotest", u"foo",
+ lfa)
+ publisher.A2_markPocketsWithDeletionsDirty()
+ self.checkDirtyPockets(publisher, expected=[])
+
+ removeSecurityProxy(archive_file).scheduled_deletion_date = (
+ datetime.now(pytz.UTC) - timedelta(days=1))
+ publisher.A2_markPocketsWithDeletionsDirty()
+ expected_dirty_pockets = [
+ ('breezy-autotest', PackagePublishingPocket.RELEASE),
+ ]
+ self.checkDirtyPockets(publisher, expected=expected_dirty_pockets)
+
def testReleaseFile(self):
"""Test release file writing.
@@ -1908,6 +2172,160 @@
os.stat(os.path.join(dep11_path, name)).st_mtime,
LessThan(now - 59))
+ def testUpdateByHashDisabled(self):
+ # The publisher does not create by-hash directories if it is
+ # disabled in the series configuration.
+ self.assertFalse(self.breezy_autotest.publish_by_hash)
+ publisher = Publisher(
+ self.logger, self.config, self.disk_pool,
+ self.ubuntutest.main_archive)
+
+ self.getPubSource(filecontent='Source: foo\n')
+
+ publisher.A_publish(False)
+ publisher.C_doFTPArchive(False)
+ publisher.D_writeReleaseFiles(False)
+
+ suite_path = partial(
+ os.path.join, self.config.distsroot, 'breezy-autotest')
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'), Not(PathExists()))
+ release = self.parseRelease(suite_path('Release'))
+ self.assertNotIn('Acquire-By-Hash', release)
+
+ def testUpdateByHashInitial(self):
+ # An initial publisher run populates by-hash directories and leaves
+ # no archive files scheduled for deletion.
+ self.breezy_autotest.publish_by_hash = True
+ publisher = Publisher(
+ self.logger, self.config, self.disk_pool,
+ self.ubuntutest.main_archive)
+
+ self.getPubSource(filecontent='Source: foo\n')
+
+ publisher.A_publish(False)
+ publisher.C_doFTPArchive(False)
+ publisher.D_writeReleaseFiles(False)
+
+ suite_path = partial(
+ os.path.join, self.config.distsroot, 'breezy-autotest')
+ contents = []
+ for name in ('Release', 'Sources.gz', 'Sources.bz2'):
+ with open(suite_path('main', 'source', name), 'rb') as f:
+ contents.append(f.read())
+
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ ByHashHasContents(contents))
+
+ archive_files = getUtility(IArchiveFileSet).getByArchive(
+ self.ubuntutest.main_archive)
+ self.assertNotEqual([], archive_files)
+ self.assertEqual([], [
+ archive_file for archive_file in archive_files
+ if archive_file.scheduled_deletion_date is not None])
+
+ def testUpdateByHashSubsequent(self):
+ # A subsequent publisher run updates by-hash directories where
+ # necessary, and marks inactive index files for later deletion.
+ self.breezy_autotest.publish_by_hash = True
+ publisher = Publisher(
+ self.logger, self.config, self.disk_pool,
+ self.ubuntutest.main_archive)
+
+ self.getPubSource(filecontent='Source: foo\n')
+
+ publisher.A_publish(False)
+ publisher.C_doFTPArchive(False)
+ publisher.D_writeReleaseFiles(False)
+
+ suite_path = partial(
+ os.path.join, self.config.distsroot, 'breezy-autotest')
+ main_contents = []
+ universe_contents = []
+ for name in ('Release', 'Sources.gz', 'Sources.bz2'):
+ with open(suite_path('main', 'source', name), 'rb') as f:
+ main_contents.append(f.read())
+ with open(suite_path('universe', 'source', name), 'rb') as f:
+ universe_contents.append(f.read())
+
+ self.getPubSource(sourcename='baz', filecontent='Source: baz\n')
+
+ publisher.A_publish(False)
+ publisher.C_doFTPArchive(False)
+ publisher.D_writeReleaseFiles(False)
+
+ for name in ('Release', 'Sources.gz', 'Sources.bz2'):
+ with open(suite_path('main', 'source', name), 'rb') as f:
+ main_contents.append(f.read())
+
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ ByHashHasContents(main_contents))
+ self.assertThat(
+ suite_path('universe', 'source', 'by-hash'),
+ ByHashHasContents(universe_contents))
+
+ archive_files = getUtility(IArchiveFileSet).getByArchive(
+ self.ubuntutest.main_archive)
+ self.assertContentEqual(
+ ['dists/breezy-autotest/main/source/Sources.bz2',
+ 'dists/breezy-autotest/main/source/Sources.gz'],
+ [archive_file.path for archive_file in archive_files
+ if archive_file.scheduled_deletion_date is not None])
+
+ def testUpdateByHashPrune(self):
+ # The publisher prunes files from by-hash that were superseded more
+ # than a day ago.
+ self.breezy_autotest.publish_by_hash = True
+ publisher = Publisher(
+ self.logger, self.config, self.disk_pool,
+ self.ubuntutest.main_archive)
+
+ suite_path = partial(
+ os.path.join, self.config.distsroot, 'breezy-autotest')
+ main_contents = set()
+ for sourcename in ('foo', 'bar'):
+ self.getPubSource(
+ sourcename=sourcename, filecontent='Source: %s\n' % sourcename)
+ publisher.A_publish(False)
+ publisher.C_doFTPArchive(False)
+ publisher.D_writeReleaseFiles(False)
+ for name in ('Release', 'Sources.gz', 'Sources.bz2'):
+ with open(suite_path('main', 'source', name), 'rb') as f:
+ main_contents.add(f.read())
+ transaction.commit()
+
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ ByHashHasContents(main_contents))
+ old_archive_files = []
+ for archive_file in getUtility(IArchiveFileSet).getByArchive(
+ self.ubuntutest.main_archive):
+ if ('main/source' in archive_file.path and
+ archive_file.scheduled_deletion_date is not None):
+ old_archive_files.append(archive_file)
+ self.assertEqual(2, len(old_archive_files))
+
+ now = datetime.now(pytz.UTC)
+ removeSecurityProxy(old_archive_files[0]).scheduled_deletion_date = (
+ now + timedelta(hours=12))
+ removeSecurityProxy(old_archive_files[1]).scheduled_deletion_date = (
+ now - timedelta(hours=12))
+ old_archive_files[1].library_file.open()
+ try:
+ main_contents.remove(old_archive_files[1].library_file.read())
+ finally:
+ old_archive_files[1].library_file.close()
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ Not(ByHashHasContents(main_contents)))
+
+ publisher.D_writeReleaseFiles(True)
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ ByHashHasContents(main_contents))
+
def testCreateSeriesAliasesNoAlias(self):
"""createSeriesAliases has nothing to do by default."""
publisher = Publisher(
=== modified file 'lib/lp/services/helpers.py'
--- lib/lp/services/helpers.py 2014-05-07 15:28:50 +0000
+++ lib/lp/services/helpers.py 2016-03-17 14:51:01 +0000
@@ -10,6 +10,7 @@
__metaclass__ = type
+from collections import OrderedDict
from difflib import unified_diff
import re
from StringIO import StringIO
@@ -224,19 +225,37 @@
>>> filenameToContentType('test.tgz')
'application/octet-stream'
+
+ Build logs
+ >>> filenameToContentType('buildlog.txt.gz')
+ 'text/plain'
+
+ Various compressed files
+
+ >>> filenameToContentType('Packages.gz')
+ 'application/x-gzip'
+ >>> filenameToContentType('Packages.bz2')
+ 'application/x-bzip2'
+ >>> filenameToContentType('Packages.xz')
+ 'application/x-xz'
"""
- ftmap = {".dsc": "text/plain",
- ".changes": "text/plain",
- ".deb": "application/x-debian-package",
- ".udeb": "application/x-debian-package",
- ".txt": "text/plain",
- # For the build master logs
- ".txt.gz": "text/plain",
- # For live filesystem builds
- ".manifest": "text/plain",
- ".manifest-remove": "text/plain",
- ".size": "text/plain",
- }
+ ftmap = OrderedDict([
+ (".dsc", "text/plain"),
+ (".changes", "text/plain"),
+ (".deb", "application/x-debian-package"),
+ (".udeb", "application/x-debian-package"),
+ (".txt", "text/plain"),
+ # For the build master logs
+ (".txt.gz", "text/plain"),
+ # For live filesystem builds
+ (".manifest", "text/plain"),
+ (".manifest-remove", "text/plain"),
+ (".size", "text/plain"),
+ # Compressed files
+ (".gz", "application/x-gzip"),
+ (".bz2", "application/x-bzip2"),
+ (".xz", "application/x-xz"),
+ ])
for ending in ftmap:
if fname.endswith(ending):
return ftmap[ending]