← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:archive-get-pool-file-by-path into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:archive-get-pool-file-by-path into launchpad:master.

Commit message:
Add Archive.getPoolFileByPath

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/430331

This will soon be useful as part of a system to look up files in archives without reference to files published on a local file system.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:archive-get-pool-file-by-path into launchpad:master.
diff --git a/lib/lp/archivepublisher/diskpool.py b/lib/lp/archivepublisher/diskpool.py
index 17cbd99..c7b2576 100644
--- a/lib/lp/archivepublisher/diskpool.py
+++ b/lib/lp/archivepublisher/diskpool.py
@@ -6,13 +6,14 @@ __all__ = [
     "DiskPoolEntry",
     "FileAddActionEnum",
     "poolify",
+    "unpoolify",
 ]
 
 import logging
 import os
 import tempfile
-from pathlib import Path
-from typing import TYPE_CHECKING, Optional, Union
+from pathlib import Path, PurePath
+from typing import TYPE_CHECKING, Optional, Tuple, Union
 
 from lp.archivepublisher import HARDCODED_COMPONENT_ORDER
 from lp.services.librarian.utils import copy_and_close, sha1_from_path
@@ -26,17 +27,57 @@ from lp.soyuz.interfaces.publishing import (
 )
 
 
-def poolify(source: str, component: Optional[str] = None) -> Path:
-    """Poolify a given source and component name."""
+def get_source_prefix(source: str) -> str:
+    """Get the prefix for a pooled source package name.
+
+    In the Debian repository format, packages are published to directories
+    of the form `pool/<component>/<source prefix>/<source name>/`, perhaps
+    best described here::
+
+        https://lists.debian.org/debian-devel/2000/10/msg01340.html
+
+    The directory here called `<source prefix>` (there doesn't seem to be a
+    canonical term for this) is formed by taking the first character of the
+    source name, except when the source name starts with "lib" in which case
+    it's formed by taking the first four characters of the source name.
+    This was originally in order to behave reasonably on file systems such
+    as ext2, but is now entrenched and expected.
+    """
     if source.startswith("lib"):
-        path = Path(source[:4]) / source
+        return source[:4]
     else:
-        path = Path(source[:1]) / source
+        return source[:1]
+
+
+def poolify(source: str, component: Optional[str] = None) -> Path:
+    """Return the relative pool directory for a source (and component)."""
+    path = Path(get_source_prefix(source)) / source
     if component is not None:
         path = Path(component) / path
     return path
 
 
+def unpoolify(path: PurePath) -> Tuple[str, str, Optional[str]]:
+    """Split a pool-relative path into (component, source, filename).
+
+    Raise ValueError if malformed; filename is None for a 3-part path.
+    """
+    p = path.parts
+    if len(p) < 3 or len(p) > 4:
+        raise ValueError(
+            "Path '%s' is not in a valid pool form" % path.as_posix()
+        )
+    component, source_prefix, source = p[:3]
+    if source_prefix != get_source_prefix(source):
+        raise ValueError(
+            "Source prefix '%s' does not match source '%s'"
+            % (source_prefix, source)
+        )
+    if len(p) == 4:
+        return component, source, p[3]
+    return component, source, None
+
+
 def relative_symlink(src_path: Path, dst_path: Path) -> None:
     """Path.symlink_to replacement that creates relative symbolic links."""
     src_path = Path(os.path.normpath(str(src_path)))
diff --git a/lib/lp/archivepublisher/tests/test_pool.py b/lib/lp/archivepublisher/tests/test_pool.py
index 3c795e6..a4d4cf5 100644
--- a/lib/lp/archivepublisher/tests/test_pool.py
+++ b/lib/lp/archivepublisher/tests/test_pool.py
@@ -4,15 +4,12 @@
 """Tests for pool.py."""
 
 import hashlib
-import shutil
-import unittest
-from pathlib import Path
-from tempfile import mkdtemp
+from pathlib import Path, PurePath
 
 from lazr.enum import EnumeratedType, Item
 from zope.interface import alsoProvides, implementer
 
-from lp.archivepublisher.diskpool import DiskPool, poolify
+from lp.archivepublisher.diskpool import DiskPool, poolify, unpoolify
 from lp.services.log.logger import BufferLogger
 from lp.soyuz.enums import ArchiveRepositoryFormat
 from lp.soyuz.interfaces.files import (
@@ -20,6 +17,7 @@ from lp.soyuz.interfaces.files import (
     IPackageReleaseFile,
     ISourcePackageReleaseFile,
 )
+from lp.testing import TestCase
 
 
 class FakeArchive:
@@ -148,8 +146,8 @@ class PoolTestingFile:
         return self.checkExists(component) and not self.checkIsLink(component)
 
 
-class TestPoolification(unittest.TestCase):
-    def testPoolificationOkay(self):
+class TestPoolification(TestCase):
+    def test_poolify_ok(self):
         """poolify should poolify properly"""
         cases = (
             ("foo", "main", Path("main/f/foo")),
@@ -159,19 +157,50 @@ class TestPoolification(unittest.TestCase):
         for case in cases:
             self.assertEqual(case[2], poolify(case[0], case[1]))
 
+    def test_unpoolify_ok(self):
+        cases = (
+            (PurePath("main/f/foo"), "main", "foo", None),
+            (PurePath("main/f/foo/foo_1.0.dsc"), "main", "foo", "foo_1.0.dsc"),
+            (PurePath("universe/f/foo"), "universe", "foo", None),
+            (PurePath("main/libf/libfoo"), "main", "libfoo", None),
+        )
+        for path, component, source, filename in cases:
+            self.assertEqual((component, source, filename), unpoolify(path))
+
+    def test_unpoolify_too_short(self):
+        self.assertRaisesWithContent(
+            ValueError,
+            "Path 'main' is not in a valid pool form",
+            unpoolify,
+            PurePath("main"),
+        )
+
+    def test_unpoolify_too_long(self):
+        self.assertRaisesWithContent(
+            ValueError,
+            "Path 'main/f/foo/bar/baz' is not in a valid pool form",
+            unpoolify,
+            PurePath("main/f/foo/bar/baz"),
+        )
 
-class TestPool(unittest.TestCase):
+    def test_unpoolify_prefix_mismatch(self):
+        self.assertRaisesWithContent(
+            ValueError,
+            "Source prefix 'a' does not match source 'foo'",
+            unpoolify,
+            PurePath("main/a/foo"),
+        )
+
+
+class TestPool(TestCase):
     def setUp(self):
-        self.pool_path = mkdtemp()
-        self.temp_path = mkdtemp()
+        super().setUp()
+        self.pool_path = self.makeTemporaryDirectory()
+        self.temp_path = self.makeTemporaryDirectory()
         self.pool = DiskPool(
             FakeArchive(), self.pool_path, self.temp_path, BufferLogger()
         )
 
-    def tearDown(self):
-        shutil.rmtree(self.pool_path)
-        shutil.rmtree(self.temp_path)
-
     def testSimpleAdd(self):
         """Adding a new file should work."""
         foo = PoolTestingFile(
diff --git a/lib/lp/soyuz/interfaces/archive.py b/lib/lp/soyuz/interfaces/archive.py
index ab50238..37b81a7 100644
--- a/lib/lp/soyuz/interfaces/archive.py
+++ b/lib/lp/soyuz/interfaces/archive.py
@@ -792,6 +792,15 @@ class IArchiveSubscriberView(Interface):
         :return: A collection containing `BinaryPackagePublishingHistory`.
         """
 
+    def getPoolFileByPath(path):
+        """Return the `ILibraryFileAlias` for a path in this archive's pool.
+
+        :param path: A `PurePath` for where a source or binary package file
+            is published in this archive's pool, e.g.
+            "pool/main/p/package/package_1.dsc".
+        :return: An `ILibraryFileAlias`, or None if no such file is published.
+        """
+
 
 class IArchiveView(IHasBuildRecords):
     """Archive interface for operations restricted by view privilege."""
diff --git a/lib/lp/soyuz/model/archive.py b/lib/lp/soyuz/model/archive.py
index 02581d4..f14b8f2 100644
--- a/lib/lp/soyuz/model/archive.py
+++ b/lib/lp/soyuz/model/archive.py
@@ -14,6 +14,7 @@ __all__ = [
 import re
 import typing
 from operator import attrgetter
+from pathlib import PurePath
 
 import six
 from lazr.lifecycle.event import ObjectCreatedEvent
@@ -49,8 +50,13 @@ from lp.app.interfaces.launchpad import ILaunchpadCelebrities
 from lp.app.interfaces.security import IAuthorization
 from lp.app.validators.name import valid_name
 from lp.archivepublisher.debversion import Version
+from lp.archivepublisher.diskpool import unpoolify
 from lp.archivepublisher.interfaces.publisherconfig import IPublisherConfigSet
-from lp.archiveuploader.utils import re_isadeb, re_issource
+from lp.archiveuploader.utils import (
+    determine_binary_file_type,
+    re_isadeb,
+    re_issource,
+)
 from lp.buildmaster.enums import BuildQueueStatus, BuildStatus
 from lp.buildmaster.interfaces.buildfarmjob import IBuildFarmJobSet
 from lp.buildmaster.interfaces.processor import IProcessorSet
@@ -2013,6 +2019,68 @@ class Archive(SQLBase):
             raise NotFoundError(filename)
         return archive_file
 
+    def getPoolFileByPath(
+        self, path: PurePath
+    ) -> typing.Optional[LibraryFileAlias]:
+        """See `IArchive`."""
+        try:
+            component, source, filename = unpoolify(PurePath(*path.parts[1:]))
+        except ValueError:
+            return None
+        if filename is None:
+            return None
+
+        store = IStore(LibraryFileAlias)
+        clauses = [
+            Component.name == component,
+            SourcePackageName.name == source,
+            LibraryFileAlias.filename == filename,
+        ]
+        # Decide whether to look for source or binary publications.  We
+        # could just try both and UNION them, but this query is likely to be
+        # hot and is complex enough as it is, so don't push our luck.
+        binary = determine_binary_file_type(filename) is not None
+        if binary:
+            xPPH = BinaryPackagePublishingHistory
+            xPF = BinaryPackageFile
+            # XXX cjwatson 20220922: Simplify this once
+            # BinaryPackagePublishingHistory.sourcepackagename has finished
+            # populating.
+            clauses.extend(
+                [
+                    BinaryPackagePublishingHistory.binarypackagerelease
+                    == BinaryPackageRelease.id,
+                    BinaryPackageRelease.build == BinaryPackageBuild.id,
+                    BinaryPackageBuild.source_package_name
+                    == SourcePackageName.id,
+                    BinaryPackagePublishingHistory.binarypackagerelease
+                    == BinaryPackageFile.binarypackagereleaseID,
+                ]
+            )
+        else:
+            xPPH = SourcePackagePublishingHistory
+            xPF = SourcePackageReleaseFile
+            clauses.extend(
+                [
+                    SourcePackagePublishingHistory.sourcepackagename
+                    == SourcePackageName.id,
+                    SourcePackagePublishingHistory.sourcepackagerelease
+                    == SourcePackageReleaseFile.sourcepackagereleaseID,
+                ]
+            )
+        clauses.extend(
+            [
+                xPPH.archive == self,
+                xPPH.component == Component.id,
+                xPPH.datepublished != None,  # ORM clause -> SQL "IS NOT NULL"
+                xPPH.dateremoved == None,  # ORM clause -> SQL "IS NULL"
+                xPF.libraryfile == LibraryFileAlias.id,
+            ]
+        )
+        return (
+            store.find(LibraryFileAlias, *clauses).config(distinct=True).one()
+        )
+
     def getBinaryPackageRelease(self, name, version, archtag):
         """See `IArchive`."""
         from lp.soyuz.model.distroarchseries import DistroArchSeries
diff --git a/lib/lp/soyuz/tests/test_archive.py b/lib/lp/soyuz/tests/test_archive.py
index 6a8d792..6ab3352 100644
--- a/lib/lp/soyuz/tests/test_archive.py
+++ b/lib/lp/soyuz/tests/test_archive.py
@@ -7,6 +7,7 @@ import doctest
 import http.client
 import os.path
 from datetime import date, datetime, timedelta
+from pathlib import PurePath
 from urllib.parse import urlsplit
 
 import responses
@@ -3204,6 +3205,219 @@ class TestGetSourceFileByName(TestCaseWithFactory):
         )
 
 
+class TestGetPoolFileByPath(TestCaseWithFactory):
+
+    layer = DatabaseFunctionalLayer
+
+    def test_file_name_too_short(self):
+        archive = self.factory.makeArchive()
+        self.assertIsNone(
+            archive.getPoolFileByPath(PurePath("pool/nonexistent"))
+        )
+
+    def test_file_name_too_long(self):
+        archive = self.factory.makeArchive()
+        self.assertIsNone(
+            archive.getPoolFileByPath(
+                PurePath("pool/main/p/package/nonexistent/path")
+            )
+        )
+
+    def test_mismatched_source_prefix(self):
+        archive = self.factory.makeArchive()
+        spph = self.factory.makeSourcePackagePublishingHistory(
+            archive=archive,
+            status=PackagePublishingStatus.PUBLISHED,
+            sourcepackagename="test-package",
+            component="main",
+        )
+        self.factory.makeSourcePackageReleaseFile(
+            sourcepackagerelease=spph.sourcepackagerelease,
+            library_file=self.factory.makeLibraryFileAlias(
+                filename="test-package_1.dsc", db_only=True
+            ),
+        )
+        self.assertIsNone(
+            archive.getPoolFileByPath(
+                PurePath("pool/main/q/test-package/test-package_1.dsc")
+            )
+        )
+
+    def test_source_not_found(self):
+        archive = self.factory.makeArchive()
+        self.factory.makeSourcePackagePublishingHistory(
+            archive=archive,
+            status=PackagePublishingStatus.PUBLISHED,
+            sourcepackagename="test-package",
+            component="main",
+        )
+        self.assertIsNone(
+            archive.getPoolFileByPath(
+                PurePath("pool/main/t/test-package/test-package_1.dsc")
+            )
+        )
+
+    def test_source_wrong_component(self):
+        archive = self.factory.makeArchive()
+        spph = self.factory.makeSourcePackagePublishingHistory(
+            archive=archive,
+            status=PackagePublishingStatus.PUBLISHED,
+            sourcepackagename="test-package",
+            component="main",
+        )
+        self.factory.makeSourcePackageReleaseFile(
+            sourcepackagerelease=spph.sourcepackagerelease,
+            library_file=self.factory.makeLibraryFileAlias(
+                filename="test-package_1.dsc", db_only=True
+            ),
+        )
+        self.assertIsNone(
+            archive.getPoolFileByPath(
+                PurePath("pool/universe/t/test-package/test-package_1.dsc")
+            )
+        )
+
+    def test_source_wrong_source_package_name(self):
+        archive = self.factory.makeArchive()
+        spph = self.factory.makeSourcePackagePublishingHistory(
+            archive=archive,
+            status=PackagePublishingStatus.PUBLISHED,
+            sourcepackagename="test-package",
+            component="main",
+        )
+        self.factory.makeSourcePackageReleaseFile(
+            sourcepackagerelease=spph.sourcepackagerelease,
+            library_file=self.factory.makeLibraryFileAlias(
+                filename="test-package_1.dsc", db_only=True
+            ),
+        )
+        self.assertIsNone(
+            archive.getPoolFileByPath(
+                PurePath("pool/main/o/other-package/test-package_1.dsc")
+            )
+        )
+
+    def test_source_found(self):
+        archive = self.factory.makeArchive()
+        spph = self.factory.makeSourcePackagePublishingHistory(
+            archive=archive,
+            status=PackagePublishingStatus.PUBLISHED,
+            sourcepackagename="test-package",
+            component="main",
+        )
+        sprf = self.factory.makeSourcePackageReleaseFile(
+            sourcepackagerelease=spph.sourcepackagerelease,
+            library_file=self.factory.makeLibraryFileAlias(
+                filename="test-package_1.dsc", db_only=True
+            ),
+        )
+        self.factory.makeSourcePackageReleaseFile(
+            sourcepackagerelease=spph.sourcepackagerelease,
+            library_file=self.factory.makeLibraryFileAlias(
+                filename="test-package_1.tar.xz", db_only=True
+            ),
+        )
+        IStore(sprf).flush()
+        self.assertEqual(
+            sprf.libraryfile,
+            archive.getPoolFileByPath(
+                PurePath("pool/main/t/test-package/test-package_1.dsc")
+            ),
+        )
+
+    def test_binary_not_found(self):
+        archive = self.factory.makeArchive()
+        self.factory.makeBinaryPackagePublishingHistory(
+            archive=archive,
+            status=PackagePublishingStatus.PUBLISHED,
+            sourcepackagename="test-package",
+            component="main",
+        )
+        self.assertIsNone(
+            archive.getPoolFileByPath(
+                PurePath("pool/main/t/test-package/test-package_1_amd64.deb")
+            )
+        )
+
+    def test_binary_wrong_component(self):
+        archive = self.factory.makeArchive()
+        bpph = self.factory.makeBinaryPackagePublishingHistory(
+            archive=archive,
+            status=PackagePublishingStatus.PUBLISHED,
+            sourcepackagename="test-package",
+            component="main",
+        )
+        self.factory.makeBinaryPackageFile(
+            binarypackagerelease=bpph.binarypackagerelease,
+            library_file=self.factory.makeLibraryFileAlias(
+                filename="test-package_1_amd64.deb", db_only=True
+            ),
+        )
+        self.assertIsNone(
+            archive.getPoolFileByPath(
+                PurePath(
+                    "pool/universe/t/test-package/test-package_1_amd64.deb"
+                )
+            )
+        )
+
+    def test_binary_wrong_source_package_name(self):
+        # The component matches the publication, so only the source package
+        # name in the path differs; the lookup must still return None.
+        archive = self.factory.makeArchive()
+        bpph = self.factory.makeBinaryPackagePublishingHistory(
+            archive=archive,
+            status=PackagePublishingStatus.PUBLISHED,
+            sourcepackagename="test-package",
+            component="main",
+        )
+        self.factory.makeBinaryPackageFile(
+            binarypackagerelease=bpph.binarypackagerelease,
+            library_file=self.factory.makeLibraryFileAlias(
+                filename="test-package_1_amd64.deb", db_only=True
+            ),
+        )
+        self.assertIsNone(
+            archive.getPoolFileByPath(
+                PurePath("pool/main/o/other-package/test-package_1_amd64.deb")
+            )
+        )
+
+    def test_binary_found(self):
+        archive = self.factory.makeArchive()
+        bpph = self.factory.makeBinaryPackagePublishingHistory(
+            archive=archive,
+            status=PackagePublishingStatus.PUBLISHED,
+            sourcepackagename="test-package",
+            component="main",
+        )
+        bpf = self.factory.makeBinaryPackageFile(
+            binarypackagerelease=bpph.binarypackagerelease,
+            library_file=self.factory.makeLibraryFileAlias(
+                filename="test-package_1_amd64.deb", db_only=True
+            ),
+        )
+        bpph2 = self.factory.makeBinaryPackagePublishingHistory(
+            archive=archive,
+            status=PackagePublishingStatus.PUBLISHED,
+            sourcepackagename="test-package",
+            component="main",
+        )
+        self.factory.makeBinaryPackageFile(
+            binarypackagerelease=bpph2.binarypackagerelease,
+            library_file=self.factory.makeLibraryFileAlias(
+                filename="test-package_1_i386.deb", db_only=True
+            ),
+        )
+        IStore(bpf).flush()
+        self.assertEqual(
+            bpf.libraryfile,
+            archive.getPoolFileByPath(
+                PurePath("pool/main/t/test-package/test-package_1_amd64.deb")
+            ),
+        )
+
+
 class TestGetPublishedSources(TestCaseWithFactory):
 
     layer = DatabaseFunctionalLayer