← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:artifactory-multiple-orig into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:artifactory-multiple-orig into launchpad:master.

Commit message:
Fix handling of shared files when updating Artifactory properties

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #2002342 in Launchpad itself: "Required deb.* properties are not being set when publishing debs"
  https://bugs.launchpad.net/launchpad/+bug/2002342

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/435618

Some files (notably `.orig.tar.*` files in Debian-format source packages) may be shared between multiple different package versions: for example, `hello 1.0-1` and `hello 1.0-2` will normally share a file called something like `hello_1.0.orig.tar.gz` containing the original upstream source code.

For Artifactory publication, the publisher handled this case poorly.  If two versions were published that shared the same file, but then one of those versions was removed, the shared file was correctly left in place until there are no remaining publications using it; but `Publisher.C_updateArtifactoryProperties` attempted to update properties on all the files associated with the removed version rather than on just the file that was still published, resulting in a `FileNotFoundError`.

Fixing this requires working out the `IPackageReleaseFile` and `IPublishingView` objects to which a given Artifactory path "belongs", which is somewhat complex: we have to go through all the possible release objects, figure out what the paths of all their files would be if published, build mappings of paths back to files and publications, and use those mappings to update only the paths that are still published with the correct properties.

The shared files end up still having `launchpad.release-id` properties associated with the old version, but this seems to be a minor enough problem that we can ignore it for now.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:artifactory-multiple-orig into launchpad:master.
diff --git a/lib/lp/archivepublisher/artifactory.py b/lib/lp/archivepublisher/artifactory.py
index 2d16d99..b0fd664 100644
--- a/lib/lp/archivepublisher/artifactory.py
+++ b/lib/lp/archivepublisher/artifactory.py
@@ -12,7 +12,7 @@ import os
 import tempfile
 from collections import defaultdict
 from pathlib import Path, PurePath
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, Optional
 from urllib.parse import quote_plus
 
 import requests
@@ -553,14 +553,13 @@ class ArtifactoryPool:
         self,
         source_name: str,
         source_version: str,
-        pub_files: List[IPackageReleaseFile],
+        pub_file: IPackageReleaseFile,
         publications,
         old_properties=None,
     ):
         """Update a file's properties in Artifactory."""
-        for pub_file in pub_files:
-            entry = self._getEntry(source_name, source_version, pub_file)
-            entry.updateProperties(publications, old_properties=old_properties)
+        entry = self._getEntry(source_name, source_version, pub_file)
+        entry.updateProperties(publications, old_properties=old_properties)
 
     def getArtifactPatterns(self, repository_format):
         """Get patterns matching artifacts in a repository of this format.
diff --git a/lib/lp/archivepublisher/publishing.py b/lib/lp/archivepublisher/publishing.py
index b1faa80..5778aa9 100644
--- a/lib/lp/archivepublisher/publishing.py
+++ b/lib/lp/archivepublisher/publishing.py
@@ -24,6 +24,7 @@ from functools import partial
 from itertools import chain, groupby
 from operator import attrgetter
 
+from artifactory import ArtifactoryPath
 from debian.deb822 import Release, _multivalued
 from storm.expr import Desc
 from zope.component import getUtility
@@ -817,7 +818,13 @@ class Publisher:
                 # Skip any files that Launchpad didn't put in Artifactory.
                 continue
             plan.append(
-                (source_name[0], source_version[0], release_id[0], properties)
+                (
+                    source_name[0],
+                    source_version[0],
+                    release_id[0],
+                    path,
+                    properties,
+                )
             )
 
         # Releases that have been removed may still have corresponding
@@ -826,7 +833,7 @@ class Publisher:
         # corresponding pool entries.
         missing_sources = set()
         missing_binaries = set()
-        for _, _, release_id, _ in plan:
+        for _, _, release_id, _, _ in plan:
             if release_id in releases_by_id:
                 continue
             match = re.match(r"^source:(\d+)$", release_id)
@@ -841,12 +848,48 @@ class Publisher:
         for bpr in load(BinaryPackageRelease, missing_binaries):
             releases_by_id["binary:%d" % bpr.id] = bpr
 
-        for source_name, source_version, release_id, properties in plan:
+        # Work out the publication files and publications that each path
+        # belongs to (usually just one, but there are cases where one file
+        # may be shared among multiple releases, such as .orig.tar.* files
+        # in Debian-format source packages).
+        pub_files_by_path = defaultdict(set)
+        pubs_by_path = defaultdict(set)
+        for source_name, source_version, release_id, _, _ in plan:
+            for pub_file in releases_by_id[release_id].files:
+                path = self._diskpool.pathFor(
+                    None, source_name, source_version, pub_file
+                )
+                pub_files_by_path[path].add(pub_file)
+                if release_id in pubs_by_id:
+                    pubs_by_path[path].update(pubs_by_id[release_id])
+
+        root_path = ArtifactoryPath(self._config.archiveroot)
+        for source_name, source_version, _, path, properties in plan:
+            full_path = root_path / path
+            # For now, any of the possible publication files matching this
+            # path should do just as well; it's only used to work out the
+            # artifact path.  Just to make sure things are deterministic,
+            # use the one with the lowest ID.
+            pub_file = sorted(
+                pub_files_by_path[full_path], key=attrgetter("id")
+            )[0]
+            # Tell the pool about all the publications that refer to this
+            # file, since it may set some properties to describe the union
+            # of all of them.
+            publications = sorted(
+                pubs_by_path.get(full_path, []), key=attrgetter("id")
+            )
+            # Use the name and version from the first publication if we can,
+            # but if there aren't any then fall back to the property values
+            # from Artifactory.
+            if publications:
+                source_name = publications[0].pool_name
+                source_version = publications[0].pool_version
             self._diskpool.updateProperties(
                 source_name,
                 source_version,
-                releases_by_id[release_id].files,
-                pubs_by_id.get(release_id),
+                pub_file,
+                publications,
                 old_properties=properties,
             )
 
diff --git a/lib/lp/archivepublisher/tests/test_artifactory.py b/lib/lp/archivepublisher/tests/test_artifactory.py
index da1a31a..924d12c 100644
--- a/lib/lp/archivepublisher/tests/test_artifactory.py
+++ b/lib/lp/archivepublisher/tests/test_artifactory.py
@@ -536,7 +536,7 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
             },
             path.properties,
         )
-        pool.updateProperties(spr.name, spr.version, [sprf], spphs)
+        pool.updateProperties(spr.name, spr.version, sprf, spphs)
         self.assertEqual(
             {
                 "launchpad.release-id": ["source:%d" % spr.id],
@@ -612,7 +612,7 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
             path.properties,
         )
         pool.updateProperties(
-            bpr.sourcepackagename, bpr.sourcepackageversion, [bpf], bpphs
+            bpr.sourcepackagename, bpr.sourcepackageversion, bpf, bpphs
         )
         self.assertEqual(
             {
@@ -682,7 +682,7 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
             path.properties,
         )
         pool.updateProperties(
-            bpr.sourcepackagename, bpr.sourcepackageversion, [bpf], bpphs
+            bpr.sourcepackagename, bpr.sourcepackageversion, bpf, bpphs
         )
         self.assertEqual(
             {
@@ -759,7 +759,7 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
             },
             path.properties,
         )
-        pool.updateProperties(spr.name, spr.version, [sprf], spphs)
+        pool.updateProperties(spr.name, spr.version, sprf, spphs)
         self.assertEqual(
             {
                 "launchpad.release-id": ["source:%d" % spr.id],
@@ -837,7 +837,7 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
             },
             path.properties,
         )
-        pool.updateProperties(spr.name, spr.version, [sprf], spphs)
+        pool.updateProperties(spr.name, spr.version, sprf, spphs)
         self.assertEqual(
             {
                 "launchpad.release-id": ["source:%d" % spr.id],
@@ -914,7 +914,7 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
             },
             path.properties,
         )
-        pool.updateProperties(spr.name, spr.version, [sprf], spphs)
+        pool.updateProperties(spr.name, spr.version, sprf, spphs)
         self.assertEqual(
             {
                 "launchpad.release-id": ["source:%d" % spr.id],
@@ -1008,7 +1008,7 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
             },
             path.properties,
         )
-        pool.updateProperties(bpph.pool_name, bpph.pool_version, [bpf], bpphs)
+        pool.updateProperties(bpph.pool_name, bpph.pool_version, bpf, bpphs)
         self.assertEqual(
             {
                 "launchpad.release-id": ["binary:%d" % bpr.id],
@@ -1093,7 +1093,7 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
             },
             path.properties,
         )
-        pool.updateProperties(bpph.pool_name, bpph.pool_version, [bpf], bpphs)
+        pool.updateProperties(bpph.pool_name, bpph.pool_version, bpf, bpphs)
         self.assertEqual(
             {
                 "launchpad.release-id": ["binary:%d" % bpr.id],
@@ -1178,7 +1178,7 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
             },
             path.properties,
         )
-        pool.updateProperties(bpph.pool_name, bpph.pool_version, [bpf], bpphs)
+        pool.updateProperties(bpph.pool_name, bpph.pool_version, bpf, bpphs)
         self.assertEqual(
             {
                 "launchpad.release-id": ["binary:%d" % bpr.id],
@@ -1261,7 +1261,7 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
             },
             path.properties,
         )
-        pool.updateProperties(spr.name, spr.version, [sprf], spphs)
+        pool.updateProperties(spr.name, spr.version, sprf, spphs)
         self.assertEqual(
             {
                 "generic.name": ["foo"],
@@ -1350,7 +1350,7 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
             },
             path.properties,
         )
-        pool.updateProperties(bpph.pool_name, bpph.pool_version, [bpf], bpphs)
+        pool.updateProperties(bpph.pool_name, bpph.pool_version, bpf, bpphs)
         self.assertEqual(
             {
                 "generic.name": ["foo"],
@@ -1423,7 +1423,7 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
             path.properties,
         )
         pool.updateProperties(
-            bpr.sourcepackagename, bpr.sourcepackageversion, [bpf], bpphs
+            bpr.sourcepackagename, bpr.sourcepackageversion, bpf, bpphs
         )
         self.assertEqual(
             {
@@ -1485,7 +1485,7 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
         ]["pypi.summary"] = ["text with special characters: ;=|,\\"]
 
         pool.updateProperties(
-            bpr.sourcepackagename, bpr.sourcepackageversion, [bpf], bpphs
+            bpr.sourcepackagename, bpr.sourcepackageversion, bpf, bpphs
         )
 
         self.assertEqual(
diff --git a/lib/lp/archivepublisher/tests/test_publisher.py b/lib/lp/archivepublisher/tests/test_publisher.py
index 7d9abb4..c675a51 100644
--- a/lib/lp/archivepublisher/tests/test_publisher.py
+++ b/lib/lp/archivepublisher/tests/test_publisher.py
@@ -4374,6 +4374,80 @@ class TestArtifactoryPublishing(TestPublisherBase):
             binary_path.properties,
         )
 
+    def test_update_properties_shared_orig(self):
+        """An .orig from a previous Debian revision doesn't confuse matters."""
+        self.setUpArtifactory(ArchiveRepositoryFormat.DEBIAN)
+        publisher = Publisher(
+            self.logger, self.config, self.disk_pool, self.archive
+        )
+        orig_tar_file = self.addMockFile(
+            "hello_1.0.orig.tar.xz", b"An orig tarball"
+        )
+        source_1 = self.getPubSource(
+            sourcename="hello",
+            version="1.0-1",
+            archive=self.archive,
+        )
+        source_1.sourcepackagerelease.addFile(
+            self.addMockFile("hello_1.0-1.debian.tar.xz", b"A tarball")
+        )
+        source_1.sourcepackagerelease.addFile(orig_tar_file)
+        transaction.commit()
+
+        publisher.A_publish(False)
+        publisher.C_updateArtifactoryProperties(False)
+
+        # Publish 1.0-2, remove 1.0-1, and simulate process-death-row.
+        source_2 = self.getPubSource(
+            sourcename="hello",
+            version="1.0-2",
+            archive=self.archive,
+        )
+        source_2.sourcepackagerelease.addFile(
+            self.addMockFile("hello_1.0-2.debian.tar.xz", b"A tarball")
+        )
+        source_2.sourcepackagerelease.addFile(orig_tar_file)
+        source_1.requestDeletion(self.archive.owner)
+        for pub_file in source_1.files:
+            if not pub_file.libraryfile.filename.endswith(".orig.tar.xz"):
+                self.disk_pool.removeFile("main", "hello", "1.0-1", pub_file)
+        transaction.commit()
+
+        publisher.A_publish(False)
+        publisher.C_updateArtifactoryProperties(False)
+
+        for name in "hello_1.0-1.dsc", "hello_1.0-1.debian.tar.xz":
+            self.assertFalse(
+                (self.disk_pool.rootpath / "h" / "hello" / name).exists()
+            )
+        for spph, name in (
+            (source_2, "hello_1.0-2.dsc"),
+            (source_2, "hello_1.0-2.debian.tar.xz"),
+            # The shared .orig file ends up still having a
+            # launchpad.release-id property associated with version 1.0-1,
+            # because ArtifactoryPoolEntry.updateProperties always keeps the
+            # release-id from the properties passed to it from Artifactory.
+            # This isn't ideal, but is a relatively minor problem, so allow
+            # it for now.
+            (source_1, "hello_1.0.orig.tar.xz"),
+        ):
+            self.assertEqual(
+                {
+                    "deb.component": ["main"],
+                    "deb.distribution": ["breezy-autotest"],
+                    "deb.name": ["hello"],
+                    "deb.version": ["1.0-2"],
+                    "launchpad.release-id": [
+                        "source:%d" % spph.sourcepackagereleaseID
+                    ],
+                    "launchpad.source-name": ["hello"],
+                    "launchpad.source-version": ["1.0-2"],
+                    "soss.license": ["debian/copyright"],
+                    "soss.type": ["source"],
+                },
+                (self.disk_pool.rootpath / "h" / "hello" / name).properties,
+            )
+
     def test_remove_properties(self):
         """We remove properties if a file is no longer published anywhere."""
         self.setUpArtifactory(ArchiveRepositoryFormat.DEBIAN)