← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:artifactory-quoting into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:artifactory-quoting into launchpad:master.

Commit message:
Fix quoting of semicolons in Artifactory properties

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/425378

Despite what the Artifactory documentation says, semicolons in properties must be quoted in order to protect them from framing issues, since properties are themselves separated by semicolons.  The exact observed behaviour of the implementation doesn't seem to be documented anywhere I've found, and it's quite strange in places, but I've done the best I could to make the test fixture agree with reality as closely as possible.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:artifactory-quoting into launchpad:master.
diff --git a/lib/lp/archivepublisher/artifactory.py b/lib/lp/archivepublisher/artifactory.py
index e2fa1e8..1aad3d1 100644
--- a/lib/lp/archivepublisher/artifactory.py
+++ b/lib/lp/archivepublisher/artifactory.py
@@ -207,19 +207,19 @@ class ArtifactoryPoolEntry:
                     # version properties for them.  Partially work around
                     # this by setting those properties manually (although
                     # this doesn't cause Sources files to exist).
-                    properties["deb.name"] = self.source_name
-                    properties["deb.version"] = self.source_version
+                    properties["deb.name"] = [self.source_name]
+                    properties["deb.version"] = [self.source_version]
                     # Debian-format source packages have a license path
                     # required by policy.
-                    properties["soss.license"] = "debian/copyright"
+                    properties["soss.license"] = ["debian/copyright"]
                 elif release_id.startswith("binary:"):
                     # Debian-format binary packages have a license path
                     # required by policy (although examining it may require
                     # dereferencing symlinks).
-                    properties["soss.license"] = (
+                    properties["soss.license"] = [
                         "/usr/share/doc/%s/copyright"
                         % publications[0].binary_package_name
-                    )
+                    ]
             else:
                 properties["launchpad.channel"] = sorted(
                     {
@@ -236,10 +236,10 @@ class ArtifactoryPoolEntry:
             if ci_build is not None:
                 properties.update(
                     {
-                        "soss.source_url": (
+                        "soss.source_url": [
                             ci_build.git_repository.getCodebrowseUrl()
-                        ),
-                        "soss.commit_id": ci_build.commit_sha1,
+                        ],
+                        "soss.commit_id": [ci_build.commit_sha1],
                     }
                 )
         return properties
@@ -292,6 +292,19 @@ class ArtifactoryPoolEntry:
             and key not in self.owned_properties
         }
         new_properties.update(properties)
+        # https://www.jfrog.com/confluence/display/JFROG/Artifactory+REST+API
+        # claims that ",", backslash, "|", and "=" require quoting with a
+        # leading URL-encoded backslash.  The artifactory module quotes ",",
+        # "|", and "=" (and handles URL-encoding at a lower layer).  In
+        # practice, backslash must apparently not be quoted, and ";" must be
+        # quoted or we run into framing problems with ";"-separation of
+        # properties.  This is not very satisfactory as we're essentially
+        # playing core wars with the artifactory module, but this solves the
+        # problem for now.
+        new_properties = {
+            key: [v.replace(";", r"\;") for v in value]
+            for key, value in new_properties.items()
+        }
         if old_properties != new_properties:
             # We could use the ArtifactoryPath.properties setter, but that
             # will fetch the old properties again when we already have them
diff --git a/lib/lp/archivepublisher/tests/artifactory_fixture.py b/lib/lp/archivepublisher/tests/artifactory_fixture.py
index c642031..f606e8f 100644
--- a/lib/lp/archivepublisher/tests/artifactory_fixture.py
+++ b/lib/lp/archivepublisher/tests/artifactory_fixture.py
@@ -112,15 +112,36 @@ class FakeArtifactoryFixture(Fixture):
         else:
             return 404, {}, "Unable to find item"
 
+    def _split(self, delimiter, text):
+        """Split text on a delimiter the way that Artifactory does.
+
+        We need to skip delimiters quoted by a leading backslash.
+        """
+        assert delimiter in ",|=;"
+        return re.findall(r"((?:\\[,|=;]|[^%s])+)" % delimiter, text)
+
+    def _unquote(self, text):
+        """Unquote something the way that Artifactory does.
+
+        This is poorly-documented.
+        https://www.jfrog.com/confluence/display/JFROG/Artifactory+REST+API
+        says "In order to supply special characters (comma (,), backslash
+        (\\), pipe (|), equals (=)) as key/value you must add an encoded
+        backslash (%5C) before them"; but in practice semicolon also needs
+        to be quoted in this way to avoid framing problems with
+        semicolon-separation of properties, and quoting a backslash like
+        this simply results in a double backslash.
+        """
+        return re.sub(r"\\([,|=;])", r"\1", unquote(text))
+
     def _decode_properties(self, encoded):
         """Decode properties that were encoded as part of a request."""
         properties = {}
-        for param in encoded.split(";"):
-            key, value = param.split("=", 1)
-            # XXX cjwatson 2022-04-19: Check whether this unquoting approach
-            # actually matches the artifactory module and Artifactory
-            # itself.
-            properties[unquote(key)] = [unquote(v) for v in value.split(",")]
+        for param in self._split(";", encoded):
+            key, value = re.match(r"((?:\\[,|=;]|[^=])+)=(.*)", param).groups()
+            properties[self._unquote(key)] = [
+                self._unquote(v) for v in self._split(",", value)
+            ]
         return properties
 
     def _handle_upload(self, request):
diff --git a/lib/lp/archivepublisher/tests/test_artifactory.py b/lib/lp/archivepublisher/tests/test_artifactory.py
index c89e462..0e013e5 100644
--- a/lib/lp/archivepublisher/tests/test_artifactory.py
+++ b/lib/lp/archivepublisher/tests/test_artifactory.py
@@ -897,3 +897,56 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
             },
             path.properties,
         )
+
+    def test_updateProperties_encodes_special_characters(self):
+        pool = self.makePool(ArchiveRepositoryFormat.PYTHON)
+        ds = self.factory.makeDistroSeries(
+            distribution=pool.archive.distribution
+        )
+        das = self.factory.makeDistroArchSeries(distroseries=ds)
+        spr = self.factory.makeSourcePackageRelease(
+            archive=pool.archive,
+            sourcepackagename="foo",
+            version="1.0",
+            format=SourcePackageType.SDIST,
+        )
+        bpph = self.factory.makeBinaryPackagePublishingHistory(
+            archive=pool.archive,
+            distroarchseries=das,
+            pocket=PackagePublishingPocket.RELEASE,
+            component="main",
+            source_package_release=spr,
+            binarypackagename="foo",
+            binpackageformat=BinaryPackageFormat.WHL,
+            architecturespecific=False,
+            channel="edge",
+        )
+        bpr = bpph.binarypackagerelease
+        bpf = self.factory.makeBinaryPackageFile(
+            binarypackagerelease=bpr,
+            library_file=self.factory.makeLibraryFileAlias(
+                filename="foo-1.0-py3-none-any.whl"
+            ),
+            filetype=BinaryPackageFileType.WHL,
+        )
+        bpphs = [bpph]
+        transaction.commit()
+        pool.addFile(
+            None, bpr.sourcepackagename, bpr.sourcepackageversion, bpf
+        )
+        path = pool.rootpath / "foo" / "1.0" / "foo-1.0-py3-none-any.whl"
+        self.assertTrue(path.exists())
+        self.assertFalse(path.is_symlink())
+        # Simulate Artifactory scanning the package.
+        self.artifactory._fs["/foo/1.0/foo-1.0-py3-none-any.whl"][
+            "properties"
+        ]["pypi.summary"] = ["text with special characters: ;=|,\\"]
+
+        pool.updateProperties(
+            bpr.sourcepackagename, bpr.sourcepackageversion, [bpf], bpphs
+        )
+
+        self.assertEqual(
+            ["text with special characters: ;=|,\\"],
+            path.properties["pypi.summary"],
+        )