← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:refactor-ci-build-upload-job-scan-file into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:refactor-ci-build-upload-job-scan-file into launchpad:master with ~cjwatson/launchpad:refactor-ci-build-upload-job as a prerequisite.

Commit message:
Refactor CIBuildUploadJob._scanFile

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/426425

`CIBuildUploadJob._scanFile` was structured as a big conditional chain, which was getting quite unwieldy.  Break it up into one method per file type and a short table-driven method to try each one in turn.

This changes behaviour slightly: we now keep trying other scanners even if one fails, which will be necessary in the long run since some package types have ambiguous file names.  This means that errors from an individual scanner are merely logged as warnings.  To avoid hiding problems in typical cases, we now raise an exception at the end of the job if no usable files at all were found.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:refactor-ci-build-upload-job-scan-file into launchpad:master.
diff --git a/lib/lp/soyuz/model/archivejob.py b/lib/lp/soyuz/model/archivejob.py
index ab98138..65a62af 100644
--- a/lib/lp/soyuz/model/archivejob.py
+++ b/lib/lp/soyuz/model/archivejob.py
@@ -315,6 +315,25 @@ class CIBuildUploadJob(ArchiveJobDerived):
     def target_channel(self):
         return self.metadata["target_channel"]
 
+    def _scanWheel(self, path):
+        try:
+            parsed_path = parse_wheel_filename(path)
+            wheel = Wheel(path)
+        except Exception as e:
+            logger.warning(
+                "Failed to scan %s as a Python wheel: %s",
+                os.path.basename(path), e)
+            return None
+        return {
+            "name": wheel.name,
+            "version": wheel.version,
+            "summary": wheel.summary or "",
+            "description": wheel.description,
+            "binpackageformat": BinaryPackageFormat.WHL,
+            "architecturespecific": "any" not in parsed_path.platform_tags,
+            "homepage": wheel.home_page or "",
+            }
+
     def _scanCondaMetadata(self, index, about):
         return {
             "name": index["name"],
@@ -330,57 +349,60 @@ class CIBuildUploadJob(ArchiveJobDerived):
             "user_defined_fields": [("subdir", index["subdir"])],
             }
 
-    def _scanFile(self, path):
-        # XXX cjwatson 2022-06-10: We should probably start splitting this
-        # up some more.
-        name = os.path.basename(path)
-        if path.endswith(".whl"):
-            try:
-                parsed_path = parse_wheel_filename(path)
-                wheel = Wheel(path)
-            except Exception as e:
-                raise ScanException("Failed to scan %s" % name) from e
-            return {
-                "name": wheel.name,
-                "version": wheel.version,
-                "summary": wheel.summary or "",
-                "description": wheel.description,
-                "binpackageformat": BinaryPackageFormat.WHL,
-                "architecturespecific": "any" not in parsed_path.platform_tags,
-                "homepage": wheel.home_page or "",
-                }
-        elif path.endswith(".tar.bz2"):
-            try:
-                with tarfile.open(path) as tar:
+    def _scanCondaV1(self, path):
+        try:
+            with tarfile.open(path) as tar:
+                index = json.loads(
+                    tar.extractfile("info/index.json").read().decode())
+                about = json.loads(
+                    tar.extractfile("info/about.json").read().decode())
+        except Exception as e:
+            logger.warning(
+                "Failed to scan %s as a Conda v1 package: %s",
+                os.path.basename(path), e)
+            return None
+        scanned = {"binpackageformat": BinaryPackageFormat.CONDA_V1}
+        scanned.update(self._scanCondaMetadata(index, about))
+        return scanned
+
+    def _scanCondaV2(self, path):
+        try:
+            with zipfile.ZipFile(path) as zipf:
+                base_name = os.path.basename(path)[:-len(".conda")]
+                info = io.BytesIO()
+                with zipf.open("info-%s.tar.zst" % base_name) as raw_info:
+                    zstandard.ZstdDecompressor().copy_stream(raw_info, info)
+                info.seek(0)
+                with tarfile.open(fileobj=info) as tar:
                     index = json.loads(
                         tar.extractfile("info/index.json").read().decode())
                     about = json.loads(
                         tar.extractfile("info/about.json").read().decode())
-            except Exception as e:
-                raise ScanException("Failed to scan %s" % name) from e
-            scanned = {"binpackageformat": BinaryPackageFormat.CONDA_V1}
-            scanned.update(self._scanCondaMetadata(index, about))
-            return scanned
-        elif path.endswith(".conda"):
-            try:
-                with zipfile.ZipFile(path) as zipf:
-                    base_name = os.path.basename(path)[:-len(".conda")]
-                    info = io.BytesIO()
-                    with zipf.open("info-%s.tar.zst" % base_name) as raw_info:
-                        zstandard.ZstdDecompressor().copy_stream(
-                            raw_info, info)
-                    info.seek(0)
-                    with tarfile.open(fileobj=info) as tar:
-                        index = json.loads(
-                            tar.extractfile("info/index.json").read().decode())
-                        about = json.loads(
-                            tar.extractfile("info/about.json").read().decode())
-            except Exception as e:
-                raise ScanException("Failed to scan %s" % name) from e
-            scanned = {"binpackageformat": BinaryPackageFormat.CONDA_V2}
-            scanned.update(self._scanCondaMetadata(index, about))
-            return scanned
+        except Exception as e:
+            logger.warning(
+                "Failed to scan %s as a Conda v2 package: %s",
+                os.path.basename(path), e)
+            return None
+        scanned = {"binpackageformat": BinaryPackageFormat.CONDA_V2}
+        scanned.update(self._scanCondaMetadata(index, about))
+        return scanned
+
+    def _scanFile(self, path):
+        _scanners = (
+            (".whl", self._scanWheel),
+            (".tar.bz2", self._scanCondaV1),
+            (".conda", self._scanCondaV2),
+            )
+        found_scanner = False
+        for suffix, scanner in _scanners:
+            if path.endswith(suffix):
+                found_scanner = True
+                scanned = scanner(path)
+                if scanned is not None:
+                    return scanned
         else:
+            if not found_scanner:
+                logger.info("No upload handler for %s", os.path.basename(path))
             return None
 
     def _scanArtifacts(self, artifacts):
@@ -406,7 +428,6 @@ class CIBuildUploadJob(ArchiveJobDerived):
                 copy_and_close(artifact.library_file, open(contents, "wb"))
                 metadata = self._scanFile(contents)
                 if metadata is None:
-                    logger.info("No upload handler for %s", name)
                     continue
                 if metadata["binpackageformat"] not in allowed_binary_formats:
                     logger.info(
@@ -468,3 +489,7 @@ class CIBuildUploadJob(ArchiveJobDerived):
         scanned = self._scanArtifacts(artifacts)
         if scanned:
             self._uploadBinaries(scanned)
+        else:
+            names = [artifact.library_file.filename for artifact in artifacts]
+            raise ScanException(
+                "Could not find any usable files in %s" % names)
diff --git a/lib/lp/soyuz/tests/test_archivejob.py b/lib/lp/soyuz/tests/test_archivejob.py
index e0df1c8..b744bc8 100644
--- a/lib/lp/soyuz/tests/test_archivejob.py
+++ b/lib/lp/soyuz/tests/test_archivejob.py
@@ -4,7 +4,10 @@
 import os.path
 
 from debian.deb822 import Changes
-from fixtures import MockPatch
+from fixtures import (
+    FakeLogger,
+    MockPatch,
+    )
 from testtools.matchers import (
     ContainsDict,
     Equals,
@@ -678,6 +681,7 @@ class TestCIBuildUploadJob(TestCaseWithFactory):
     def test_run_failed(self):
         # A failed run sets the job status to FAILED and notifies the
         # requester.
+        logger = self.useFixture(FakeLogger())
         archive = self.factory.makeArchive(
             repository_format=ArchiveRepositoryFormat.PYTHON)
         distroseries = self.factory.makeDistroSeries(
@@ -698,6 +702,14 @@ class TestCIBuildUploadJob(TestCaseWithFactory):
             JobRunner([job]).runAll()
 
         self.assertEqual(JobStatus.FAILED, job.job.status)
+        expected_logs = [
+            "Running %r (ID %d) in status Waiting" % (job, job.job_id),
+            "Failed to scan _invalid.whl as a Python wheel: Invalid wheel "
+            "filename: '_invalid.whl'",
+            "%r (ID %d) failed with user error ScanException(\"Could not find "
+            "any usable files in ['_invalid.whl']\",)." % (job, job.job_id),
+            ]
+        self.assertEqual(expected_logs, logger.output.splitlines())
         [notification] = self.assertEmailQueueLength(1)
         self.assertThat(dict(notification), ContainsDict({
             "From": Equals(config.canonical.noreply_from_address),
@@ -709,7 +721,8 @@ class TestCIBuildUploadJob(TestCaseWithFactory):
             }))
         self.assertEqual(
             "Launchpad encountered an error during the following operation: "
-            "uploading %s to %s.  Failed to scan _invalid.whl" % (
+            "uploading %s to %s.  "
+            "Could not find any usable files in ['_invalid.whl']" % (
                 build.title, archive.reference),
             notification.get_payload(decode=True).decode())
 

Follow ups