launchpad-reviewers team mailing list archive

Thread
Date

[Merge] ~cjwatson/launchpad-buildd:ci-multi-job-stages into launchpad-buildd:master

To: mp+414165@xxxxxxxxxxxxxxxxxx
From: Colin Watson <mp+414165@xxxxxxxxxxxxxxxxxx>
Date: Fri, 14 Jan 2022 11:46:28 -0000
Reply-to: mp+414165@xxxxxxxxxxxxxxxxxx
Sender: noreply@xxxxxxxxxxxxx

Colin Watson has proposed merging ~cjwatson/launchpad-buildd:ci-multi-job-stages into launchpad-buildd:master with ~cjwatson/launchpad-buildd:ci-individual-results as a prerequisite.

Commit message:
Support CI pipeline stages with multiple jobs

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad-buildd/+git/launchpad-buildd/+merge/414165

The specification calls for either of these kinds of pipeline stage to be valid:

    pipeline:
      # this stage is a list, which means jobs are executed in parallel
      - [test, lint]
      # this stage will only execute if previous steps in the pipeline
      # passed
      - build-wheel

In order to support this, we need to run all jobs in a stage (though not yet in parallel) even if some of them fail, and only proceed to the next stage if they all succeed.  We need to iterate over the pipeline in a rather different way to support reasonable error reporting.

Note that this changes the format of the "jobs" argument passed to the builder: it's now a list of lists of jobs, rather than a list of jobs.  This is OK since Launchpad hasn't started using it yet.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad-buildd:ci-multi-job-stages into launchpad-buildd:master.

diff --git a/debian/changelog b/debian/changelog
index b2bcf77..9d5305f 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,6 +1,7 @@
 launchpad-buildd (207) UNRELEASED; urgency=medium
 
   * Return results from individual CI jobs.
+  * Support CI pipeline stages with multiple jobs.
 
  -- Colin Watson <cjwatson@xxxxxxxxxx>  Thu, 13 Jan 2022 14:51:09 +0000
 
diff --git a/lpbuildd/ci.py b/lpbuildd/ci.py
index 14ac7c6..3256782 100644
--- a/lpbuildd/ci.py
+++ b/lpbuildd/ci.py
@@ -30,6 +30,10 @@ RESULT_SUCCEEDED = "SUCCEEDED"
 RESULT_FAILED = "FAILED"
 
 
+def _make_job_id(job_name, job_index):
+    return "%s:%s" % (job_name, job_index)
+
+
 class CIBuildState(DebianBuildState):
     PREPARE = "PREPARE"
     RUN_JOB = "RUN_JOB"
@@ -84,9 +88,22 @@ class CIBuildManager(BuildManagerProxyMixin, DebianBuildManager):
             pass
         self.runTargetSubProcess("run-ci-prepare", *args)
 
+    @property
+    def current_job(self):
+        return self.jobs[self.stage_index][self.job_index]
+
+    @property
+    def has_current_job(self):
+        try:
+            self.current_job
+            return True
+        except IndexError:
+            return False
+
     def iterate_PREPARE(self, retcode):
         """Finished preparing for running CI jobs."""
-        self.remaining_jobs = list(self.jobs)
+        self.stage_index = 0
+        self.job_index = 0
         if retcode == RETCODE_SUCCESS:
             pass
         elif (retcode >= RETCODE_FAILURE_INSTALL and
@@ -99,7 +116,7 @@ class CIBuildManager(BuildManagerProxyMixin, DebianBuildManager):
             if not self.alreadyfailed:
                 self._builder.builderFail()
             self.alreadyfailed = True
-        if self.remaining_jobs and not self.alreadyfailed:
+        if self.has_current_job and not self.alreadyfailed:
             self._state = CIBuildState.RUN_JOB
             self.runNextJob()
         else:
@@ -115,11 +132,15 @@ class CIBuildManager(BuildManagerProxyMixin, DebianBuildManager):
         self._state = DebianBuildState.UMOUNT
         self.doUnmounting()
 
+    @staticmethod
+    def _makeJobID(job_name, job_index):
+        return "%s:%s" % (job_name, job_index)
+
     def runNextJob(self):
         """Run the next CI job."""
         args = list(self.proxy_args)
-        job_name, job_index = self.remaining_jobs.pop(0)
-        self.current_job_id = "%s:%s" % (job_name, job_index)
+        job_name, job_index = self.current_job
+        self.current_job_id = _make_job_id(job_name, job_index)
         args.extend([job_name, str(job_index)])
         self.runTargetSubProcess("run-ci", *args)
 
@@ -136,15 +157,39 @@ class CIBuildManager(BuildManagerProxyMixin, DebianBuildManager):
             if (retcode >= RETCODE_FAILURE_INSTALL and
                     retcode <= RETCODE_FAILURE_BUILD):
                 self._builder.log("Job %s failed." % self.current_job_id)
-                if not self.alreadyfailed:
-                    self._builder.buildFail()
+                if len(self.jobs[self.stage_index]) == 1:
+                    # Single-job stage, so fail straight away in order to
+                    # get simpler error messages.
+                    if not self.alreadyfailed:
+                        self._builder.buildFail()
+                    self.alreadyfailed = True
             else:
                 if not self.alreadyfailed:
                     self._builder.builderFail()
-            self.alreadyfailed = True
+                self.alreadyfailed = True
         yield self.deferGatherResults(reap=False)
         self.job_status[self.current_job_id]["result"] = result
-        if self.remaining_jobs and not self.alreadyfailed:
+
+        self.job_index += 1
+        if self.job_index >= len(self.jobs[self.stage_index]):
+            # End of stage.  Fail if any job in this stage has failed.
+            current_stage_job_ids = [
+                _make_job_id(job_name, job_index)
+                for job_name, job_index in self.jobs[self.stage_index]]
+            if any(
+                self.job_status[job_id]["result"] != RESULT_SUCCEEDED
+                for job_id in current_stage_job_ids
+            ):
+                if not self.alreadyfailed:
+                    self._builder.log(
+                        "Some jobs in %s failed; stopping." %
+                        current_stage_job_ids)
+                    self._builder.buildFail()
+                self.alreadyfailed = True
+            self.stage_index += 1
+            self.job_index = 0
+
+        if self.has_current_job and not self.alreadyfailed:
             self.runNextJob()
         else:
             self.stopProxy()
diff --git a/lpbuildd/tests/test_ci.py b/lpbuildd/tests/test_ci.py
index 2cc1387..fee8c01 100644
--- a/lpbuildd/tests/test_ci.py
+++ b/lpbuildd/tests/test_ci.py
@@ -120,7 +120,7 @@ class TestCIBuildManagerIteration(TestCase):
         args = {
             "git_repository": "https://git.launchpad.test/~example/+git/ci";,
             "git_path": "main",
-            "jobs": [("build", "0"), ("test", "0")],
+            "jobs": [[("build", "0")], [("test", "0")]],
             }
         expected_options = [
             "--git-repository", "https://git.launchpad.test/~example/+git/ci";,
@@ -236,7 +236,7 @@ class TestCIBuildManagerIteration(TestCase):
         args = {
             "git_repository": "https://git.launchpad.test/~example/+git/ci";,
             "git_path": "main",
-            "jobs": [("build", "0"), ("test", "0")],
+            "jobs": [[("lint", "0"), ("build", "0")], [("test", "0")]],
             }
         expected_options = [
             "--git-repository", "https://git.launchpad.test/~example/+git/ci";,
@@ -245,12 +245,31 @@ class TestCIBuildManagerIteration(TestCase):
         yield self.startBuild(args, expected_options)
 
         # After preparation, start running the first job.
-        yield self.expectRunJob("build", "0")
+        yield self.expectRunJob("lint", "0")
+        self.buildmanager.backend.add_file(
+            "/build/output/lint:0.log", b"I am a failing CI lint job log.")
+
+        # Collect the output of the first job and start running the second.
+        # (Note that `retcode` is the return code of the *first* job, not the
+        # second.)
+        yield self.expectRunJob("build", "0", retcode=RETCODE_FAILURE_BUILD)
         self.buildmanager.backend.add_file(
-            "/build/output/build:0.log", b"I am a failing CI build job log.")
+            "/build/output/build:0.log", b"I am a CI build job log.")
 
-        # If the first job fails, then the build fails here.
-        yield self.buildmanager.iterate(RETCODE_FAILURE_BUILD)
+        # Output from the first job is visible in the status response.
+        extra_status = self.buildmanager.status()
+        self.assertEqual(
+            {
+                "lint:0": {
+                    "log": self.builder.waitingfiles["lint:0.log"],
+                    "result": RESULT_FAILED,
+                    },
+                },
+            extra_status["jobs"])
+
+        # Since the first pipeline stage failed, we won't go any further, and
+        # expect to start reaping processes.
+        yield self.buildmanager.iterate(0)
         expected_command = [
             "sharepath/bin/in-target", "in-target", "scan-for-processes",
             "--backend=lxd", "--series=focal", "--arch=amd64", self.buildid,
@@ -261,16 +280,22 @@ class TestCIBuildManagerIteration(TestCase):
             self.buildmanager.iterate, self.buildmanager.iterators[-1])
         self.assertTrue(self.builder.wasCalled("buildFail"))
         self.assertThat(self.builder, HasWaitingFiles.byEquality({
-            "build:0.log": b"I am a failing CI build job log.",
+            "lint:0.log": b"I am a failing CI lint job log.",
+            "build:0.log": b"I am a CI build job log.",
             }))
 
-        # Output from the first job is visible in the status response.
+        # Output from the two jobs in the first pipeline stage is visible in
+        # the status response.
         extra_status = self.buildmanager.status()
         self.assertEqual(
             {
+                "lint:0": {
+                    "log": self.builder.waitingfiles["lint:0.log"],
+                    "result": RESULT_FAILED,
+                    },
                 "build:0": {
                     "log": self.builder.waitingfiles["build:0.log"],
-                    "result": RESULT_FAILED,
+                    "result": RESULT_SUCCEEDED,
                     },
                 },
             extra_status["jobs"])