← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~wgrant/launchpad/buildd-manager-handoff into lp:launchpad

 

William Grant has proposed merging lp:~wgrant/launchpad/buildd-manager-handoff into lp:launchpad with lp:~wgrant/launchpad/buildd-manager-robustification as a prerequisite.

Commit message:
Hand builds off from buildd-manager to process-upload atomically. UPLOADING is only set as the BuildQueue is destroyed, and binary uploads for invalid statuses are failed rather than ignored.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1422616 in Launchpad itself: "process-upload can work on a build before buildd-manager is done with it"
  https://bugs.launchpad.net/launchpad/+bug/1422616

For more details, see:
https://code.launchpad.net/~wgrant/launchpad/buildd-manager-handoff/+merge/250246

Hand builds off from buildd-manager to process-upload atomically. buildd-manager now only sets UPLOADING in the same transaction as the BuildQueue destruction, ensuring that process-upload doesn't touch the build until buildd-manager is guaranteed to never touch it again. process-upload also now fails any upload that has a build which is neither BUILDING nor UPLOADING, as that would indicate a bug.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/buildd-manager-handoff into lp:launchpad.
=== modified file 'lib/lp/archiveuploader/tests/test_uploadprocessor.py'
--- lib/lp/archiveuploader/tests/test_uploadprocessor.py	2015-01-23 17:57:59 +0000
+++ lib/lp/archiveuploader/tests/test_uploadprocessor.py	2015-02-19 01:43:44 +0000
@@ -142,6 +142,7 @@
 
         self.queue_folder = tempfile.mkdtemp()
         self.incoming_folder = os.path.join(self.queue_folder, "incoming")
+        self.failed_folder = os.path.join(self.queue_folder, "failed")
         os.makedirs(self.incoming_folder)
 
         self.test_files_dir = os.path.join(config.root,
@@ -2258,9 +2259,7 @@
         self.assertIsNot(None, build.duration)
         self.assertIs(None, build.upload_log)
 
-    def testBuildWithInvalidStatus(self):
-        # Builds with an invalid (non-UPLOADING) status should trigger
-        # a warning but be left alone.
+    def processUploadWithBuildStatus(self, status):
         upload_dir = self.queueUpload("bar_1.0-1")
         self.processUpload(self.uploadprocessor, upload_dir)
         source_pub = self.publishPackage('bar', '1.0-1')
@@ -2272,9 +2271,10 @@
             status=PackageUploadStatus.ACCEPTED,
             version=u"1.0-1", name=u"bar")
         queue_item.setDone()
+        stub.test_emails.pop()
 
         build.buildqueue_record.markAsBuilding(self.factory.makeBuilder())
-        build.updateStatus(BuildStatus.BUILDING)
+        build.updateStatus(status)
         self.switchToUploader()
 
         shutil.rmtree(upload_dir)
@@ -2287,17 +2287,40 @@
             "bar_1.0-1_binary", queue_entry=leaf_name)
         self.options.context = 'buildd'
         self.options.builds = True
-        len(stub.test_emails)
+        self.assertEqual([], stub.test_emails)
         BuildUploadHandler(self.uploadprocessor, self.incoming_folder,
             leaf_name).process()
         self.layer.txn.commit()
+
+        return build, leaf_name
+
+    def testBuildStillBuilding(self):
+        # Builds that are still BUILDING should be left alone.  The
+        # upload directory may already be in place, but buildd-manager
+        # will set the status to UPLOADING when it's handed off.
+        build, leaf_name = self.processUploadWithBuildStatus(
+            BuildStatus.BUILDING)
         # The build status is not changed
         self.assertTrue(
             os.path.exists(os.path.join(self.incoming_folder, leaf_name)))
         self.assertEquals(BuildStatus.BUILDING, build.status)
+        self.assertLogContains("Build status is BUILDING. Ignoring.")
+
+    def testBuildWithInvalidStatus(self):
+        # Builds with an invalid (not UPLOADING or BUILDING) status
+        # should trigger a failure. We've probably raced with
+        # buildd-manager due to a new and assuredly extra-special bug.
+        build, leaf_name = self.processUploadWithBuildStatus(
+            BuildStatus.NEEDSBUILD)
+        # The build status is not changed, but the upload has moved.
+        self.assertFalse(
+            os.path.exists(os.path.join(self.incoming_folder, leaf_name)))
+        self.assertTrue(
+            os.path.exists(os.path.join(self.failed_folder, leaf_name)))
+        self.assertEquals(BuildStatus.NEEDSBUILD, build.status)
         self.assertLogContains(
-            "Expected build status to be 'UPLOADING', was BUILDING. "
-            "Ignoring.")
+            "Expected build status to be UPLOADING or BUILDING, was "
+            "NEEDSBUILD.")
 
     def testOrderFilenames(self):
         """orderFilenames sorts _source.changes ahead of other files."""

=== modified file 'lib/lp/archiveuploader/uploadprocessor.py'
--- lib/lp/archiveuploader/uploadprocessor.py	2015-01-23 17:57:59 +0000
+++ lib/lp/archiveuploader/uploadprocessor.py	2015-02-19 01:43:44 +0000
@@ -618,10 +618,21 @@
         Build uploads always contain a single package per leaf.
         """
         logger = BufferLogger()
-        if self.build.status != BuildStatus.UPLOADING:
+        if self.build.status == BuildStatus.BUILDING:
+            # Handoff from buildd-manager isn't complete until the
+            # status is UPLOADING.
+            self.processor.log.info("Build status is BUILDING. Ignoring.")
+            return
+        elif self.build.status != BuildStatus.UPLOADING:
+            # buildd-manager should only have given us a directory
+            # during BUILDING or UPLOADING. Anything else indicates a
+            # bug, so get the upload out of the queue before the status
+            # changes to something that might accidentally let it be
+            # accepted.
             self.processor.log.warn(
-                "Expected build status to be 'UPLOADING', was %s. Ignoring." %
+                "Expected build status to be UPLOADING or BUILDING, was %s.",
                 self.build.status.name)
+            self.moveUpload(UploadStatusEnum.FAILED, logger)
             return
         try:
             # The recipe may have been deleted so we need to flag that here
@@ -659,7 +670,7 @@
             if recipe_deleted:
                 # For a deleted recipe, no need to notify that uploading has
                 # failed - we just log a warning.
-                self.processor.log.warn(
+                self.processor.log.info(
                     "Recipe for build %s was deleted. Ignoring." %
                     self.upload)
             else:

=== modified file 'lib/lp/buildmaster/model/buildfarmjobbehaviour.py'
--- lib/lp/buildmaster/model/buildfarmjobbehaviour.py	2015-02-15 23:33:23 +0000
+++ lib/lp/buildmaster/model/buildfarmjobbehaviour.py	2015-02-19 01:43:44 +0000
@@ -220,6 +220,7 @@
             'Processing finished job %s (%s) from builder %s: %s'
             % (self.build.build_cookie, self.build.title,
                self.build.buildqueue_record.builder.name, status))
+        build_status = None
         if status == 'OK':
             yield self.storeLogFromSlave()
             # handleSuccess will sometimes perform write operations
@@ -227,18 +228,26 @@
             # here and the commit can cause duplicated results. For
             # example, a BinaryPackageBuild will end up in the upload
             # queue twice if notify() crashes.
-            yield self.handleSuccess(slave_status, logger)
+            build_status = yield self.handleSuccess(slave_status, logger)
         elif status in fail_status_map:
             # XXX wgrant: The builder should be set long before here, but
             # currently isn't.
             yield self.storeLogFromSlave()
-            self.build.updateStatus(
-                fail_status_map[status],
-                builder=self.build.buildqueue_record.builder,
-                slave_status=slave_status)
+            build_status = fail_status_map[status]
         else:
             raise BuildDaemonError(
                 "Build returned unexpected status: %r" % status)
+
+        # Set the status and dequeue the build atomically. Setting the
+        # status to UPLOADING constitutes handoff to process-upload, so
+        # doing that before we've removed the BuildQueue causes races.
+
+        # XXX wgrant: The builder should be set long before here, but
+        # currently isn't.
+        self.build.updateStatus(
+            build_status,
+            builder=self.build.buildqueue_record.builder,
+            slave_status=slave_status)
         if notify:
             self.build.notify()
         self.build.buildqueue_record.destroySelf()
@@ -260,8 +269,7 @@
         if build.job_type == BuildFarmJobType.PACKAGEBUILD:
             build = build.buildqueue_record.specific_build
             if not build.current_source_publication:
-                build.updateStatus(BuildStatus.SUPERSEDED)
-                return
+                defer.returnValue(BuildStatus.SUPERSEDED)
 
         self.verifySuccessfulBuild()
 
@@ -295,11 +303,6 @@
             filenames_to_download.append((filemap[filename], out_file_name))
         yield self._slave.getFiles(filenames_to_download)
 
-        # XXX wgrant: The builder should be set long before here, but
-        # currently isn't.
-        build.updateStatus(
-            BuildStatus.UPLOADING, builder=build.buildqueue_record.builder,
-            slave_status=slave_status)
         transaction.commit()
 
         # Move the directory used to grab the binaries into incoming
@@ -312,3 +315,5 @@
         if not os.path.exists(target_dir):
             os.mkdir(target_dir)
         os.rename(grab_dir, os.path.join(target_dir, upload_leaf))
+
+        defer.returnValue(BuildStatus.UPLOADING)

=== modified file 'lib/lp/buildmaster/tests/test_manager.py'
--- lib/lp/buildmaster/tests/test_manager.py	2015-02-19 01:43:44 +0000
+++ lib/lp/buildmaster/tests/test_manager.py	2015-02-19 01:43:44 +0000
@@ -582,9 +582,7 @@
         # Mock out the build behaviour's handleSuccess so it doesn't
         # try to upload things to the librarian or queue.
         def handleSuccess(self, slave_status, logger):
-            build.updateStatus(
-                BuildStatus.UPLOADING, builder, slave_status=slave_status)
-            transaction.commit()
+            return BuildStatus.UPLOADING
         self.patch(
             BinaryPackageBuildBehaviour, 'handleSuccess', handleSuccess)
 

=== modified file 'lib/lp/translations/model/translationtemplatesbuildbehaviour.py'
--- lib/lp/translations/model/translationtemplatesbuildbehaviour.py	2014-06-26 12:33:51 +0000
+++ lib/lp/translations/model/translationtemplatesbuildbehaviour.py	2015-02-19 01:43:44 +0000
@@ -101,10 +101,6 @@
         If this fails for whatever unforeseen reason, a future run will
         retry it.
         """
-        self.build.updateStatus(
-            BuildStatus.UPLOADING,
-            builder=self.build.buildqueue_record.builder)
-        transaction.commit()
         logger.debug("Processing successful templates build.")
         filemap = slave_status.get('filemap')
         filename = yield self._readTarball(
@@ -116,7 +112,6 @@
         # dangerous.
         if filename is None:
             logger.error("Build produced no tarball.")
-            self.build.updateStatus(BuildStatus.FULLYBUILT)
         else:
             tarball_file = open(filename)
             try:
@@ -129,9 +124,9 @@
                     self._uploadTarball(
                         self.build.buildqueue_record.specific_build.branch,
                         tarball, logger)
+                    transaction.commit()
                     logger.debug("Upload complete.")
             finally:
-                self.build.updateStatus(BuildStatus.FULLYBUILT)
                 tarball_file.close()
                 os.remove(filename)
-        transaction.commit()
+        defer.returnValue(BuildStatus.FULLYBUILT)


Follow ups