launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #17896
[Merge] lp:~wgrant/launchpad/buildd-manager-handoff into lp:launchpad
William Grant has proposed merging lp:~wgrant/launchpad/buildd-manager-handoff into lp:launchpad with lp:~wgrant/launchpad/buildd-manager-robustification as a prerequisite.
Commit message:
Hand builds off from buildd-manager to process-upload atomically. UPLOADING is only set as the BuildQueue is destroyed, and binary uploads for invalid statuses are failed rather than ignored.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #1422616 in Launchpad itself: "process-upload can work on a build before buildd-manager is done with it"
https://bugs.launchpad.net/launchpad/+bug/1422616
For more details, see:
https://code.launchpad.net/~wgrant/launchpad/buildd-manager-handoff/+merge/250246
Hand builds off from buildd-manager to process-upload atomically. buildd-manager now only sets UPLOADING in the same transaction as the BuildQueue destruction, ensuring that process-upload doesn't touch the build until buildd-manager is guaranteed to never touch it again. process-upload also now fails any upload that has a build which is neither BUILDING nor UPLOADING, as that would indicate a bug.
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/buildd-manager-handoff into lp:launchpad.
=== modified file 'lib/lp/archiveuploader/tests/test_uploadprocessor.py'
--- lib/lp/archiveuploader/tests/test_uploadprocessor.py 2015-01-23 17:57:59 +0000
+++ lib/lp/archiveuploader/tests/test_uploadprocessor.py 2015-02-19 01:43:44 +0000
@@ -142,6 +142,7 @@
self.queue_folder = tempfile.mkdtemp()
self.incoming_folder = os.path.join(self.queue_folder, "incoming")
+ self.failed_folder = os.path.join(self.queue_folder, "failed")
os.makedirs(self.incoming_folder)
self.test_files_dir = os.path.join(config.root,
@@ -2258,9 +2259,7 @@
self.assertIsNot(None, build.duration)
self.assertIs(None, build.upload_log)
- def testBuildWithInvalidStatus(self):
- # Builds with an invalid (non-UPLOADING) status should trigger
- # a warning but be left alone.
+ def processUploadWithBuildStatus(self, status):
upload_dir = self.queueUpload("bar_1.0-1")
self.processUpload(self.uploadprocessor, upload_dir)
source_pub = self.publishPackage('bar', '1.0-1')
@@ -2272,9 +2271,10 @@
status=PackageUploadStatus.ACCEPTED,
version=u"1.0-1", name=u"bar")
queue_item.setDone()
+ stub.test_emails.pop()
build.buildqueue_record.markAsBuilding(self.factory.makeBuilder())
- build.updateStatus(BuildStatus.BUILDING)
+ build.updateStatus(status)
self.switchToUploader()
shutil.rmtree(upload_dir)
@@ -2287,17 +2287,40 @@
"bar_1.0-1_binary", queue_entry=leaf_name)
self.options.context = 'buildd'
self.options.builds = True
- len(stub.test_emails)
+ self.assertEqual([], stub.test_emails)
BuildUploadHandler(self.uploadprocessor, self.incoming_folder,
leaf_name).process()
self.layer.txn.commit()
+
+ return build, leaf_name
+
+ def testBuildStillBuilding(self):
+ # Builds that are still BUILDING should be left alone. The
+ # upload directory may already be in place, but buildd-manager
+ # will set the status to UPLOADING when it's handed off.
+ build, leaf_name = self.processUploadWithBuildStatus(
+ BuildStatus.BUILDING)
# The build status is not changed
self.assertTrue(
os.path.exists(os.path.join(self.incoming_folder, leaf_name)))
self.assertEquals(BuildStatus.BUILDING, build.status)
+ self.assertLogContains("Build status is BUILDING. Ignoring.")
+
+ def testBuildWithInvalidStatus(self):
+ # Builds with an invalid (not UPLOADING or BUILDING) status
+ # should trigger a failure. We've probably raced with
+ # buildd-manager due to a new and assuredly extra-special bug.
+ build, leaf_name = self.processUploadWithBuildStatus(
+ BuildStatus.NEEDSBUILD)
+ # The build status is not changed, but the upload has moved.
+ self.assertFalse(
+ os.path.exists(os.path.join(self.incoming_folder, leaf_name)))
+ self.assertTrue(
+ os.path.exists(os.path.join(self.failed_folder, leaf_name)))
+ self.assertEquals(BuildStatus.NEEDSBUILD, build.status)
self.assertLogContains(
- "Expected build status to be 'UPLOADING', was BUILDING. "
- "Ignoring.")
+ "Expected build status to be UPLOADING or BUILDING, was "
+ "NEEDSBUILD.")
def testOrderFilenames(self):
"""orderFilenames sorts _source.changes ahead of other files."""
=== modified file 'lib/lp/archiveuploader/uploadprocessor.py'
--- lib/lp/archiveuploader/uploadprocessor.py 2015-01-23 17:57:59 +0000
+++ lib/lp/archiveuploader/uploadprocessor.py 2015-02-19 01:43:44 +0000
@@ -618,10 +618,21 @@
Build uploads always contain a single package per leaf.
"""
logger = BufferLogger()
- if self.build.status != BuildStatus.UPLOADING:
+ if self.build.status == BuildStatus.BUILDING:
+ # Handoff from buildd-manager isn't complete until the
+ # status is UPLOADING.
+ self.processor.log.info("Build status is BUILDING. Ignoring.")
+ return
+ elif self.build.status != BuildStatus.UPLOADING:
+ # buildd-manager should only have given us a directory
+ # during BUILDING or UPLOADING. Anything else indicates a
+ # bug, so get the upload out of the queue before the status
+ # changes to something that might accidentally let it be
+ # accepted.
self.processor.log.warn(
- "Expected build status to be 'UPLOADING', was %s. Ignoring." %
+ "Expected build status to be UPLOADING or BUILDING, was %s.",
self.build.status.name)
+ self.moveUpload(UploadStatusEnum.FAILED, logger)
return
try:
# The recipe may have been deleted so we need to flag that here
@@ -659,7 +670,7 @@
if recipe_deleted:
# For a deleted recipe, no need to notify that uploading has
# failed - we just log a warning.
- self.processor.log.warn(
+ self.processor.log.info(
"Recipe for build %s was deleted. Ignoring." %
self.upload)
else:
=== modified file 'lib/lp/buildmaster/model/buildfarmjobbehaviour.py'
--- lib/lp/buildmaster/model/buildfarmjobbehaviour.py 2015-02-15 23:33:23 +0000
+++ lib/lp/buildmaster/model/buildfarmjobbehaviour.py 2015-02-19 01:43:44 +0000
@@ -220,6 +220,7 @@
'Processing finished job %s (%s) from builder %s: %s'
% (self.build.build_cookie, self.build.title,
self.build.buildqueue_record.builder.name, status))
+ build_status = None
if status == 'OK':
yield self.storeLogFromSlave()
# handleSuccess will sometimes perform write operations
@@ -227,18 +228,26 @@
# here and the commit can cause duplicated results. For
# example, a BinaryPackageBuild will end up in the upload
# queue twice if notify() crashes.
- yield self.handleSuccess(slave_status, logger)
+ build_status = yield self.handleSuccess(slave_status, logger)
elif status in fail_status_map:
# XXX wgrant: The builder should be set long before here, but
# currently isn't.
yield self.storeLogFromSlave()
- self.build.updateStatus(
- fail_status_map[status],
- builder=self.build.buildqueue_record.builder,
- slave_status=slave_status)
+ build_status = fail_status_map[status]
else:
raise BuildDaemonError(
"Build returned unexpected status: %r" % status)
+
+ # Set the status and dequeue the build atomically. Setting the
+ # status to UPLOADING constitutes handoff to process-upload, so
+ # doing that before we've removed the BuildQueue causes races.
+
+ # XXX wgrant: The builder should be set long before here, but
+ # currently isn't.
+ self.build.updateStatus(
+ build_status,
+ builder=self.build.buildqueue_record.builder,
+ slave_status=slave_status)
if notify:
self.build.notify()
self.build.buildqueue_record.destroySelf()
@@ -260,8 +269,7 @@
if build.job_type == BuildFarmJobType.PACKAGEBUILD:
build = build.buildqueue_record.specific_build
if not build.current_source_publication:
- build.updateStatus(BuildStatus.SUPERSEDED)
- return
+ defer.returnValue(BuildStatus.SUPERSEDED)
self.verifySuccessfulBuild()
@@ -295,11 +303,6 @@
filenames_to_download.append((filemap[filename], out_file_name))
yield self._slave.getFiles(filenames_to_download)
- # XXX wgrant: The builder should be set long before here, but
- # currently isn't.
- build.updateStatus(
- BuildStatus.UPLOADING, builder=build.buildqueue_record.builder,
- slave_status=slave_status)
transaction.commit()
# Move the directory used to grab the binaries into incoming
@@ -312,3 +315,5 @@
if not os.path.exists(target_dir):
os.mkdir(target_dir)
os.rename(grab_dir, os.path.join(target_dir, upload_leaf))
+
+ defer.returnValue(BuildStatus.UPLOADING)
=== modified file 'lib/lp/buildmaster/tests/test_manager.py'
--- lib/lp/buildmaster/tests/test_manager.py 2015-02-19 01:43:44 +0000
+++ lib/lp/buildmaster/tests/test_manager.py 2015-02-19 01:43:44 +0000
@@ -582,9 +582,7 @@
# Mock out the build behaviour's handleSuccess so it doesn't
# try to upload things to the librarian or queue.
def handleSuccess(self, slave_status, logger):
- build.updateStatus(
- BuildStatus.UPLOADING, builder, slave_status=slave_status)
- transaction.commit()
+ return BuildStatus.UPLOADING
self.patch(
BinaryPackageBuildBehaviour, 'handleSuccess', handleSuccess)
=== modified file 'lib/lp/translations/model/translationtemplatesbuildbehaviour.py'
--- lib/lp/translations/model/translationtemplatesbuildbehaviour.py 2014-06-26 12:33:51 +0000
+++ lib/lp/translations/model/translationtemplatesbuildbehaviour.py 2015-02-19 01:43:44 +0000
@@ -101,10 +101,6 @@
If this fails for whatever unforeseen reason, a future run will
retry it.
"""
- self.build.updateStatus(
- BuildStatus.UPLOADING,
- builder=self.build.buildqueue_record.builder)
- transaction.commit()
logger.debug("Processing successful templates build.")
filemap = slave_status.get('filemap')
filename = yield self._readTarball(
@@ -116,7 +112,6 @@
# dangerous.
if filename is None:
logger.error("Build produced no tarball.")
- self.build.updateStatus(BuildStatus.FULLYBUILT)
else:
tarball_file = open(filename)
try:
@@ -129,9 +124,9 @@
self._uploadTarball(
self.build.buildqueue_record.specific_build.branch,
tarball, logger)
+ transaction.commit()
logger.debug("Upload complete.")
finally:
- self.build.updateStatus(BuildStatus.FULLYBUILT)
tarball_file.close()
os.remove(filename)
- transaction.commit()
+ defer.returnValue(BuildStatus.FULLYBUILT)
Follow ups