launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #14981
[Merge] lp:~wgrant/launchpad/commit-ALL-the-build-changes into lp:launchpad
William Grant has proposed merging lp:~wgrant/launchpad/commit-ALL-the-build-changes into lp:launchpad.
Commit message:
Fix all BuildFarmJobBehaviors to commit any pending database changes before yielding to the reactor. Otherwise an intermediate scan may fail and cause the changes to be aborted.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #717969 in Launchpad itself: "storeBuildInfo is sometimes ineffective"
https://bugs.launchpad.net/launchpad/+bug/717969
For more details, see:
https://code.launchpad.net/~wgrant/launchpad/commit-ALL-the-build-changes/+merge/144440
This branch hopefully fixes bug #717969 and various other build inconsistencies that we see from buildd-manager.
buildd-manager is thoroughly Twisted and interacts with the DB. All the main operations occur in a single thread, so there's a single current transaction shared across all of the builders' scanners. Because all the build handling code was ported from the pre-Twisted buildd-manager/sequencer, it wasn't designed with this transaction sharing in mind. If a BuildFarmJobBehavior yielded back to the reactor before committing, it was possible that a scan from another builder would fail and abort the transaction before the original scan resumed to perform the commit.
Since I moved all the Twisted code into BFJB and derivatives last week, it's now relatively easy to ensure that a commit precedes each yield to the reactor. So I did that manually, inserting a transaction.commit() between any DB change and a call that might yield.
--
https://code.launchpad.net/~wgrant/launchpad/commit-ALL-the-build-changes/+merge/144440
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/commit-ALL-the-build-changes into lp:launchpad.
=== modified file 'lib/lp/buildmaster/model/buildfarmjobbehavior.py'
--- lib/lp/buildmaster/model/buildfarmjobbehavior.py 2013-01-22 07:24:05 +0000
+++ lib/lp/buildmaster/model/buildfarmjobbehavior.py 2013-01-23 06:50:26 +0000
@@ -16,7 +16,7 @@
import socket
import xmlrpclib
-from storm.store import Store
+import transaction
from twisted.internet import defer
from zope.component import getUtility
from zope.interface import implements
@@ -103,6 +103,7 @@
lfa_id = yield self.getLogFromSlave(
self.build, build_queue or self.build.buildqueue_record)
self.build.setLog(lfa_id)
+ transaction.commit()
def updateBuild(self, queueItem):
"""See `IBuildFarmJobBehavior`."""
@@ -139,6 +140,7 @@
# known.
queueItem._builder = None
queueItem.setDateStarted(None)
+ transaction.commit()
return
# Since logtail is a xmlrpclib.Binary container and it is
@@ -165,12 +167,14 @@
"Builder %s forgot about buildqueue %d -- resetting buildqueue "
"record" % (queueItem.builder.url, queueItem.id))
queueItem.reset()
+ transaction.commit()
def updateBuild_BUILDING(self, queueItem, slave_status, logtail, logger):
"""Build still building, collect the logtail"""
if queueItem.job.status != JobStatus.RUNNING:
queueItem.job.start()
queueItem.logtail = encoding.guess(str(logtail))
+ transaction.commit()
def updateBuild_ABORTING(self, queueItem, slave_status, logtail, logger):
"""Build was ABORTED.
@@ -178,6 +182,7 @@
Master-side should wait until the slave finish the process correctly.
"""
queueItem.logtail = "Waiting for slave process to be terminated"
+ transaction.commit()
def updateBuild_ABORTED(self, queueItem, slave_status, logtail, logger):
"""ABORTING process has successfully terminated.
@@ -191,6 +196,7 @@
if queueItem.job.status != JobStatus.FAILED:
queueItem.job.fail()
queueItem.specific_job.jobAborted()
+ transaction.commit()
return d.addCallback(got_cleaned)
def extractBuildStatus(self, slave_status):
@@ -269,8 +275,8 @@
if build.build_farm_job_type == BuildFarmJobType.PACKAGEBUILD:
build = build.buildqueue_record.specific_job.build
if not build.current_source_publication:
+ yield self.build.buildqueue_record.builder.cleanSlave()
build.updateStatus(BuildStatus.SUPERSEDED)
- yield self.build.buildqueue_record.builder.cleanSlave()
self.build.buildqueue_record.destroySelf()
return
@@ -324,6 +330,8 @@
build.updateStatus(
status, builder=build.buildqueue_record.builder,
slave_status=slave_status)
+ transaction.commit()
+
yield self.storeLogFromSlave()
# We only attempt the upload if we successfully copied all the
@@ -346,10 +354,7 @@
yield self.build.buildqueue_record.builder.cleanSlave()
self.build.buildqueue_record.destroySelf()
-
- # Commit so there are no race conditions with archiveuploader
- # about build.status.
- Store.of(build).commit()
+ transaction.commit()
# Move the directory used to grab the binaries into
# the incoming directory so the upload processor never
@@ -369,11 +374,13 @@
self.build.updateStatus(
status, builder=self.build.buildqueue_record.builder,
slave_status=slave_status)
+ transaction.commit()
yield self.storeLogFromSlave()
if send_notification:
self.build.notify()
yield self.build.buildqueue_record.builder.cleanSlave()
self.build.buildqueue_record.destroySelf()
+ transaction.commit()
def _handleStatus_PACKAGEFAIL(self, librarian, slave_status, logger,
send_notification):
@@ -405,6 +412,7 @@
self.build.buildqueue_record.builder.failBuilder(
"Builder returned BUILDERFAIL when asked for its status")
self.build.buildqueue_record.reset()
+ transaction.commit()
@defer.inlineCallbacks
def _handleStatus_GIVENBACK(self, librarian, slave_status, logger,
@@ -417,6 +425,7 @@
"""
yield self.build.buildqueue_record.builder.cleanSlave()
self.build.buildqueue_record.reset()
+ transaction.commit()
class IdleBuildBehavior(BuildFarmJobBehaviorBase):
=== modified file 'lib/lp/translations/model/translationtemplatesbuildbehavior.py'
--- lib/lp/translations/model/translationtemplatesbuildbehavior.py 2013-01-22 07:27:17 +0000
+++ lib/lp/translations/model/translationtemplatesbuildbehavior.py 2013-01-23 06:50:26 +0000
@@ -14,6 +14,7 @@
import os
import tempfile
+import transaction
from twisted.internet import defer
from zope.component import getUtility
from zope.interface import implements
@@ -138,6 +139,7 @@
if build_status == 'OK':
self.build.updateStatus(
BuildStatus.UPLOADING, builder=queue_item.builder)
+ transaction.commit()
logger.debug("Processing successful templates build.")
filemap = slave_status.get('filemap')
filename = yield self._readTarball(queue_item, filemap, logger)
@@ -168,8 +170,10 @@
else:
self.build.updateStatus(
BuildStatus.FAILEDTOBUILD, builder=queue_item.builder)
+ transaction.commit()
yield self.storeLogFromSlave(build_queue=queue_item)
yield queue_item.builder.cleanSlave()
queue_item.destroySelf()
+ transaction.commit()