launchpad-reviewers team mailing list archive
Message #16927
[Merge] lp:~wgrant/launchpad/ppa-reset-2.0-basics into lp:launchpad
William Grant has proposed merging lp:~wgrant/launchpad/ppa-reset-2.0-basics into lp:launchpad with lp:~wgrant/launchpad/ppa-reset-2.0-model as a prerequisite.
Requested reviews:
Colin Watson (cjwatson)
For more details, see:
https://code.launchpad.net/~wgrant/launchpad/ppa-reset-2.0-basics/+merge/223397
=== modified file 'lib/lp/buildmaster/interactor.py'
--- lib/lp/buildmaster/interactor.py 2014-05-10 18:40:36 +0000
+++ lib/lp/buildmaster/interactor.py 2014-06-17 13:03:35 +0000
@@ -21,6 +21,7 @@
removeSecurityProxy,
)
+from lp.buildmaster.enums import BuilderCleanStatus
from lp.buildmaster.interfaces.builder import (
BuildDaemonError,
CannotFetchFile,
@@ -216,7 +217,7 @@
BuilderVitals = namedtuple(
'BuilderVitals',
('name', 'url', 'virtualized', 'vm_host', 'builderok', 'manual',
- 'build_queue', 'version'))
+ 'build_queue', 'version', 'clean_status'))
_BQ_UNSPECIFIED = object()
@@ -226,7 +227,8 @@
build_queue = builder.currentjob
return BuilderVitals(
builder.name, builder.url, builder.virtualized, builder.vm_host,
- builder.builderok, builder.manual, build_queue, builder.version)
+ builder.builderok, builder.manual, build_queue, builder.version,
+ builder.clean_status)
class BuilderInteractor(object):
@@ -249,44 +251,6 @@
return behaviour
@classmethod
- @defer.inlineCallbacks
- def rescueIfLost(cls, vitals, slave, slave_status, expected_cookie,
- logger=None):
- """Reset the slave if its job information doesn't match the DB.
-
- This checks the build ID reported in the slave status against
- the given cookie. If it isn't building what we think it should
- be, the current build will be aborted and the slave cleaned in
- preparation for a new task.
-
- :return: A Deferred that fires when the dialog with the slave is
- finished. Its return value is True if the slave is lost,
- False otherwise.
- """
- # Determine the slave's current build cookie.
- status = slave_status['builder_status']
- slave_cookie = slave_status.get('build_id')
-
- if slave_cookie == expected_cookie:
- # The master and slave agree about the current job. Continue.
- defer.returnValue(False)
- else:
- # The master and slave disagree. The master is our master,
- # so try to rescue the slave.
- # An IDLE slave doesn't need rescuing (SlaveScanner.scan
- # will rescue the DB side instead), and we just have to wait
- # out an ABORTING one.
- if status == 'BuilderStatus.WAITING':
- yield slave.clean()
- elif status == 'BuilderStatus.BUILDING':
- yield slave.abort()
- if logger:
- logger.info(
- "Builder slave '%s' rescued from %r (expected %r)." %
- (vitals.name, slave_cookie, expected_cookie))
- defer.returnValue(True)
-
- @classmethod
def resumeSlaveHost(cls, vitals, slave):
"""Resume the slave host to a known good condition.
@@ -323,6 +287,49 @@
@classmethod
@defer.inlineCallbacks
+ def cleanSlave(cls, vitals, slave):
+ """Prepare a slave for a new build.
+
+ :return: A Deferred that fires when this stage of the resume
+ operations finishes. If the value is True, the slave is now clean.
+ If it's False, the clean is still in progress and this must be
+ called again later.
+ """
+ if vitals.virtualized:
+ # If we are building a virtual build, resume the virtual
+ # machine. Before we try and contact the resumed slave,
+ # we're going to send it a message. This is to ensure
+ # it's accepting packets from the outside world, because
+ # testing has shown that the first packet will randomly
+ # fail for no apparent reason. This could be a quirk of
+ # the Xen guest, we're not sure. We also don't care
+ # about the result from this message, just that it's
+ # sent, hence the "addBoth". See bug 586359.
+ yield cls.resumeSlaveHost(vitals, slave)
+ yield slave.echo("ping")
+ defer.returnValue(True)
+ else:
+ slave_status = yield slave.status()
+ status = slave_status.get('builder_status', None)
+ if status == 'BuilderStatus.IDLE':
+ # This is as clean as we can get it.
+ defer.returnValue(True)
+ elif status == 'BuilderStatus.BUILDING':
+ # Asynchronously abort() the slave and wait until WAITING.
+ yield slave.abort()
+ defer.returnValue(False)
+ elif status == 'BuilderStatus.ABORTING':
+ # Wait it out until WAITING.
+ defer.returnValue(False)
+ elif status == 'BuilderStatus.WAITING':
+ # Just a synchronous clean() call and we'll be idle.
+ yield slave.clean()
+ defer.returnValue(True)
+ raise BuildDaemonError(
+ "Invalid status during clean: %r" % status)
+
+ @classmethod
+ @defer.inlineCallbacks
def _startBuild(cls, build_queue_item, vitals, builder, slave, behaviour,
logger):
"""Start a build on this builder.
@@ -342,17 +349,12 @@
raise BuildDaemonError(
"Attempted to start a build on a known-bad builder.")
- # If we are building a virtual build, resume the virtual
- # machine. Before we try and contact the resumed slave, we're
- # going to send it a message. This is to ensure it's accepting
- # packets from the outside world, because testing has shown that
- # the first packet will randomly fail for no apparent reason.
- # This could be a quirk of the Xen guest, we're not sure. We
- # also don't care about the result from this message, just that
- # it's sent, hence the "addBoth". See bug 586359.
- if builder.virtualized:
- yield cls.resumeSlaveHost(vitals, slave)
- yield slave.echo("ping")
+ if builder.clean_status != BuilderCleanStatus.CLEAN:
+ raise BuildDaemonError(
+ "Attempted to start build on a dirty slave.")
+
+ builder.setCleanStatus(BuilderCleanStatus.DIRTY)
+ transaction.commit()
yield behaviour.dispatchBuildToSlave(build_queue_item.id, logger)
@@ -448,9 +450,9 @@
:return: A Deferred that fires when the slave dialog is finished.
"""
# IDLE is deliberately not handled here, because it should be
- # impossible to get past rescueIfLost unless the slave matches
- # the DB, and this method isn't called unless the DB says
- # there's a job.
+ # impossible to get past the cookie check unless the slave
+ # matches the DB, and this method isn't called unless the DB
+ # says there's a job.
builder_status = slave_status['builder_status']
if builder_status == 'BuilderStatus.BUILDING':
# Build still building, collect the logtail.
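[Reviewer aid, not part of the diff: a condensed, synchronous sketch of the
state machine that the new cleanSlave() runs for non-virtual builders. The
FakeSlave class and driver loop below are illustrative stand-ins, not
Launchpad code; the real methods return Deferreds and are driven by
SlaveScanner across successive scan cycles.]

class FakeSlave:
    """Stand-in for a non-virtual builder slave in one of four states."""

    def __init__(self, status):
        self.status_value = status
        self.call_log = []

    def status(self):
        self.call_log.append('status')
        return {'builder_status': self.status_value}

    def abort(self):
        # Asynchronous in reality; the slave moves to ABORTING, then WAITING.
        self.call_log.append('abort')
        self.status_value = 'BuilderStatus.ABORTING'

    def clean(self):
        # Synchronous; after this the slave is IDLE again.
        self.call_log.append('clean')
        self.status_value = 'BuilderStatus.IDLE'


def clean_step(slave):
    """One scan cycle of cleaning; True means the slave is now clean."""
    status = slave.status()['builder_status']
    if status == 'BuilderStatus.IDLE':
        # Already as clean as we can get it.
        return True
    elif status == 'BuilderStatus.BUILDING':
        # Start an abort and wait for WAITING on a later cycle.
        slave.abort()
        return False
    elif status == 'BuilderStatus.ABORTING':
        # Still aborting; check again next scan.
        return False
    elif status == 'BuilderStatus.WAITING':
        slave.clean()
        return True
    raise RuntimeError("Invalid status during clean: %r" % status)


slave = FakeSlave('BuilderStatus.BUILDING')
while not clean_step(slave):
    # Pretend the abort finished between scans.
    if slave.status_value == 'BuilderStatus.ABORTING':
        slave.status_value = 'BuilderStatus.WAITING'
print(slave.call_log)  # ['status', 'abort', 'status', 'clean']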
=== modified file 'lib/lp/buildmaster/manager.py'
--- lib/lp/buildmaster/manager.py 2014-02-05 23:22:30 +0000
+++ lib/lp/buildmaster/manager.py 2014-06-17 13:03:35 +0000
@@ -26,6 +26,7 @@
from zope.component import getUtility
from lp.buildmaster.enums import (
+ BuilderCleanStatus,
BuildQueueStatus,
BuildStatus,
)
@@ -413,70 +414,91 @@
vitals = self.builder_factory.getVitals(self.builder_name)
interactor = self.interactor_factory()
slave = self.slave_factory(vitals)
- if vitals.builderok:
- self.logger.debug("Scanning %s." % self.builder_name)
- slave_status = yield slave.status()
- else:
- slave_status = None
-
- # Confirm that the DB and slave sides are in a valid, mutually
- # agreeable state.
- lost_reason = None
- if not vitals.builderok:
- lost_reason = '%s is disabled' % vitals.name
- else:
- self.updateVersion(vitals, slave_status)
- cancelled = yield self.checkCancellation(vitals, slave, interactor)
- if cancelled:
- return
- assert slave_status is not None
- lost = yield interactor.rescueIfLost(
- vitals, slave, slave_status, self.getExpectedCookie(vitals),
- self.logger)
- if lost:
- lost_reason = '%s is lost' % vitals.name
-
- # The slave is lost or the builder is disabled. We can't
- # continue to update the job status or dispatch a new job, so
- # just rescue the assigned job, if any, so it can be dispatched
- # to another slave.
- if lost_reason is not None:
- if vitals.build_queue is not None:
+
+ if vitals.build_queue is not None:
+ if vitals.clean_status != BuilderCleanStatus.DIRTY:
+ # This is probably a grave bug with security implications,
+ # as a slave that has a job must be cleaned afterwards.
+ raise BuildDaemonError("Non-dirty builder allegedly building.")
+
+ lost_reason = None
+ if not vitals.builderok:
+ lost_reason = '%s is disabled' % vitals.name
+ else:
+ slave_status = yield slave.status()
+ # Ensure that the slave has the job that we think it
+ # should.
+ slave_cookie = slave_status.get('build_id')
+ expected_cookie = self.getExpectedCookie(vitals)
+ if slave_cookie != expected_cookie:
+ lost_reason = (
+ '%s is lost (expected %r, got %r)' % (
+ vitals.name, expected_cookie, slave_cookie))
+
+ if lost_reason is not None:
+ # The slave is either confused or disabled, so reset and
+ # requeue the job. The next scan cycle will clean up the
+ # slave if appropriate.
self.logger.warn(
"%s. Resetting BuildQueue %d.", lost_reason,
vitals.build_queue.id)
vitals.build_queue.reset()
transaction.commit()
- return
-
- # We've confirmed that the slave state matches the DB. Continue
- # with updating the job status, or dispatching a new job if the
- # builder is idle.
- if vitals.build_queue is not None:
- # Scan the slave and get the logtail, or collect the build
- # if it's ready. Yes, "updateBuild" is a bad name.
+ return
+
+ cancelled = yield self.checkCancellation(
+ vitals, slave, interactor)
+ if cancelled:
+ return
+
+ # The slave and DB agree on the builder's state. Scan the
+ # slave and get the logtail, or collect the build if it's
+ # ready. Yes, "updateBuild" is a bad name.
assert slave_status is not None
yield interactor.updateBuild(
vitals, slave, slave_status, self.builder_factory,
self.behaviour_factory)
- elif vitals.manual:
- # If the builder is in manual mode, don't dispatch anything.
- self.logger.debug(
- '%s is in manual mode, not dispatching.' % vitals.name)
else:
- # See if there is a job we can dispatch to the builder slave.
- builder = self.builder_factory[self.builder_name]
- # Try to dispatch the job. If it fails, don't attempt to
- # just retry the scan; we need to reset the job so the
- # dispatch will be reattempted.
- d = interactor.findAndStartJob(vitals, builder, slave)
- d.addErrback(functools.partial(self._scanFailed, False))
- yield d
- if builder.currentjob is not None:
- # After a successful dispatch we can reset the
- # failure_count.
- builder.resetFailureCount()
- transaction.commit()
+ if not vitals.builderok:
+ return
+ # We think the builder is idle. If it's clean, dispatch. If
+ # it's dirty, clean.
+ if vitals.clean_status == BuilderCleanStatus.CLEAN:
+ slave_status = yield slave.status()
+ if slave_status.get('builder_status') != 'BuilderStatus.IDLE':
+ raise BuildDaemonError(
+ 'Allegedly clean slave not idle (%r instead)'
+ % slave_status.get('builder_status'))
+ self.updateVersion(vitals, slave_status)
+ if vitals.manual:
+ # If the builder is in manual mode, don't dispatch
+ # anything.
+ self.logger.debug(
+ '%s is in manual mode, not dispatching.', vitals.name)
+ return
+ # Try to find and dispatch a job. If it fails, don't
+ # attempt to just retry the scan; we need to reset
+ # the job so the dispatch will be reattempted.
+ builder = self.builder_factory[self.builder_name]
+ d = interactor.findAndStartJob(vitals, builder, slave)
+ d.addErrback(functools.partial(self._scanFailed, False))
+ yield d
+ if builder.currentjob is not None:
+ # After a successful dispatch we can reset the
+ # failure_count.
+ builder.resetFailureCount()
+ transaction.commit()
+ else:
+ # Ask the BuilderInteractor to clean the slave. It might
+ # be immediately cleaned on return, in which case we go
+ # straight back to CLEAN, or we might have to spin
+ # through another few cycles.
+ done = yield interactor.cleanSlave(vitals, slave)
+ if done:
+ builder = self.builder_factory[self.builder_name]
+ builder.setCleanStatus(BuilderCleanStatus.CLEAN)
+ self.logger.debug('%s has been cleaned.', vitals.name)
+ transaction.commit()
class NewBuildersScanner:
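[Reviewer aid, not part of the diff: the decision tree the rewritten scan()
follows, written as a plain standalone function. The names mirror the diff,
but the dict-based vitals and string return values are stand-ins for
illustration, and the cancellation and version-update steps are omitted.]

def scan_decision(vitals, slave_status, expected_cookie):
    """Return a label for the action scan() would take this cycle."""
    if vitals['build_queue'] is not None:
        # A builder with a job must be DIRTY; anything else is a bug.
        if vitals['clean_status'] != 'DIRTY':
            return 'raise BuildDaemonError (non-dirty builder building)'
        if not vitals['builderok']:
            return 'reset job (builder disabled)'
        if slave_status.get('build_id') != expected_cookie:
            return 'reset job (cookie mismatch, slave is lost)'
        return 'updateBuild (collect logtail or finished build)'
    if not vitals['builderok']:
        return 'do nothing (disabled and idle)'
    if vitals['clean_status'] == 'CLEAN':
        if slave_status.get('builder_status') != 'BuilderStatus.IDLE':
            return 'raise BuildDaemonError (clean slave not idle)'
        if vitals['manual']:
            return 'do nothing (manual mode)'
        return 'findAndStartJob (mark DIRTY, then dispatch)'
    return 'cleanSlave (mark CLEAN once it reports done)'


# Example: an idle, dirty, enabled builder gets cleaned rather than dispatched.
print(scan_decision(
    {'build_queue': None, 'builderok': True, 'clean_status': 'DIRTY',
     'manual': False},
    {'builder_status': 'BuilderStatus.BUILDING'}, None))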
=== modified file 'lib/lp/buildmaster/tests/mock_slaves.py'
--- lib/lp/buildmaster/tests/mock_slaves.py 2014-06-06 10:32:55 +0000
+++ lib/lp/buildmaster/tests/mock_slaves.py 2014-06-17 13:03:35 +0000
@@ -30,6 +30,7 @@
from twisted.internet import defer
from twisted.web import xmlrpc
+from lp.buildmaster.enums import BuilderCleanStatus
from lp.buildmaster.interactor import BuilderSlave
from lp.buildmaster.interfaces.builder import CannotFetchFile
from lp.services.config import config
@@ -48,7 +49,7 @@
def __init__(self, name='mock-builder', builderok=True, manual=False,
virtualized=True, vm_host=None, url='http://fake:0000',
- version=None):
+ version=None, clean_status=BuilderCleanStatus.DIRTY):
self.currentjob = None
self.builderok = builderok
self.manual = manual
@@ -58,6 +59,10 @@
self.vm_host = vm_host
self.failnotes = None
self.version = version
+ self.clean_status = clean_status
+
+ def setCleanStatus(self, clean_status):
+ self.clean_status = clean_status
def failBuilder(self, reason):
self.builderok = False
=== modified file 'lib/lp/buildmaster/tests/test_interactor.py'
--- lib/lp/buildmaster/tests/test_interactor.py 2014-06-06 11:15:29 +0000
+++ lib/lp/buildmaster/tests/test_interactor.py 2014-06-17 13:03:35 +0000
@@ -16,19 +16,24 @@
SynchronousDeferredRunTest,
)
from testtools.matchers import ContainsAll
+from testtools.testcase import ExpectedException
from twisted.internet import defer
from twisted.internet.task import Clock
from twisted.python.failure import Failure
from twisted.web.client import getPage
from zope.security.proxy import removeSecurityProxy
-from lp.buildmaster.enums import BuildStatus
+from lp.buildmaster.enums import (
+ BuilderCleanStatus,
+ BuildStatus,
+ )
from lp.buildmaster.interactor import (
BuilderInteractor,
BuilderSlave,
extract_vitals_from_db,
)
from lp.buildmaster.interfaces.builder import (
+ BuildDaemonError,
CannotFetchFile,
CannotResumeHost,
)
@@ -151,44 +156,6 @@
self.assertEqual(5, slave.timeout)
@defer.inlineCallbacks
- def test_rescueIfLost_aborts_lost_and_broken_slave(self):
- # A slave that's 'lost' should be aborted; when the slave is
- # broken then abort() should also throw a fault.
- slave = LostBuildingBrokenSlave()
- slave_status = yield slave.status()
- try:
- yield BuilderInteractor.rescueIfLost(
- extract_vitals_from_db(MockBuilder()), slave, slave_status,
- 'trivial')
- except xmlrpclib.Fault:
- self.assertIn('abort', slave.call_log)
- else:
- self.fail("xmlrpclib.Fault not raised")
-
- @defer.inlineCallbacks
- def test_recover_idle_slave(self):
- # An idle slave is not rescued, even if it's not meant to be
- # idle. SlaveScanner.scan() will clean up the DB side, because
- # we still report that it's lost.
- slave = OkSlave()
- slave_status = yield slave.status()
- lost = yield BuilderInteractor.rescueIfLost(
- extract_vitals_from_db(MockBuilder()), slave, slave_status,
- 'trivial')
- self.assertTrue(lost)
- self.assertEqual(['status'], slave.call_log)
-
- @defer.inlineCallbacks
- def test_recover_ok_slave(self):
- # An idle slave that's meant to be idle is not rescued.
- slave = OkSlave()
- slave_status = yield slave.status()
- lost = yield BuilderInteractor.rescueIfLost(
- extract_vitals_from_db(MockBuilder()), slave, slave_status, None)
- self.assertFalse(lost)
- self.assertEqual(['status'], slave.call_log)
-
- @defer.inlineCallbacks
def test_recover_waiting_slave_with_good_id(self):
# rescueIfLost does not attempt to abort or clean a builder that is
# WAITING.
@@ -240,6 +207,76 @@
self.assertEqual(['status', 'abort'], building_slave.call_log)
+class TestBuilderInteractorCleanSlave(TestCase):
+
+ run_tests_with = AsynchronousDeferredRunTest
+
+ @defer.inlineCallbacks
+ def assertCleanCalls(self, builder, slave, calls, done):
+ actually_done = yield BuilderInteractor.cleanSlave(
+ extract_vitals_from_db(builder), slave)
+ self.assertEqual(done, actually_done)
+ self.assertEqual(calls, slave.method_log)
+
+ @defer.inlineCallbacks
+ def test_virtual(self):
+ # We don't care what the status of a virtual builder is; we just
+ # reset its VM and check that it's back.
+ yield self.assertCleanCalls(
+ MockBuilder(
+ virtualized=True, vm_host='lol',
+ clean_status=BuilderCleanStatus.DIRTY),
+ OkSlave(), ['resume', 'echo'], True)
+
+ @defer.inlineCallbacks
+ def test_nonvirtual_idle(self):
+ # An IDLE non-virtual slave is already as clean as we can get it.
+ yield self.assertCleanCalls(
+ MockBuilder(
+ virtualized=False, clean_status=BuilderCleanStatus.DIRTY),
+ OkSlave(), ['status'], True)
+
+ @defer.inlineCallbacks
+ def test_nonvirtual_building(self):
+ # A BUILDING non-virtual slave needs to be aborted. It'll go
+ # through ABORTING and eventually be picked up from WAITING.
+ yield self.assertCleanCalls(
+ MockBuilder(
+ virtualized=False, clean_status=BuilderCleanStatus.DIRTY),
+ BuildingSlave(), ['status', 'abort'], False)
+
+ @defer.inlineCallbacks
+ def test_nonvirtual_aborting(self):
+ # An ABORTING non-virtual slave must be waited out. It should
+ # hit WAITING eventually.
+ yield self.assertCleanCalls(
+ MockBuilder(
+ virtualized=False, clean_status=BuilderCleanStatus.DIRTY),
+ AbortingSlave(), ['status'], False)
+
+ @defer.inlineCallbacks
+ def test_nonvirtual_waiting(self):
+ # A WAITING non-virtual slave just needs clean() called.
+ yield self.assertCleanCalls(
+ MockBuilder(
+ virtualized=False, clean_status=BuilderCleanStatus.DIRTY),
+ WaitingSlave(), ['status', 'clean'], True)
+
+ @defer.inlineCallbacks
+ def test_nonvirtual_broken(self):
+ # A broken non-virtual builder is probably unrecoverable, so the
+ # method just crashes.
+ vitals = extract_vitals_from_db(MockBuilder(
+ virtualized=False, clean_status=BuilderCleanStatus.DIRTY))
+ slave = LostBuildingBrokenSlave()
+ try:
+ yield BuilderInteractor.cleanSlave(vitals, slave)
+ except xmlrpclib.Fault:
+ self.assertEqual(['status', 'abort'], slave.call_log)
+ else:
+ self.fail("abort() should crash.")
+
+
class TestBuilderSlaveStatus(TestCase):
# Verify what BuilderSlave.status returns with slaves in different
# states.
@@ -321,6 +358,7 @@
processor = self.factory.makeProcessor(name="i386")
builder = self.factory.makeBuilder(
processors=[processor], virtualized=True, vm_host="bladh")
+ builder.setCleanStatus(BuilderCleanStatus.CLEAN)
self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(OkSlave()))
distroseries = self.factory.makeDistroSeries()
das = self.factory.makeDistroArchSeries(
@@ -377,21 +415,30 @@
return d.addCallback(check_build_started)
- def test_virtual_job_dispatch_pings_before_building(self):
- # We need to send a ping to the builder to work around a bug
- # where sometimes the first network packet sent is dropped.
- builder, build = self._setupBinaryBuildAndBuilder()
- candidate = build.queueBuild()
- removeSecurityProxy(builder)._findBuildCandidate = FakeMethod(
- result=candidate)
- vitals = extract_vitals_from_db(builder)
- slave = OkSlave()
- d = BuilderInteractor.findAndStartJob(vitals, builder, slave)
-
- def check_build_started(candidate):
- self.assertIn(('echo', 'ping'), slave.call_log)
-
- return d.addCallback(check_build_started)
+ @defer.inlineCallbacks
+ def test_findAndStartJob_requires_clean_slave(self):
+ # findAndStartJob ensures that its slave starts CLEAN.
+ builder, build = self._setupBinaryBuildAndBuilder()
+ builder.setCleanStatus(BuilderCleanStatus.DIRTY)
+ candidate = build.queueBuild()
+ removeSecurityProxy(builder)._findBuildCandidate = FakeMethod(
+ result=candidate)
+ vitals = extract_vitals_from_db(builder)
+ with ExpectedException(
+ BuildDaemonError,
+ "Attempted to start build on a dirty slave."):
+ yield BuilderInteractor.findAndStartJob(vitals, builder, OkSlave())
+
+ @defer.inlineCallbacks
+ def test_findAndStartJob_dirties_slave(self):
+ # findAndStartJob marks its builder DIRTY before dispatching.
+ builder, build = self._setupBinaryBuildAndBuilder()
+ candidate = build.queueBuild()
+ removeSecurityProxy(builder)._findBuildCandidate = FakeMethod(
+ result=candidate)
+ vitals = extract_vitals_from_db(builder)
+ yield BuilderInteractor.findAndStartJob(vitals, builder, OkSlave())
+ self.assertEqual(BuilderCleanStatus.DIRTY, builder.clean_status)
class TestSlave(TestCase):
=== modified file 'lib/lp/buildmaster/tests/test_manager.py'
--- lib/lp/buildmaster/tests/test_manager.py 2014-06-06 10:32:55 +0000
+++ lib/lp/buildmaster/tests/test_manager.py 2014-06-17 13:03:35 +0000
@@ -13,6 +13,7 @@
AsynchronousDeferredRunTest,
)
from testtools.matchers import Equals
+from testtools.testcase import ExpectedException
import transaction
from twisted.internet import (
defer,
@@ -25,6 +26,7 @@
from zope.security.proxy import removeSecurityProxy
from lp.buildmaster.enums import (
+ BuilderCleanStatus,
BuildQueueStatus,
BuildStatus,
)
@@ -33,7 +35,10 @@
BuilderSlave,
extract_vitals_from_db,
)
-from lp.buildmaster.interfaces.builder import IBuilderSet
+from lp.buildmaster.interfaces.builder import (
+ BuildDaemonError,
+ IBuilderSet,
+ )
from lp.buildmaster.interfaces.buildfarmjobbehaviour import (
IBuildFarmJobBehaviour,
)
@@ -113,6 +118,7 @@
job = builder.currentjob
if job is not None:
job.reset()
+ builder.setCleanStatus(BuilderCleanStatus.CLEAN)
transaction.commit()
@@ -155,6 +161,7 @@
# Set this to 1 here so that _checkDispatch can make sure it's
# reset to 0 after a successful dispatch.
builder.failure_count = 1
+ builder.setCleanStatus(BuilderCleanStatus.CLEAN)
# Run 'scan' and check its result.
switch_dbuser(config.builddmaster.dbuser)
@@ -258,6 +265,7 @@
def test_scan_with_nothing_to_dispatch(self):
factory = LaunchpadObjectFactory()
builder = factory.makeBuilder()
+ builder.setCleanStatus(BuilderCleanStatus.CLEAN)
self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(OkSlave()))
transaction.commit()
scanner = self._getScanner(builder_name=builder.name)
@@ -422,9 +430,9 @@
self.assertFalse(builder.builderok)
@defer.inlineCallbacks
- def test_fail_to_resume_slave_resets_job(self):
- # If an attempt to resume and dispatch a slave fails, it should
- # reset the job via job.reset()
+ def test_fail_to_resume_leaves_it_dirty(self):
+ # If an attempt to resume a slave fails, its failure count is
+ # incremented and it is left DIRTY.
# Make a slave with a failing resume() method.
slave = OkSlave()
@@ -435,26 +443,21 @@
builder = removeSecurityProxy(
getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME])
self._resetBuilder(builder)
+ builder.setCleanStatus(BuilderCleanStatus.DIRTY)
+ builder.virtualized = True
self.assertEqual(0, builder.failure_count)
self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(slave))
builder.vm_host = "fake_vm_host"
-
- scanner = self._getScanner()
-
- # Get the next job that will be dispatched.
- job = removeSecurityProxy(builder._findBuildCandidate())
- job.virtualized = True
- builder.virtualized = True
transaction.commit()
- yield scanner.singleCycle()
-
- # The failure_count will have been incremented on the builder, we
- # can check that to see that a dispatch attempt did indeed occur.
+
+ # A spin of the scanner will see the DIRTY builder and reset it.
+ # Our patched reset will fail.
+ yield self._getScanner().singleCycle()
+
+ # The failure_count will have been incremented on the builder,
+ # and it will be left DIRTY.
self.assertEqual(1, builder.failure_count)
- # There should also be no builder set on the job.
- self.assertIsNone(job.builder)
- build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(job)
- self.assertEqual(build.status, BuildStatus.NEEDSBUILD)
+ self.assertEqual(BuilderCleanStatus.DIRTY, builder.clean_status)
@defer.inlineCallbacks
def test_update_slave_version(self):
@@ -569,15 +572,25 @@
builder.name, BuilderFactory(), BufferLogger(),
slave_factory=get_slave, clock=clock)
- # The slave is idle and there's a build candidate, so the first
- # scan will reset the builder and dispatch the build.
+ # The slave is idle and dirty, so the first scan will clean it
+ # with a reset.
+ self.assertEqual(BuilderCleanStatus.DIRTY, builder.clean_status)
+ yield scanner.scan()
+ self.assertEqual(['resume', 'echo'], get_slave.result.method_log)
+ self.assertEqual(BuilderCleanStatus.CLEAN, builder.clean_status)
+ self.assertIs(None, builder.currentjob)
+
+ # The slave is idle and clean, and there's a build candidate, so
+ # the next scan will dispatch the build.
+ get_slave.result = OkSlave()
yield scanner.scan()
self.assertEqual(
- ['status', 'resume', 'echo', 'ensurepresent', 'build'],
+ ['status', 'ensurepresent', 'build'],
get_slave.result.method_log)
self.assertEqual(bq, builder.currentjob)
self.assertEqual(BuildQueueStatus.RUNNING, bq.status)
self.assertEqual(BuildStatus.BUILDING, build.status)
+ self.assertEqual(BuilderCleanStatus.DIRTY, builder.clean_status)
# build() has been called, so switch in a BUILDING slave.
# Scans will now just do a status() each, as the logtail is
@@ -596,7 +609,8 @@
# When the build finishes, the scanner will notice, call
# handleStatus(), and then clean the builder. Our fake
# _handleStatus_OK doesn't do anything special, but there'd
- # usually be file retrievals in the middle.
+ # usually be file retrievals in the middle. The builder remains
+ # dirty afterward.
get_slave.result = WaitingSlave(
build_id=IBuildFarmJobBehaviour(build).getBuildCookie())
yield scanner.scan()
@@ -604,13 +618,15 @@
self.assertIs(None, builder.currentjob)
self.assertEqual(BuildStatus.UPLOADING, build.status)
self.assertEqual(builder, build.builder)
+ self.assertEqual(BuilderCleanStatus.DIRTY, builder.clean_status)
- # We're clean, so let's flip back to an idle slave and
- # confirm that a scan does nothing special.
+ # We're idle and dirty, so let's flip back to an idle slave and
+ # confirm that the slave gets cleaned.
get_slave.result = OkSlave()
yield scanner.scan()
- self.assertEqual(['status'], get_slave.result.method_log)
+ self.assertEqual(['resume', 'echo'], get_slave.result.method_log)
self.assertIs(None, builder.currentjob)
+ self.assertEqual(BuilderCleanStatus.CLEAN, builder.clean_status)
class TestPrefetchedBuilderFactory(TestCaseWithFactory):
@@ -740,74 +756,104 @@
run_tests_with = AsynchronousDeferredRunTest
+ def getScanner(self, builder_factory=None, interactor=None, slave=None,
+ behaviour=None):
+ if builder_factory is None:
+ builder_factory = MockBuilderFactory(
+ MockBuilder(virtualized=False), None)
+ if interactor is None:
+ interactor = BuilderInteractor()
+ interactor.updateBuild = FakeMethod()
+ if slave is None:
+ slave = OkSlave()
+ if behaviour is None:
+ behaviour = TrivialBehaviour()
+ return SlaveScanner(
+ 'mock', builder_factory, BufferLogger(),
+ interactor_factory=FakeMethod(interactor),
+ slave_factory=FakeMethod(slave),
+ behaviour_factory=FakeMethod(behaviour))
+
@defer.inlineCallbacks
def test_scan_with_job(self):
# SlaveScanner.scan calls updateBuild() when a job is building.
slave = BuildingSlave('trivial')
bq = FakeBuildQueue()
-
- # Instrument updateBuild.
- interactor = BuilderInteractor()
- interactor.updateBuild = FakeMethod()
-
- scanner = SlaveScanner(
- 'mock', MockBuilderFactory(MockBuilder(), bq), BufferLogger(),
- interactor_factory=FakeMethod(interactor),
- slave_factory=FakeMethod(slave),
- behaviour_factory=FakeMethod(TrivialBehaviour()))
+ scanner = self.getScanner(
+ builder_factory=MockBuilderFactory(MockBuilder(), bq),
+ slave=slave)
yield scanner.scan()
self.assertEqual(['status'], slave.call_log)
- self.assertEqual(1, interactor.updateBuild.call_count)
+ self.assertEqual(
+ 1, scanner.interactor_factory.result.updateBuild.call_count)
self.assertEqual(0, bq.reset.call_count)
@defer.inlineCallbacks
- def test_scan_aborts_lost_slave_with_job(self):
- # SlaveScanner.scan uses BuilderInteractor.rescueIfLost to abort
- # slaves that don't have the expected job.
+ def test_scan_recovers_lost_slave_with_job(self):
+ # SlaveScanner.scan identifies slaves that aren't building what
+ # they should be, resets the jobs, and then aborts the slaves.
slave = BuildingSlave('nontrivial')
bq = FakeBuildQueue()
-
- # Instrument updateBuild.
- interactor = BuilderInteractor()
- interactor.updateBuild = FakeMethod()
-
- scanner = SlaveScanner(
- 'mock', MockBuilderFactory(MockBuilder(), bq), BufferLogger(),
- interactor_factory=FakeMethod(interactor),
- slave_factory=FakeMethod(slave),
- behaviour_factory=FakeMethod(TrivialBehaviour()))
+ builder = MockBuilder(virtualized=False)
+ scanner = self.getScanner(
+ builder_factory=MockBuilderFactory(builder, bq),
+ slave=slave)
# A single scan will call status(), notice that the slave is lost,
- # abort() the slave, then reset() the job without calling
- # updateBuild().
+ # and reset() the job without calling updateBuild().
yield scanner.scan()
- self.assertEqual(['status', 'abort'], slave.call_log)
- self.assertEqual(0, interactor.updateBuild.call_count)
+ self.assertEqual(['status'], slave.call_log)
+ self.assertEqual(
+ 0, scanner.interactor_factory.result.updateBuild.call_count)
self.assertEqual(1, bq.reset.call_count)
+ # The reset would normally have unset build_queue.
+ scanner.builder_factory.updateTestData(builder, None)
+
+ # The next scan will see a dirty idle builder with a BUILDING
+ # slave, and abort() it.
+ yield scanner.scan()
+ self.assertEqual(['status', 'status', 'abort'], slave.call_log)
@defer.inlineCallbacks
- def test_scan_aborts_lost_slave_when_idle(self):
- # SlaveScanner.scan uses BuilderInteractor.rescueIfLost to abort
- # slaves that aren't meant to have a job.
+ def test_scan_recovers_lost_slave_when_idle(self):
+ # SlaveScanner.scan identifies slaves that are building when
+ # they shouldn't be and aborts them.
slave = BuildingSlave()
-
- # Instrument updateBuild.
- interactor = BuilderInteractor()
- interactor.updateBuild = FakeMethod()
-
- scanner = SlaveScanner(
- 'mock', MockBuilderFactory(MockBuilder(), None), BufferLogger(),
- interactor_factory=FakeMethod(interactor),
- slave_factory=FakeMethod(slave),
- behaviour_factory=FakeMethod(None))
-
- # A single scan will call status(), notice that the slave is lost,
- # abort() the slave, then reset() the job without calling
- # updateBuild().
+ scanner = self.getScanner(slave=slave)
yield scanner.scan()
self.assertEqual(['status', 'abort'], slave.call_log)
- self.assertEqual(0, interactor.updateBuild.call_count)
+
+ @defer.inlineCallbacks
+ def test_scan_building_but_not_dirty_builder_explodes(self):
+ # Builders with a build assigned must be dirty for safety
+ # reasons. If we run into one that's clean, we blow up.
+ slave = BuildingSlave()
+ builder = MockBuilder(clean_status=BuilderCleanStatus.CLEAN)
+ bq = FakeBuildQueue()
+ scanner = self.getScanner(
+ slave=slave, builder_factory=MockBuilderFactory(builder, bq))
+
+ with ExpectedException(
+ BuildDaemonError, "Non-dirty builder allegedly building."):
+ yield scanner.scan()
+ self.assertEqual([], slave.call_log)
+
+ @defer.inlineCallbacks
+ def test_scan_clean_but_not_idle_slave_explodes(self):
+ # Clean builders by definition have slaves that are idle. If
+ # an ostensibly clean slave isn't idle, blow up.
+ slave = BuildingSlave()
+ builder = MockBuilder(clean_status=BuilderCleanStatus.CLEAN)
+ scanner = self.getScanner(
+ slave=slave, builder_factory=MockBuilderFactory(builder, None))
+
+ with ExpectedException(
+ BuildDaemonError,
+ "Allegedly clean slave not idle "
+ "\('BuilderStatus.BUILDING' instead\)"):
+ yield scanner.scan()
+ self.assertEqual(['status'], slave.call_log)
def test_getExpectedCookie_caches(self):
bf = MockBuilderFactory(MockBuilder(), FakeBuildQueue())
=== modified file 'lib/lp/soyuz/tests/test_binarypackagebuildbehaviour.py'
--- lib/lp/soyuz/tests/test_binarypackagebuildbehaviour.py 2014-06-06 10:32:55 +0000
+++ lib/lp/soyuz/tests/test_binarypackagebuildbehaviour.py 2014-06-17 13:03:35 +0000
@@ -19,6 +19,7 @@
from zope.security.proxy import removeSecurityProxy
from lp.buildmaster.enums import (
+ BuilderCleanStatus,
BuildQueueStatus,
BuildStatus,
)
@@ -146,6 +147,7 @@
archive = self.factory.makeArchive(virtualized=False)
slave = OkSlave()
builder = self.factory.makeBuilder(virtualized=False)
+ builder.setCleanStatus(BuilderCleanStatus.CLEAN)
vitals = extract_vitals_from_db(builder)
build = self.factory.makeBinaryPackageBuild(
builder=builder, archive=archive)
@@ -170,6 +172,7 @@
slave = OkSlave()
builder = self.factory.makeBuilder(
virtualized=True, vm_host="foohost")
+ builder.setCleanStatus(BuilderCleanStatus.CLEAN)
vitals = extract_vitals_from_db(builder)
build = self.factory.makeBinaryPackageBuild(
builder=builder, archive=archive)
@@ -198,6 +201,7 @@
virtualized=False, purpose=ArchivePurpose.PARTNER)
slave = OkSlave()
builder = self.factory.makeBuilder(virtualized=False)
+ builder.setCleanStatus(BuilderCleanStatus.CLEAN)
vitals = extract_vitals_from_db(builder)
build = self.factory.makeBinaryPackageBuild(
builder=builder, archive=archive)