launchpad-reviewers team mailing list archive
Message #16926
Re: [Merge] lp:~wgrant/launchpad/ppa-reset-2.0-basics into lp:launchpad
Review: Approve
Looks good to me; just one minor suggestion.
Diff comments:
> === modified file 'lib/lp/buildmaster/interactor.py'
> --- lib/lp/buildmaster/interactor.py 2014-05-10 18:40:36 +0000
> +++ lib/lp/buildmaster/interactor.py 2014-06-17 13:03:35 +0000
> @@ -21,6 +21,7 @@
> removeSecurityProxy,
> )
>
> +from lp.buildmaster.enums import BuilderCleanStatus
> from lp.buildmaster.interfaces.builder import (
> BuildDaemonError,
> CannotFetchFile,
> @@ -216,7 +217,7 @@
> BuilderVitals = namedtuple(
> 'BuilderVitals',
> ('name', 'url', 'virtualized', 'vm_host', 'builderok', 'manual',
> - 'build_queue', 'version'))
> + 'build_queue', 'version', 'clean_status'))
>
> _BQ_UNSPECIFIED = object()
>
> @@ -226,7 +227,8 @@
> build_queue = builder.currentjob
> return BuilderVitals(
> builder.name, builder.url, builder.virtualized, builder.vm_host,
> - builder.builderok, builder.manual, build_queue, builder.version)
> + builder.builderok, builder.manual, build_queue, builder.version,
> + builder.clean_status)
>
>
> class BuilderInteractor(object):
> @@ -249,44 +251,6 @@
> return behaviour
>
> @classmethod
> - @defer.inlineCallbacks
> - def rescueIfLost(cls, vitals, slave, slave_status, expected_cookie,
> - logger=None):
> - """Reset the slave if its job information doesn't match the DB.
> -
> - This checks the build ID reported in the slave status against
> - the given cookie. If it isn't building what we think it should
> - be, the current build will be aborted and the slave cleaned in
> - preparation for a new task.
> -
> - :return: A Deferred that fires when the dialog with the slave is
> - finished. Its return value is True if the slave is lost,
> - False otherwise.
> - """
> - # Determine the slave's current build cookie.
> - status = slave_status['builder_status']
> - slave_cookie = slave_status.get('build_id')
> -
> - if slave_cookie == expected_cookie:
> - # The master and slave agree about the current job. Continue.
> - defer.returnValue(False)
> - else:
> - # The master and slave disagree. The master is our master,
> - # so try to rescue the slave.
> - # An IDLE slave doesn't need rescuing (SlaveScanner.scan
> - # will rescue the DB side instead), and we just have to wait
> - # out an ABORTING one.
> - if status == 'BuilderStatus.WAITING':
> - yield slave.clean()
> - elif status == 'BuilderStatus.BUILDING':
> - yield slave.abort()
> - if logger:
> - logger.info(
> - "Builder slave '%s' rescued from %r (expected %r)." %
> - (vitals.name, slave_cookie, expected_cookie))
> - defer.returnValue(True)
> -
> - @classmethod
> def resumeSlaveHost(cls, vitals, slave):
> """Resume the slave host to a known good condition.
>
> @@ -323,6 +287,49 @@
>
> @classmethod
> @defer.inlineCallbacks
> + def cleanSlave(cls, vitals, slave):
> + """Prepare a slave for a new build.
> +
> + :return: A Deferred that fires when this stage of the resume
> + operations finishes. If the value is True, the slave is now clean.
> + If it's False, the clean is still in progress and this must be
> + called again later.
> + """
> + if vitals.virtualized:
> + # If we are building a virtual build, resume the virtual
> + # machine. Before we try and contact the resumed slave,
> + # we're going to send it a message. This is to ensure
> + # it's accepting packets from the outside world, because
> + # testing has shown that the first packet will randomly
> + # fail for no apparent reason. This could be a quirk of
> + # the Xen guest, we're not sure. We also don't care
> + # about the result from this message, just that it's
> + # sent, hence the "addBoth". See bug 586359.
> + yield cls.resumeSlaveHost(vitals, slave)
> + yield slave.echo("ping")
> + defer.returnValue(True)
> + else:
> + slave_status = yield slave.status()
> + status = slave_status.get('builder_status', None)
> + if status == 'BuilderStatus.IDLE':
> + # This is as clean as we can get it.
> + defer.returnValue(True)
> + elif status == 'BuilderStatus.BUILDING':
> + # Asynchronously abort() the slave and wait until WAITING.
> + yield slave.abort()
> + defer.returnValue(False)
> + elif status == 'BuilderStatus.ABORTING':
> + # Wait it out until WAITING.
> + defer.returnValue(False)
> + elif status == 'BuilderStatus.WAITING':
> + # Just a synchronous clean() call and we'll be idle.
> + yield slave.clean()
> + defer.returnValue(True)
> + raise BuildDaemonError(
> + "Invalid status during clean: %r" % status)
> +
> + @classmethod
> + @defer.inlineCallbacks
> def _startBuild(cls, build_queue_item, vitals, builder, slave, behaviour,
> logger):
> """Start a build on this builder.
> @@ -342,17 +349,12 @@
> raise BuildDaemonError(
> "Attempted to start a build on a known-bad builder.")
>
> - # If we are building a virtual build, resume the virtual
> - # machine. Before we try and contact the resumed slave, we're
> - # going to send it a message. This is to ensure it's accepting
> - # packets from the outside world, because testing has shown that
> - # the first packet will randomly fail for no apparent reason.
> - # This could be a quirk of the Xen guest, we're not sure. We
> - # also don't care about the result from this message, just that
> - # it's sent, hence the "addBoth". See bug 586359.
> - if builder.virtualized:
> - yield cls.resumeSlaveHost(vitals, slave)
> - yield slave.echo("ping")
> + if builder.clean_status != BuilderCleanStatus.CLEAN:
> + raise BuildDaemonError(
> + "Attempted to start build on a dirty slave.")
> +
> + builder.setCleanStatus(BuilderCleanStatus.DIRTY)
> + transaction.commit()
>
> yield behaviour.dispatchBuildToSlave(build_queue_item.id, logger)
>
> @@ -448,9 +450,9 @@
> :return: A Deferred that fires when the slave dialog is finished.
> """
> # IDLE is deliberately not handled here, because it should be
> - # impossible to get past rescueIfLost unless the slave matches
> - # the DB, and this method isn't called unless the DB says
> - # there's a job.
> + # impossible to get past the cookie check unless the slave
> + # matches the DB, and this method isn't called unless the DB
> + # says there's a job.
> builder_status = slave_status['builder_status']
> if builder_status == 'BuilderStatus.BUILDING':
> # Build still building, collect the logtail.
>
> === modified file 'lib/lp/buildmaster/manager.py'
> --- lib/lp/buildmaster/manager.py 2014-02-05 23:22:30 +0000
> +++ lib/lp/buildmaster/manager.py 2014-06-17 13:03:35 +0000
> @@ -26,6 +26,7 @@
> from zope.component import getUtility
>
> from lp.buildmaster.enums import (
> + BuilderCleanStatus,
> BuildQueueStatus,
> BuildStatus,
> )
> @@ -413,70 +414,91 @@
> vitals = self.builder_factory.getVitals(self.builder_name)
> interactor = self.interactor_factory()
> slave = self.slave_factory(vitals)
> - if vitals.builderok:
> - self.logger.debug("Scanning %s." % self.builder_name)
> - slave_status = yield slave.status()
> - else:
> - slave_status = None
> -
> - # Confirm that the DB and slave sides are in a valid, mutually
> - # agreeable state.
> - lost_reason = None
> - if not vitals.builderok:
> - lost_reason = '%s is disabled' % vitals.name
> - else:
> - self.updateVersion(vitals, slave_status)
> - cancelled = yield self.checkCancellation(vitals, slave, interactor)
> - if cancelled:
> - return
> - assert slave_status is not None
> - lost = yield interactor.rescueIfLost(
> - vitals, slave, slave_status, self.getExpectedCookie(vitals),
> - self.logger)
> - if lost:
> - lost_reason = '%s is lost' % vitals.name
> -
> - # The slave is lost or the builder is disabled. We can't
> - # continue to update the job status or dispatch a new job, so
> - # just rescue the assigned job, if any, so it can be dispatched
> - # to another slave.
> - if lost_reason is not None:
> - if vitals.build_queue is not None:
> +
> + if vitals.build_queue is not None:
> + if vitals.clean_status != BuilderCleanStatus.DIRTY:
> + # This is probably a grave bug with security implications,
> + # as a slave that has a job must be cleaned afterwards.
> + raise BuildDaemonError("Non-dirty builder allegedly building.")
> +
> + lost_reason = None
> + if not vitals.builderok:
> + lost_reason = '%s is disabled' % vitals.name
> + else:
> + slave_status = yield slave.status()
> + # Ensure that the slave has the job that we think it
> + # should.
> + slave_cookie = slave_status.get('build_id')
> + expected_cookie = self.getExpectedCookie(vitals)
> + if slave_cookie != expected_cookie:
> + lost_reason = (
> + '%s is lost (expected %r, got %r)' % (
> + vitals.name, expected_cookie, slave_cookie))
> +
> + if lost_reason is not None:
> + # The slave is either confused or disabled, so reset and
> + # requeue the job. The next scan cycle will clean up the
> + # slave if appropriate.
> self.logger.warn(
> "%s. Resetting BuildQueue %d.", lost_reason,
> vitals.build_queue.id)
> vitals.build_queue.reset()
> transaction.commit()
> - return
> -
> - # We've confirmed that the slave state matches the DB. Continue
> - # with updating the job status, or dispatching a new job if the
> - # builder is idle.
> - if vitals.build_queue is not None:
> - # Scan the slave and get the logtail, or collect the build
> - # if it's ready. Yes, "updateBuild" is a bad name.
> + return
> +
> + cancelled = yield self.checkCancellation(
> + vitals, slave, interactor)
> + if cancelled:
> + return
> +
> + # The slave and DB agree on the builder's state. Scan the
> + # slave and get the logtail, or collect the build if it's
> + # ready. Yes, "updateBuild" is a bad name.
> assert slave_status is not None
> yield interactor.updateBuild(
> vitals, slave, slave_status, self.builder_factory,
> self.behaviour_factory)
> - elif vitals.manual:
> - # If the builder is in manual mode, don't dispatch anything.
> - self.logger.debug(
> - '%s is in manual mode, not dispatching.' % vitals.name)
> else:
> - # See if there is a job we can dispatch to the builder slave.
> - builder = self.builder_factory[self.builder_name]
> - # Try to dispatch the job. If it fails, don't attempt to
> - # just retry the scan; we need to reset the job so the
> - # dispatch will be reattempted.
> - d = interactor.findAndStartJob(vitals, builder, slave)
> - d.addErrback(functools.partial(self._scanFailed, False))
> - yield d
> - if builder.currentjob is not None:
> - # After a successful dispatch we can reset the
> - # failure_count.
> - builder.resetFailureCount()
> - transaction.commit()
> + if not vitals.builderok:
> + return
> + # We think the builder is idle. If it's clean, dispatch. If
> + # it's dirty, clean.
> + if vitals.clean_status == BuilderCleanStatus.CLEAN:
> + slave_status = yield slave.status()
> + if slave_status.get('builder_status') != 'BuilderStatus.IDLE':
> + raise BuildDaemonError(
> + 'Allegedly clean slave not idle (%r instead)'
> + % slave_status.get('builder_status'))
> + self.updateVersion(vitals, slave_status)
> + if vitals.manual:
> + # If the builder is in manual mode, don't dispatch
> + # anything.
> + self.logger.debug(
> + '%s is in manual mode, not dispatching.', vitals.name)
> + return
> + # Try to find and dispatch a job. If it fails, don't
> + # attempt to just retry the scan; we need to reset
> + # the job so the dispatch will be reattempted.
> + builder = self.builder_factory[self.builder_name]
> + d = interactor.findAndStartJob(vitals, builder, slave)
> + d.addErrback(functools.partial(self._scanFailed, False))
> + yield d
> + if builder.currentjob is not None:
> + # After a successful dispatch we can reset the
> + # failure_count.
> + builder.resetFailureCount()
> + transaction.commit()
> + else:
Worth an "assert vitals.clean_status == BuilderCleanStatus.DIRTY" or similar here? CLEANING is for the new protocol, and we aren't expecting it yet, but it would have saved me a bit of thought while reading this logic.
> + # Ask the BuilderInteractor to clean the slave. It might
> + # be immediately cleaned on return, in which case we go
> + # straight back to CLEAN, or we might have to spin
> + # through another few cycles.
> + done = yield interactor.cleanSlave(vitals, slave)
> + if done:
> + builder = self.builder_factory[self.builder_name]
> + builder.setCleanStatus(BuilderCleanStatus.CLEAN)
> + self.logger.debug('%s has been cleaned.', vitals.name)
> + transaction.commit()
>
>
> class NewBuildersScanner:
>
> === modified file 'lib/lp/buildmaster/tests/mock_slaves.py'
> --- lib/lp/buildmaster/tests/mock_slaves.py 2014-06-06 10:32:55 +0000
> +++ lib/lp/buildmaster/tests/mock_slaves.py 2014-06-17 13:03:35 +0000
> @@ -30,6 +30,7 @@
> from twisted.internet import defer
> from twisted.web import xmlrpc
>
> +from lp.buildmaster.enums import BuilderCleanStatus
> from lp.buildmaster.interactor import BuilderSlave
> from lp.buildmaster.interfaces.builder import CannotFetchFile
> from lp.services.config import config
> @@ -48,7 +49,7 @@
>
> def __init__(self, name='mock-builder', builderok=True, manual=False,
> virtualized=True, vm_host=None, url='http://fake:0000',
> - version=None):
> + version=None, clean_status=BuilderCleanStatus.DIRTY):
> self.currentjob = None
> self.builderok = builderok
> self.manual = manual
> @@ -58,6 +59,10 @@
> self.vm_host = vm_host
> self.failnotes = None
> self.version = version
> + self.clean_status = clean_status
> +
> + def setCleanStatus(self, clean_status):
> + self.clean_status = clean_status
>
> def failBuilder(self, reason):
> self.builderok = False
>
> === modified file 'lib/lp/buildmaster/tests/test_interactor.py'
> --- lib/lp/buildmaster/tests/test_interactor.py 2014-06-06 11:15:29 +0000
> +++ lib/lp/buildmaster/tests/test_interactor.py 2014-06-17 13:03:35 +0000
> @@ -16,19 +16,24 @@
> SynchronousDeferredRunTest,
> )
> from testtools.matchers import ContainsAll
> +from testtools.testcase import ExpectedException
> from twisted.internet import defer
> from twisted.internet.task import Clock
> from twisted.python.failure import Failure
> from twisted.web.client import getPage
> from zope.security.proxy import removeSecurityProxy
>
> -from lp.buildmaster.enums import BuildStatus
> +from lp.buildmaster.enums import (
> + BuilderCleanStatus,
> + BuildStatus,
> + )
> from lp.buildmaster.interactor import (
> BuilderInteractor,
> BuilderSlave,
> extract_vitals_from_db,
> )
> from lp.buildmaster.interfaces.builder import (
> + BuildDaemonError,
> CannotFetchFile,
> CannotResumeHost,
> )
> @@ -151,44 +156,6 @@
> self.assertEqual(5, slave.timeout)
>
> @defer.inlineCallbacks
> - def test_rescueIfLost_aborts_lost_and_broken_slave(self):
> - # A slave that's 'lost' should be aborted; when the slave is
> - # broken then abort() should also throw a fault.
> - slave = LostBuildingBrokenSlave()
> - slave_status = yield slave.status()
> - try:
> - yield BuilderInteractor.rescueIfLost(
> - extract_vitals_from_db(MockBuilder()), slave, slave_status,
> - 'trivial')
> - except xmlrpclib.Fault:
> - self.assertIn('abort', slave.call_log)
> - else:
> - self.fail("xmlrpclib.Fault not raised")
> -
> - @defer.inlineCallbacks
> - def test_recover_idle_slave(self):
> - # An idle slave is not rescued, even if it's not meant to be
> - # idle. SlaveScanner.scan() will clean up the DB side, because
> - # we still report that it's lost.
> - slave = OkSlave()
> - slave_status = yield slave.status()
> - lost = yield BuilderInteractor.rescueIfLost(
> - extract_vitals_from_db(MockBuilder()), slave, slave_status,
> - 'trivial')
> - self.assertTrue(lost)
> - self.assertEqual(['status'], slave.call_log)
> -
> - @defer.inlineCallbacks
> - def test_recover_ok_slave(self):
> - # An idle slave that's meant to be idle is not rescued.
> - slave = OkSlave()
> - slave_status = yield slave.status()
> - lost = yield BuilderInteractor.rescueIfLost(
> - extract_vitals_from_db(MockBuilder()), slave, slave_status, None)
> - self.assertFalse(lost)
> - self.assertEqual(['status'], slave.call_log)
> -
> - @defer.inlineCallbacks
> def test_recover_waiting_slave_with_good_id(self):
> # rescueIfLost does not attempt to abort or clean a builder that is
> # WAITING.
> @@ -240,6 +207,76 @@
> self.assertEqual(['status', 'abort'], building_slave.call_log)
>
>
> +class TestBuilderInteractorCleanSlave(TestCase):
> +
> + run_tests_with = AsynchronousDeferredRunTest
> +
> + @defer.inlineCallbacks
> + def assertCleanCalls(self, builder, slave, calls, done):
> + actually_done = yield BuilderInteractor.cleanSlave(
> + extract_vitals_from_db(builder), slave)
> + self.assertEqual(done, actually_done)
> + self.assertEqual(calls, slave.method_log)
> +
> + @defer.inlineCallbacks
> + def test_virtual(self):
> + # We don't care what the status of a virtual builder is; we just
> + # reset its VM and check that it's back.
> + yield self.assertCleanCalls(
> + MockBuilder(
> + virtualized=True, vm_host='lol',
> + clean_status=BuilderCleanStatus.DIRTY),
> + OkSlave(), ['resume', 'echo'], True)
> +
> + @defer.inlineCallbacks
> + def test_nonvirtual_idle(self):
> + # An IDLE non-virtual slave is already as clean as we can get it.
> + yield self.assertCleanCalls(
> + MockBuilder(
> + virtualized=False, clean_status=BuilderCleanStatus.DIRTY),
> + OkSlave(), ['status'], True)
> +
> + @defer.inlineCallbacks
> + def test_nonvirtual_building(self):
> + # A BUILDING non-virtual slave needs to be aborted. It'll go
> + # through ABORTING and eventually be picked up from WAITING.
> + yield self.assertCleanCalls(
> + MockBuilder(
> + virtualized=False, clean_status=BuilderCleanStatus.DIRTY),
> + BuildingSlave(), ['status', 'abort'], False)
> +
> + @defer.inlineCallbacks
> + def test_nonvirtual_aborting(self):
> + # An ABORTING non-virtual slave must be waited out. It should
> + # hit WAITING eventually.
> + yield self.assertCleanCalls(
> + MockBuilder(
> + virtualized=False, clean_status=BuilderCleanStatus.DIRTY),
> + AbortingSlave(), ['status'], False)
> +
> + @defer.inlineCallbacks
> + def test_nonvirtual_waiting(self):
> + # A WAITING non-virtual slave just needs clean() called.
> + yield self.assertCleanCalls(
> + MockBuilder(
> + virtualized=False, clean_status=BuilderCleanStatus.DIRTY),
> + WaitingSlave(), ['status', 'clean'], True)
> +
> + @defer.inlineCallbacks
> + def test_nonvirtual_broken(self):
> + # A broken non-virtual builder is probably unrecoverable, so the
> + # method just crashes.
> + vitals = extract_vitals_from_db(MockBuilder(
> + virtualized=False, clean_status=BuilderCleanStatus.DIRTY))
> + slave = LostBuildingBrokenSlave()
> + try:
> + yield BuilderInteractor.cleanSlave(vitals, slave)
> + except xmlrpclib.Fault:
> + self.assertEqual(['status', 'abort'], slave.call_log)
> + else:
> + self.fail("abort() should crash.")
> +
> +
> class TestBuilderSlaveStatus(TestCase):
> # Verify what BuilderSlave.status returns with slaves in different
> # states.
> @@ -321,6 +358,7 @@
> processor = self.factory.makeProcessor(name="i386")
> builder = self.factory.makeBuilder(
> processors=[processor], virtualized=True, vm_host="bladh")
> + builder.setCleanStatus(BuilderCleanStatus.CLEAN)
> self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(OkSlave()))
> distroseries = self.factory.makeDistroSeries()
> das = self.factory.makeDistroArchSeries(
> @@ -377,21 +415,30 @@
>
> return d.addCallback(check_build_started)
>
> - def test_virtual_job_dispatch_pings_before_building(self):
> - # We need to send a ping to the builder to work around a bug
> - # where sometimes the first network packet sent is dropped.
> - builder, build = self._setupBinaryBuildAndBuilder()
> - candidate = build.queueBuild()
> - removeSecurityProxy(builder)._findBuildCandidate = FakeMethod(
> - result=candidate)
> - vitals = extract_vitals_from_db(builder)
> - slave = OkSlave()
> - d = BuilderInteractor.findAndStartJob(vitals, builder, slave)
> -
> - def check_build_started(candidate):
> - self.assertIn(('echo', 'ping'), slave.call_log)
> -
> - return d.addCallback(check_build_started)
> + @defer.inlineCallbacks
> + def test_findAndStartJob_requires_clean_slave(self):
> + # findAndStartJob ensures that its slave starts CLEAN.
> + builder, build = self._setupBinaryBuildAndBuilder()
> + builder.setCleanStatus(BuilderCleanStatus.DIRTY)
> + candidate = build.queueBuild()
> + removeSecurityProxy(builder)._findBuildCandidate = FakeMethod(
> + result=candidate)
> + vitals = extract_vitals_from_db(builder)
> + with ExpectedException(
> + BuildDaemonError,
> + "Attempted to start build on a dirty slave."):
> + yield BuilderInteractor.findAndStartJob(vitals, builder, OkSlave())
> +
> + @defer.inlineCallbacks
> + def test_findAndStartJob_dirties_slave(self):
> + # findAndStartJob marks its builder DIRTY before dispatching.
> + builder, build = self._setupBinaryBuildAndBuilder()
> + candidate = build.queueBuild()
> + removeSecurityProxy(builder)._findBuildCandidate = FakeMethod(
> + result=candidate)
> + vitals = extract_vitals_from_db(builder)
> + yield BuilderInteractor.findAndStartJob(vitals, builder, OkSlave())
> + self.assertEqual(BuilderCleanStatus.DIRTY, builder.clean_status)
>
>
> class TestSlave(TestCase):
>
> === modified file 'lib/lp/buildmaster/tests/test_manager.py'
> --- lib/lp/buildmaster/tests/test_manager.py 2014-06-06 10:32:55 +0000
> +++ lib/lp/buildmaster/tests/test_manager.py 2014-06-17 13:03:35 +0000
> @@ -13,6 +13,7 @@
> AsynchronousDeferredRunTest,
> )
> from testtools.matchers import Equals
> +from testtools.testcase import ExpectedException
> import transaction
> from twisted.internet import (
> defer,
> @@ -25,6 +26,7 @@
> from zope.security.proxy import removeSecurityProxy
>
> from lp.buildmaster.enums import (
> + BuilderCleanStatus,
> BuildQueueStatus,
> BuildStatus,
> )
> @@ -33,7 +35,10 @@
> BuilderSlave,
> extract_vitals_from_db,
> )
> -from lp.buildmaster.interfaces.builder import IBuilderSet
> +from lp.buildmaster.interfaces.builder import (
> + BuildDaemonError,
> + IBuilderSet,
> + )
> from lp.buildmaster.interfaces.buildfarmjobbehaviour import (
> IBuildFarmJobBehaviour,
> )
> @@ -113,6 +118,7 @@
> job = builder.currentjob
> if job is not None:
> job.reset()
> + builder.setCleanStatus(BuilderCleanStatus.CLEAN)
>
> transaction.commit()
>
> @@ -155,6 +161,7 @@
> # Set this to 1 here so that _checkDispatch can make sure it's
> # reset to 0 after a successful dispatch.
> builder.failure_count = 1
> + builder.setCleanStatus(BuilderCleanStatus.CLEAN)
>
> # Run 'scan' and check its result.
> switch_dbuser(config.builddmaster.dbuser)
> @@ -258,6 +265,7 @@
> def test_scan_with_nothing_to_dispatch(self):
> factory = LaunchpadObjectFactory()
> builder = factory.makeBuilder()
> + builder.setCleanStatus(BuilderCleanStatus.CLEAN)
> self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(OkSlave()))
> transaction.commit()
> scanner = self._getScanner(builder_name=builder.name)
> @@ -422,9 +430,9 @@
> self.assertFalse(builder.builderok)
>
> @defer.inlineCallbacks
> - def test_fail_to_resume_slave_resets_job(self):
> - # If an attempt to resume and dispatch a slave fails, it should
> - # reset the job via job.reset()
> + def test_fail_to_resume_leaves_it_dirty(self):
> + # If an attempt to resume a slave fails, its failure count is
> + # incremented and it is left DIRTY.
>
> # Make a slave with a failing resume() method.
> slave = OkSlave()
> @@ -435,26 +443,21 @@
> builder = removeSecurityProxy(
> getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME])
> self._resetBuilder(builder)
> + builder.setCleanStatus(BuilderCleanStatus.DIRTY)
> + builder.virtualized = True
> self.assertEqual(0, builder.failure_count)
> self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(slave))
> builder.vm_host = "fake_vm_host"
> -
> - scanner = self._getScanner()
> -
> - # Get the next job that will be dispatched.
> - job = removeSecurityProxy(builder._findBuildCandidate())
> - job.virtualized = True
> - builder.virtualized = True
> transaction.commit()
> - yield scanner.singleCycle()
> -
> - # The failure_count will have been incremented on the builder, we
> - # can check that to see that a dispatch attempt did indeed occur.
> +
> + # A spin of the scanner will see the DIRTY builder and reset it.
> + # Our patched reset will fail.
> + yield self._getScanner().singleCycle()
> +
> + # The failure_count will have been incremented on the builder,
> + # and it will be left DIRTY.
> self.assertEqual(1, builder.failure_count)
> - # There should also be no builder set on the job.
> - self.assertIsNone(job.builder)
> - build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(job)
> - self.assertEqual(build.status, BuildStatus.NEEDSBUILD)
> + self.assertEqual(BuilderCleanStatus.DIRTY, builder.clean_status)
>
> @defer.inlineCallbacks
> def test_update_slave_version(self):
> @@ -569,15 +572,25 @@
> builder.name, BuilderFactory(), BufferLogger(),
> slave_factory=get_slave, clock=clock)
>
> - # The slave is idle and there's a build candidate, so the first
> - # scan will reset the builder and dispatch the build.
> + # The slave is idle and dirty, so the first scan will clean it
> + # with a reset.
> + self.assertEqual(BuilderCleanStatus.DIRTY, builder.clean_status)
> + yield scanner.scan()
> + self.assertEqual(['resume', 'echo'], get_slave.result.method_log)
> + self.assertEqual(BuilderCleanStatus.CLEAN, builder.clean_status)
> + self.assertIs(None, builder.currentjob)
> +
> + # The slave is idle and clean, and there's a build candidate, so
> + # the next scan will dispatch the build.
> + get_slave.result = OkSlave()
> yield scanner.scan()
> self.assertEqual(
> - ['status', 'resume', 'echo', 'ensurepresent', 'build'],
> + ['status', 'ensurepresent', 'build'],
> get_slave.result.method_log)
> self.assertEqual(bq, builder.currentjob)
> self.assertEqual(BuildQueueStatus.RUNNING, bq.status)
> self.assertEqual(BuildStatus.BUILDING, build.status)
> + self.assertEqual(BuilderCleanStatus.DIRTY, builder.clean_status)
>
> # build() has been called, so switch in a BUILDING slave.
> # Scans will now just do a status() each, as the logtail is
> @@ -596,7 +609,8 @@
> # When the build finishes, the scanner will notice, call
> # handleStatus(), and then clean the builder. Our fake
> # _handleStatus_OK doesn't do anything special, but there'd
> - # usually be file retrievals in the middle.
> + # usually be file retrievals in the middle. The builder remains
> + # dirty afterward.
> get_slave.result = WaitingSlave(
> build_id=IBuildFarmJobBehaviour(build).getBuildCookie())
> yield scanner.scan()
> @@ -604,13 +618,15 @@
> self.assertIs(None, builder.currentjob)
> self.assertEqual(BuildStatus.UPLOADING, build.status)
> self.assertEqual(builder, build.builder)
> + self.assertEqual(BuilderCleanStatus.DIRTY, builder.clean_status)
>
> - # We're clean, so let's flip back to an idle slave and
> - # confirm that a scan does nothing special.
> + # We're idle and dirty, so let's flip back to an idle slave and
> + # confirm that the slave gets cleaned.
> get_slave.result = OkSlave()
> yield scanner.scan()
> - self.assertEqual(['status'], get_slave.result.method_log)
> + self.assertEqual(['resume', 'echo'], get_slave.result.method_log)
> self.assertIs(None, builder.currentjob)
> + self.assertEqual(BuilderCleanStatus.CLEAN, builder.clean_status)
>
>
> class TestPrefetchedBuilderFactory(TestCaseWithFactory):
> @@ -740,74 +756,104 @@
>
> run_tests_with = AsynchronousDeferredRunTest
>
> + def getScanner(self, builder_factory=None, interactor=None, slave=None,
> + behaviour=None):
> + if builder_factory is None:
> + builder_factory = MockBuilderFactory(
> + MockBuilder(virtualized=False), None)
> + if interactor is None:
> + interactor = BuilderInteractor()
> + interactor.updateBuild = FakeMethod()
> + if slave is None:
> + slave = OkSlave()
> + if behaviour is None:
> + behaviour = TrivialBehaviour()
> + return SlaveScanner(
> + 'mock', builder_factory, BufferLogger(),
> + interactor_factory=FakeMethod(interactor),
> + slave_factory=FakeMethod(slave),
> + behaviour_factory=FakeMethod(behaviour))
> +
> @defer.inlineCallbacks
> def test_scan_with_job(self):
> # SlaveScanner.scan calls updateBuild() when a job is building.
> slave = BuildingSlave('trivial')
> bq = FakeBuildQueue()
> -
> - # Instrument updateBuild.
> - interactor = BuilderInteractor()
> - interactor.updateBuild = FakeMethod()
> -
> - scanner = SlaveScanner(
> - 'mock', MockBuilderFactory(MockBuilder(), bq), BufferLogger(),
> - interactor_factory=FakeMethod(interactor),
> - slave_factory=FakeMethod(slave),
> - behaviour_factory=FakeMethod(TrivialBehaviour()))
> + scanner = self.getScanner(
> + builder_factory=MockBuilderFactory(MockBuilder(), bq),
> + slave=slave)
>
> yield scanner.scan()
> self.assertEqual(['status'], slave.call_log)
> - self.assertEqual(1, interactor.updateBuild.call_count)
> + self.assertEqual(
> + 1, scanner.interactor_factory.result.updateBuild.call_count)
> self.assertEqual(0, bq.reset.call_count)
>
> @defer.inlineCallbacks
> - def test_scan_aborts_lost_slave_with_job(self):
> - # SlaveScanner.scan uses BuilderInteractor.rescueIfLost to abort
> - # slaves that don't have the expected job.
> + def test_scan_recovers_lost_slave_with_job(self):
> + # SlaveScanner.scan identifies slaves that aren't building what
> + # they should be, resets the jobs, and then aborts the slaves.
> slave = BuildingSlave('nontrivial')
> bq = FakeBuildQueue()
> -
> - # Instrument updateBuild.
> - interactor = BuilderInteractor()
> - interactor.updateBuild = FakeMethod()
> -
> - scanner = SlaveScanner(
> - 'mock', MockBuilderFactory(MockBuilder(), bq), BufferLogger(),
> - interactor_factory=FakeMethod(interactor),
> - slave_factory=FakeMethod(slave),
> - behaviour_factory=FakeMethod(TrivialBehaviour()))
> + builder = MockBuilder(virtualized=False)
> + scanner = self.getScanner(
> + builder_factory=MockBuilderFactory(builder, bq),
> + slave=slave)
>
> # A single scan will call status(), notice that the slave is lost,
> - # abort() the slave, then reset() the job without calling
> - # updateBuild().
> + # and reset() the job without calling updateBuild().
> yield scanner.scan()
> - self.assertEqual(['status', 'abort'], slave.call_log)
> - self.assertEqual(0, interactor.updateBuild.call_count)
> + self.assertEqual(['status'], slave.call_log)
> + self.assertEqual(
> + 0, scanner.interactor_factory.result.updateBuild.call_count)
> self.assertEqual(1, bq.reset.call_count)
> + # The reset would normally have unset build_queue.
> + scanner.builder_factory.updateTestData(builder, None)
> +
> + # The next scan will see a dirty idle builder with a BUILDING
> + # slave, and abort() it.
> + yield scanner.scan()
> + self.assertEqual(['status', 'status', 'abort'], slave.call_log)
>
> @defer.inlineCallbacks
> - def test_scan_aborts_lost_slave_when_idle(self):
> - # SlaveScanner.scan uses BuilderInteractor.rescueIfLost to abort
> - # slaves that aren't meant to have a job.
> + def test_scan_recovers_lost_slave_when_idle(self):
> + # SlaveScanner.scan identifies slaves that are building when
> + # they shouldn't be and aborts them.
> slave = BuildingSlave()
> -
> - # Instrument updateBuild.
> - interactor = BuilderInteractor()
> - interactor.updateBuild = FakeMethod()
> -
> - scanner = SlaveScanner(
> - 'mock', MockBuilderFactory(MockBuilder(), None), BufferLogger(),
> - interactor_factory=FakeMethod(interactor),
> - slave_factory=FakeMethod(slave),
> - behaviour_factory=FakeMethod(None))
> -
> - # A single scan will call status(), notice that the slave is lost,
> - # abort() the slave, then reset() the job without calling
> - # updateBuild().
> + scanner = self.getScanner(slave=slave)
> yield scanner.scan()
> self.assertEqual(['status', 'abort'], slave.call_log)
> - self.assertEqual(0, interactor.updateBuild.call_count)
> +
> + @defer.inlineCallbacks
> + def test_scan_building_but_not_dirty_builder_explodes(self):
> + # Builders with a build assigned must be dirty for safety
> + # reasons. If we run into one that's clean, we blow up.
> + slave = BuildingSlave()
> + builder = MockBuilder(clean_status=BuilderCleanStatus.CLEAN)
> + bq = FakeBuildQueue()
> + scanner = self.getScanner(
> + slave=slave, builder_factory=MockBuilderFactory(builder, bq))
> +
> + with ExpectedException(
> + BuildDaemonError, "Non-dirty builder allegedly building."):
> + yield scanner.scan()
> + self.assertEqual([], slave.call_log)
> +
> + @defer.inlineCallbacks
> + def test_scan_clean_but_not_idle_slave_explodes(self):
> + # Clean builders by definition have slaves that are idle. If
> + # an ostensibly clean slave isn't idle, blow up.
> + slave = BuildingSlave()
> + builder = MockBuilder(clean_status=BuilderCleanStatus.CLEAN)
> + scanner = self.getScanner(
> + slave=slave, builder_factory=MockBuilderFactory(builder, None))
> +
> + with ExpectedException(
> + BuildDaemonError,
> + "Allegedly clean slave not idle "
> + "\('BuilderStatus.BUILDING' instead\)"):
> + yield scanner.scan()
> + self.assertEqual(['status'], slave.call_log)
>
> def test_getExpectedCookie_caches(self):
> bf = MockBuilderFactory(MockBuilder(), FakeBuildQueue())
>
> === modified file 'lib/lp/soyuz/tests/test_binarypackagebuildbehaviour.py'
> --- lib/lp/soyuz/tests/test_binarypackagebuildbehaviour.py 2014-06-06 10:32:55 +0000
> +++ lib/lp/soyuz/tests/test_binarypackagebuildbehaviour.py 2014-06-17 13:03:35 +0000
> @@ -19,6 +19,7 @@
> from zope.security.proxy import removeSecurityProxy
>
> from lp.buildmaster.enums import (
> + BuilderCleanStatus,
> BuildQueueStatus,
> BuildStatus,
> )
> @@ -146,6 +147,7 @@
> archive = self.factory.makeArchive(virtualized=False)
> slave = OkSlave()
> builder = self.factory.makeBuilder(virtualized=False)
> + builder.setCleanStatus(BuilderCleanStatus.CLEAN)
> vitals = extract_vitals_from_db(builder)
> build = self.factory.makeBinaryPackageBuild(
> builder=builder, archive=archive)
> @@ -170,6 +172,7 @@
> slave = OkSlave()
> builder = self.factory.makeBuilder(
> virtualized=True, vm_host="foohost")
> + builder.setCleanStatus(BuilderCleanStatus.CLEAN)
> vitals = extract_vitals_from_db(builder)
> build = self.factory.makeBinaryPackageBuild(
> builder=builder, archive=archive)
> @@ -198,6 +201,7 @@
> virtualized=False, purpose=ArchivePurpose.PARTNER)
> slave = OkSlave()
> builder = self.factory.makeBuilder(virtualized=False)
> + builder.setCleanStatus(BuilderCleanStatus.CLEAN)
> vitals = extract_vitals_from_db(builder)
> build = self.factory.makeBinaryPackageBuild(
> builder=builder, archive=archive)
>
--
https://code.launchpad.net/~wgrant/launchpad/ppa-reset-2.0-basics/+merge/223397
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.