launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #25635
[Merge] ~cjwatson/launchpad:number-cruncher-lost-build-queue into launchpad:master
Colin Watson has proposed merging ~cjwatson/launchpad:number-cruncher-lost-build-queue into launchpad:master.
Commit message:
number-cruncher: Count builders as building if they have a build queue
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/393642
We don't really need to check the build queue status to determine that a builder is building; just having a build queue is sufficient, even if it's e.g. in the process of a cancelling a build. Compare the logic in SlaveScanner.scan.
This also fixes occasional LostObjectError exceptions when a build completes between the start of the transaction taken by `self.builder_factory.update()` and the start of the transaction taken by `self._updateBuilderCounts()`, since evaluating `builder.build_queue.status` can't be done solely using the prefetched builder vitals and requires a database query.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:number-cruncher-lost-build-queue into launchpad:master.
diff --git a/lib/lp/services/statsd/numbercruncher.py b/lib/lp/services/statsd/numbercruncher.py
index 6a43c36..31191d4 100644
--- a/lib/lp/services/statsd/numbercruncher.py
+++ b/lib/lp/services/statsd/numbercruncher.py
@@ -20,10 +20,7 @@ from twisted.internet.task import LoopingCall
from twisted.python import log
from zope.component import getUtility
-from lp.buildmaster.enums import (
- BuilderCleanStatus,
- BuildQueueStatus,
- )
+from lp.buildmaster.enums import BuilderCleanStatus
from lp.buildmaster.interfaces.builder import IBuilderSet
from lp.buildmaster.manager import PrefetchedBuilderFactory
from lp.services.statsd.interfaces.statsd_client import IStatsdClient
@@ -109,8 +106,7 @@ class NumberCruncher(service.Service):
counts['disabled'] += 1
elif builder.clean_status == BuilderCleanStatus.CLEANING:
counts['cleaning'] += 1
- elif (builder.build_queue and
- builder.build_queue.status == BuildQueueStatus.RUNNING):
+ elif builder.build_queue:
counts['building'] += 1
elif builder.clean_status == BuilderCleanStatus.CLEAN:
counts['idle'] += 1
diff --git a/lib/lp/services/statsd/tests/test_numbercruncher.py b/lib/lp/services/statsd/tests/test_numbercruncher.py
index 020ef76..9a43f6f 100644
--- a/lib/lp/services/statsd/tests/test_numbercruncher.py
+++ b/lib/lp/services/statsd/tests/test_numbercruncher.py
@@ -7,6 +7,7 @@ from __future__ import absolute_import, print_function, unicode_literals
__metaclass__ = type
+from storm.store import Store
from testtools.matchers import (
Equals,
MatchesListwise,
@@ -19,6 +20,7 @@ from zope.component import getUtility
from lp.buildmaster.enums import BuilderCleanStatus
from lp.buildmaster.interactor import BuilderSlave
from lp.buildmaster.interfaces.processor import IProcessorSet
+from lp.buildmaster.model.buildqueue import BuildQueue
from lp.buildmaster.tests.mock_slaves import OkSlave
from lp.services.database.isolation import is_transaction_in_progress
from lp.services.database.policy import DatabaseBlockedPolicy
@@ -46,7 +48,6 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
transaction.commit()
clock = task.Clock()
manager = NumberCruncher(clock=clock)
- manager.builder_factory.update()
manager.updateBuilderStats()
self.assertFalse(is_transaction_in_progress())
@@ -62,7 +63,6 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
transaction.commit()
clock = task.Clock()
manager = NumberCruncher(clock=clock)
- manager.builder_factory.update()
manager.updateBuilderStats()
self.assertFalse(is_transaction_in_progress())
@@ -75,14 +75,42 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
self.assertEqual(4, len(amd64_calls))
def test_correct_values_counts(self):
- builder = self.factory.makeBuilder(
- processors=[getUtility(IProcessorSet).getByName('amd64')])
- builder.setCleanStatus(BuilderCleanStatus.CLEANING)
+ for _ in range(3):
+ cleaning_builder = self.factory.makeBuilder(
+ processors=[getUtility(IProcessorSet).getByName('amd64')])
+ cleaning_builder.setCleanStatus(BuilderCleanStatus.CLEANING)
+ for _ in range(4):
+ idle_builder = self.factory.makeBuilder(
+ processors=[getUtility(IProcessorSet).getByName('amd64')])
+ idle_builder.setCleanStatus(BuilderCleanStatus.CLEAN)
+ old_build = self.factory.makeSnapBuild()
+ old_build.queueBuild()
+ old_build.buildqueue_record.markAsBuilding(builder=idle_builder)
+ old_build.buildqueue_record.destroySelf()
+ builds = []
+ for _ in range(2):
+ building_builder = self.factory.makeBuilder(
+ processors=[getUtility(IProcessorSet).getByName('amd64')])
+ building_builder.setCleanStatus(BuilderCleanStatus.CLEAN)
+ build = self.factory.makeSnapBuild()
+ build.queueBuild()
+ build.buildqueue_record.markAsBuilding(builder=building_builder)
+ builds.append(build)
self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(OkSlave()))
transaction.commit()
clock = task.Clock()
manager = NumberCruncher(clock=clock)
manager.builder_factory.update()
+ # Simulate one of the builds having finished between the builder
+ # factory being updated and _updateBuilderCounts being called. (In
+ # this case we count the builder as building anyway, since
+ # everything is prefetched.) We do this in an unusual way so that
+ # Storm doesn't know that the object has been deleted until it tries
+ # to reload it.
+ Store.of(builds[1].buildqueue_record).find(
+ BuildQueue, id=builds[1].buildqueue_record.id).remove()
+ transaction.commit()
+ manager.builder_factory.update = FakeMethod()
manager.updateBuilderStats()
self.assertFalse(is_transaction_in_progress())
@@ -96,13 +124,13 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
'virtualized=True,env=test', 0)),
Equals((
'builders,status=building,arch=amd64,'
- 'virtualized=True,env=test', 0)),
+ 'virtualized=True,env=test', 2)),
Equals((
'builders,status=idle,arch=amd64,'
- 'virtualized=True,env=test', 0)),
+ 'virtualized=True,env=test', 4)),
Equals((
'builders,status=cleaning,arch=amd64,'
- 'virtualized=True,env=test', 1))
+ 'virtualized=True,env=test', 3))
]))
def test_updateBuilderStats_error(self):
@@ -128,7 +156,6 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
clock = task.Clock()
manager = NumberCruncher(clock=clock)
manager._updateBuilderCounts = FakeMethod()
- manager.builder_factory.update()
manager.updateBuilderQueues()
self.assertFalse(is_transaction_in_progress())