← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:number-cruncher-lost-build-queue into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:number-cruncher-lost-build-queue into launchpad:master.

Commit message:
number-cruncher: Count builders as building if they have a build queue

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/393642

We don't really need to check the build queue status to determine that a builder is building; just having a build queue is sufficient, even if it's e.g. in the process of a cancelling a build.  Compare the logic in SlaveScanner.scan.

This also fixes occasional LostObjectError exceptions when a build completes between the start of the transaction taken by `self.builder_factory.update()` and the start of the transaction taken by `self._updateBuilderCounts()`, since evaluating `builder.build_queue.status` can't be done solely using the prefetched builder vitals and requires a database query.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:number-cruncher-lost-build-queue into launchpad:master.
diff --git a/lib/lp/services/statsd/numbercruncher.py b/lib/lp/services/statsd/numbercruncher.py
index 6a43c36..31191d4 100644
--- a/lib/lp/services/statsd/numbercruncher.py
+++ b/lib/lp/services/statsd/numbercruncher.py
@@ -20,10 +20,7 @@ from twisted.internet.task import LoopingCall
 from twisted.python import log
 from zope.component import getUtility
 
-from lp.buildmaster.enums import (
-    BuilderCleanStatus,
-    BuildQueueStatus,
-    )
+from lp.buildmaster.enums import BuilderCleanStatus
 from lp.buildmaster.interfaces.builder import IBuilderSet
 from lp.buildmaster.manager import PrefetchedBuilderFactory
 from lp.services.statsd.interfaces.statsd_client import IStatsdClient
@@ -109,8 +106,7 @@ class NumberCruncher(service.Service):
                     counts['disabled'] += 1
                 elif builder.clean_status == BuilderCleanStatus.CLEANING:
                     counts['cleaning'] += 1
-                elif (builder.build_queue and
-                      builder.build_queue.status == BuildQueueStatus.RUNNING):
+                elif builder.build_queue:
                     counts['building'] += 1
                 elif builder.clean_status == BuilderCleanStatus.CLEAN:
                     counts['idle'] += 1
diff --git a/lib/lp/services/statsd/tests/test_numbercruncher.py b/lib/lp/services/statsd/tests/test_numbercruncher.py
index 020ef76..9a43f6f 100644
--- a/lib/lp/services/statsd/tests/test_numbercruncher.py
+++ b/lib/lp/services/statsd/tests/test_numbercruncher.py
@@ -7,6 +7,7 @@ from __future__ import absolute_import, print_function, unicode_literals
 
 __metaclass__ = type
 
+from storm.store import Store
 from testtools.matchers import (
     Equals,
     MatchesListwise,
@@ -19,6 +20,7 @@ from zope.component import getUtility
 from lp.buildmaster.enums import BuilderCleanStatus
 from lp.buildmaster.interactor import BuilderSlave
 from lp.buildmaster.interfaces.processor import IProcessorSet
+from lp.buildmaster.model.buildqueue import BuildQueue
 from lp.buildmaster.tests.mock_slaves import OkSlave
 from lp.services.database.isolation import is_transaction_in_progress
 from lp.services.database.policy import DatabaseBlockedPolicy
@@ -46,7 +48,6 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
         transaction.commit()
         clock = task.Clock()
         manager = NumberCruncher(clock=clock)
-        manager.builder_factory.update()
         manager.updateBuilderStats()
 
         self.assertFalse(is_transaction_in_progress())
@@ -62,7 +63,6 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
         transaction.commit()
         clock = task.Clock()
         manager = NumberCruncher(clock=clock)
-        manager.builder_factory.update()
         manager.updateBuilderStats()
 
         self.assertFalse(is_transaction_in_progress())
@@ -75,14 +75,42 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
         self.assertEqual(4, len(amd64_calls))
 
     def test_correct_values_counts(self):
-        builder = self.factory.makeBuilder(
-            processors=[getUtility(IProcessorSet).getByName('amd64')])
-        builder.setCleanStatus(BuilderCleanStatus.CLEANING)
+        for _ in range(3):
+            cleaning_builder = self.factory.makeBuilder(
+                processors=[getUtility(IProcessorSet).getByName('amd64')])
+            cleaning_builder.setCleanStatus(BuilderCleanStatus.CLEANING)
+        for _ in range(4):
+            idle_builder = self.factory.makeBuilder(
+                processors=[getUtility(IProcessorSet).getByName('amd64')])
+            idle_builder.setCleanStatus(BuilderCleanStatus.CLEAN)
+            old_build = self.factory.makeSnapBuild()
+            old_build.queueBuild()
+            old_build.buildqueue_record.markAsBuilding(builder=idle_builder)
+            old_build.buildqueue_record.destroySelf()
+        builds = []
+        for _ in range(2):
+            building_builder = self.factory.makeBuilder(
+                processors=[getUtility(IProcessorSet).getByName('amd64')])
+            building_builder.setCleanStatus(BuilderCleanStatus.CLEAN)
+            build = self.factory.makeSnapBuild()
+            build.queueBuild()
+            build.buildqueue_record.markAsBuilding(builder=building_builder)
+            builds.append(build)
         self.patch(BuilderSlave, 'makeBuilderSlave', FakeMethod(OkSlave()))
         transaction.commit()
         clock = task.Clock()
         manager = NumberCruncher(clock=clock)
         manager.builder_factory.update()
+        # Simulate one of the builds having finished between the builder
+        # factory being updated and _updateBuilderCounts being called.  (In
+        # this case we count the builder as building anyway, since
+        # everything is prefetched.)  We do this in an unusual way so that
+        # Storm doesn't know that the object has been deleted until it tries
+        # to reload it.
+        Store.of(builds[1].buildqueue_record).find(
+            BuildQueue, id=builds[1].buildqueue_record.id).remove()
+        transaction.commit()
+        manager.builder_factory.update = FakeMethod()
         manager.updateBuilderStats()
 
         self.assertFalse(is_transaction_in_progress())
@@ -96,13 +124,13 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
                     'virtualized=True,env=test', 0)),
                  Equals((
                      'builders,status=building,arch=amd64,'
-                     'virtualized=True,env=test', 0)),
+                     'virtualized=True,env=test', 2)),
                  Equals((
                      'builders,status=idle,arch=amd64,'
-                     'virtualized=True,env=test', 0)),
+                     'virtualized=True,env=test', 4)),
                  Equals((
                      'builders,status=cleaning,arch=amd64,'
-                     'virtualized=True,env=test', 1))
+                     'virtualized=True,env=test', 3))
                  ]))
 
     def test_updateBuilderStats_error(self):
@@ -128,7 +156,6 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
         clock = task.Clock()
         manager = NumberCruncher(clock=clock)
         manager._updateBuilderCounts = FakeMethod()
-        manager.builder_factory.update()
         manager.updateBuilderQueues()
 
         self.assertFalse(is_transaction_in_progress())