← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:number-cruncher-errors into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:number-cruncher-errors into launchpad:master.

Commit message:
Handle errors in number-cruncher loops

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/393599

If a LoopingCall raises an exception, then Twisted won't schedule it to be called again.  Log any exceptions and return normally instead.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:number-cruncher-errors into launchpad:master.
diff --git a/lib/lp/services/statsd/numbercruncher.py b/lib/lp/services/statsd/numbercruncher.py
index 85fa2cd..6a43c36 100644
--- a/lib/lp/services/statsd/numbercruncher.py
+++ b/lib/lp/services/statsd/numbercruncher.py
@@ -74,15 +74,19 @@ class NumberCruncher(service.Service):
         This aborts the current transaction before returning.
         """
         self.logger.debug("Updating build queue stats.")
-        queue_details = getUtility(IBuilderSet).getBuildQueueSizes()
-        for queue_type, contents in queue_details.items():
-            virt = queue_type == 'virt'
-            for arch, value in contents.items():
-                gauge_name = "buildqueue,virtualized={},arch={},env={}".format(
-                    virt, arch, self.statsd_client.lp_environment)
-                self.logger.debug("{}: {}".format(gauge_name, value[0]))
-                self.statsd_client.gauge(gauge_name, value[0])
-        self.logger.debug("Build queue stats update complete.")
+        try:
+            queue_details = getUtility(IBuilderSet).getBuildQueueSizes()
+            for queue_type, contents in queue_details.items():
+                virt = queue_type == 'virt'
+                for arch, value in contents.items():
+                    gauge_name = (
+                        "buildqueue,virtualized={},arch={},env={}".format(
+                            virt, arch, self.statsd_client.lp_environment))
+                    self.logger.debug("{}: {}".format(gauge_name, value[0]))
+                    self.statsd_client.gauge(gauge_name, value[0])
+            self.logger.debug("Build queue stats update complete.")
+        except Exception:
+            self.logger.exception("Failure while updating build queue stats:")
         transaction.abort()
 
     def _updateBuilderCounts(self):
@@ -123,8 +127,11 @@ class NumberCruncher(service.Service):
 
         This aborts the current transaction before returning.
         """
-        self.builder_factory.update()
-        self._updateBuilderCounts()
+        try:
+            self.builder_factory.update()
+            self._updateBuilderCounts()
+        except Exception:
+            self.logger.exception("Failure while updating builder stats:")
         transaction.abort()
 
     def startService(self):
diff --git a/lib/lp/services/statsd/tests/test_numbercruncher.py b/lib/lp/services/statsd/tests/test_numbercruncher.py
index ede74e1..020ef76 100644
--- a/lib/lp/services/statsd/tests/test_numbercruncher.py
+++ b/lib/lp/services/statsd/tests/test_numbercruncher.py
@@ -21,6 +21,8 @@ from lp.buildmaster.interactor import BuilderSlave
 from lp.buildmaster.interfaces.processor import IProcessorSet
 from lp.buildmaster.tests.mock_slaves import OkSlave
 from lp.services.database.isolation import is_transaction_in_progress
+from lp.services.database.policy import DatabaseBlockedPolicy
+from lp.services.log.logger import BufferLogger
 from lp.services.statsd.numbercruncher import NumberCruncher
 from lp.services.statsd.tests import StatsMixin
 from lp.testing import TestCaseWithFactory
@@ -103,6 +105,18 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
                      'virtualized=True,env=test', 1))
                  ]))
 
+    def test_updateBuilderStats_error(self):
+        clock = task.Clock()
+        cruncher = NumberCruncher(clock=clock)
+        cruncher.logger = BufferLogger()
+        with DatabaseBlockedPolicy():
+            cruncher.updateBuilderStats()
+
+        self.assertFalse(is_transaction_in_progress())
+        self.assertIn(
+            "Failure while updating builder stats:",
+            cruncher.logger.getLogBuffer())
+
     def test_updateBuilderQueues(self):
         builder = self.factory.makeBuilder(
             processors=[getUtility(IProcessorSet).getByName('amd64')])
@@ -127,6 +141,18 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
                  Equals(('buildqueue,virtualized=False,arch=386,env=test', 1))
                  ]))
 
+    def test_updateBuilderQueues_error(self):
+        clock = task.Clock()
+        cruncher = NumberCruncher(clock=clock)
+        cruncher.logger = BufferLogger()
+        with DatabaseBlockedPolicy():
+            cruncher.updateBuilderQueues()
+
+        self.assertFalse(is_transaction_in_progress())
+        self.assertIn(
+            "Failure while updating build queue stats:",
+            cruncher.logger.getLogBuffer())
+
     def test_startService_starts_update_queues_loop(self):
         clock = task.Clock()
         cruncher = NumberCruncher(clock=clock)