launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #00592
[Merge] lp:~julian-edwards/launchpad/catch-eintr-bug-616693 into lp:launchpad/devel
Julian Edwards has proposed merging lp:~julian-edwards/launchpad/catch-eintr-bug-616693 into lp:launchpad/devel.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
#616693 IBuilder.updateBuild() doesn't deal with EINTR
https://bugs.launchpad.net/bugs/616693
Fixes the linked bug by checking for EINTR and re-trying a few times.
--
https://code.launchpad.net/~julian-edwards/launchpad/catch-eintr-bug-616693/+merge/32517
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~julian-edwards/launchpad/catch-eintr-bug-616693 into lp:launchpad/devel.
=== modified file 'lib/lp/buildmaster/model/builder.py'
--- lib/lp/buildmaster/model/builder.py 2010-08-06 10:48:49 +0000
+++ lib/lp/buildmaster/model/builder.py 2010-08-12 20:44:20 +0000
@@ -12,6 +12,7 @@
'updateBuilderStatus',
]
+import errno
import httplib
import gzip
import logging
@@ -200,24 +201,36 @@
if logger:
logger.debug('Checking %s' % builder.name)
- try:
- builder.checkSlaveAlive()
- builder.rescueIfLost(logger)
- # Catch only known exceptions.
- # XXX cprov 2007-06-15 bug=120571: ValueError & TypeError catching is
- # disturbing in this context. We should spend sometime sanitizing the
- # exceptions raised in the Builder API since we already started the
- # main refactoring of this area.
- except (ValueError, TypeError, xmlrpclib.Fault,
- BuildDaemonError), reason:
- builder.failBuilder(str(reason))
- if logger:
- logger.warn(
- "%s (%s) marked as failed due to: %s",
- builder.name, builder.url, builder.failnotes, exc_info=True)
- except socket.error, reason:
- error_message = str(reason)
- builder.handleTimeout(logger, error_message)
+ MAX_EINTR_RETRIES = 5 # pulling a number out of my a$$ here
+ eintr_retry_count = 0
+
+ while True:
+ try:
+ builder.checkSlaveAlive()
+ builder.rescueIfLost(logger)
+ # Catch only known exceptions.
+ # XXX cprov 2007-06-15 bug=120571: ValueError & TypeError catching is
+ # disturbing in this context. We should spend sometime sanitizing the
+ # exceptions raised in the Builder API since we already started the
+ # main refactoring of this area.
+ except (ValueError, TypeError, xmlrpclib.Fault,
+ BuildDaemonError), reason:
+ builder.failBuilder(str(reason))
+ if logger:
+ logger.warn(
+ "%s (%s) marked as failed due to: %s",
+ builder.name, builder.url, builder.failnotes, exc_info=True)
+ except socket.error, reason:
+ # In Python 2.6 we can use IOError instead.
+ if reason.errno == errno.EINTR:
+ eintr_retry_count += 1
+ if eintr_retry_count != MAX_EINTR_RETRIES:
+ continue
+ error_message = str(reason)
+ builder.handleTimeout(logger, error_message)
+ return
+ else:
+ return
class Builder(SQLBase):
=== modified file 'lib/lp/buildmaster/tests/test_builder.py'
--- lib/lp/buildmaster/tests/test_builder.py 2010-08-02 16:00:50 +0000
+++ lib/lp/buildmaster/tests/test_builder.py 2010-08-12 20:44:20 +0000
@@ -3,6 +3,9 @@
"""Test Builder features."""
+import errno
+import socket
+
from zope.component import getUtility
from zope.security.proxy import removeSecurityProxy
@@ -23,6 +26,7 @@
BinaryPackageBuildBehavior)
from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
from lp.testing import TestCaseWithFactory
+from lp.testing.fakemethod import FakeMethod
class TestBuilder(TestCaseWithFactory):
@@ -43,6 +47,43 @@
bq = builder.getBuildQueue()
self.assertIs(None, bq)
+ def test_updateBuilderStatus_catches_repeated_EINTR(self):
+ # A single EINTR return from a socket operation should cause the
+ # operation to be retried, not fail/reset the builder.
+ builder = removeSecurityProxy(self.factory.makeBuilder())
+ builder.handleTimeout = FakeMethod()
+ builder.rescueIfLost = FakeMethod()
+
+ def _fake_checkSlaveAlive():
+ # Raise an EINTR error for all invocations.
+ raise socket.error(errno.EINTR, "fake eintr")
+
+ builder.checkSlaveAlive = _fake_checkSlaveAlive
+ builder.updateStatus()
+
+ # builder.updateStatus should eventually have called
+ # handleTimeout()
+ self.assertEqual(1, builder.handleTimeout.call_count)
+
+ def test_updateBuilderStatus_catches_single_EINTR(self):
+ builder = removeSecurityProxy(self.factory.makeBuilder())
+ builder.handleTimeout = FakeMethod()
+ builder.rescueIfLost = FakeMethod()
+ self.eintr_returned = False
+
+ def _fake_checkSlaveAlive():
+ # raise an EINTR error for the first invocation only.
+ if not self.eintr_returned:
+ self.eintr_returned = True
+ raise socket.error(errno.EINTR, "fake eintr")
+
+ builder.checkSlaveAlive = _fake_checkSlaveAlive
+ builder.updateStatus()
+
+ # builder.updateStatus should never call handleTimeout() for a
+ # single EINTR.
+ self.assertEqual(0, builder.handleTimeout.call_count)
+
class TestFindBuildCandidateBase(TestCaseWithFactory):
"""Setup the test publisher and some builders."""