← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~julian-edwards/launchpad/timeouts-feature-bug-681426 into lp:launchpad

 

Julian Edwards has proposed merging lp:~julian-edwards/launchpad/timeouts-feature-bug-681426 into lp:launchpad.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  #586359 Virtual builders are sometimes very slow to accept connections
  https://bugs.launchpad.net/bugs/586359


See the bug for gory details.

bin/test -cvv test_builder test_virtual_job_dispatch_pings_before_building
-- 
https://code.launchpad.net/~julian-edwards/launchpad/timeouts-feature-bug-681426/+merge/41881
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~julian-edwards/launchpad/timeouts-feature-bug-681426 into lp:launchpad.
=== modified file 'lib/lp/buildmaster/model/builder.py'
--- lib/lp/buildmaster/model/builder.py	2010-11-18 17:54:06 +0000
+++ lib/lp/buildmaster/model/builder.py	2010-11-25 17:05:53 +0000
@@ -512,39 +512,26 @@
         else:
             d = defer.succeed(None)
 
-        def resume_done(ignored):
+        def ping_done(ignored):
             return self.current_build_behavior.dispatchBuildToSlave(
                 build_queue_item.id, logger)
 
-        def eb_slave_failure(failure):
-            failure.trap(BuildSlaveFailure)
-            e = failure.value
-            self.failBuilder(
-                "Exception (%s) when setting up to new job" % (e,))
-
-        def eb_cannot_fetch_file(failure):
-            failure.trap(CannotFetchFile)
-            e = failure.value
-            message = """Slave '%s' (%s) was unable to fetch file.
-            ****** URL ********
-            %s
-            ****** INFO *******
-            %s
-            *******************
-            """ % (self.name, self.url, e.file_url, e.error_information)
-            raise BuildDaemonError(message)
-
-        def eb_socket_error(failure):
-            failure.trap(socket.error)
-            e = failure.value
-            error_message = "Exception (%s) when setting up new job" % (e,)
-            d = self.handleTimeout(logger, error_message)
-            return d.addBoth(lambda ignored: failure)
+        def resume_done(ignored):
+            # Before we try and contact the resumed slave, we're going
+            # to send it a message.  This is to ensure it's accepting
+            # packets from the outside world, because testing has shown
+            # that the first packet will randomly fail for no apparent
+            # reason.  This could be a quirk of the Xen guest, we're not
+            # sure.  We also don't care about the result from this message,
+            # just that it's sent, hence the "addBoth".
+            if self.virtualized:
+                d = self.slave.echo("ping")
+            else:
+                d = defer.succeed(None)
+            d.addBoth(ping_done)
+            return d
 
         d.addCallback(resume_done)
-        d.addErrback(eb_slave_failure)
-        d.addErrback(eb_cannot_fetch_file)
-        d.addErrback(eb_socket_error)
         return d
 
     def failBuilder(self, reason):

=== modified file 'lib/lp/buildmaster/tests/test_builder.py'
--- lib/lp/buildmaster/tests/test_builder.py	2010-11-22 10:11:44 +0000
+++ lib/lp/buildmaster/tests/test_builder.py	2010-11-25 17:05:53 +0000
@@ -247,6 +247,19 @@
             self.assertEqual(BuildStatus.BUILDING, build.status)
         return d.addCallback(check_build_started)
 
+    def test_virtual_job_dispatch_pings_before_building(self):
+        # We need to send a ping to the builder to work around a bug
+        # where sometimes the first network packet sent is dropped.
+        builder, build = self._setupBinaryBuildAndBuilder()
+        candidate = build.queueBuild()
+        removeSecurityProxy(builder)._findBuildCandidate = FakeMethod(
+            result=candidate)
+        d = builder.findAndStartJob()
+        def check_build_started(candidate):
+            self.assertIn(
+                ('echo', 'ping'), removeSecurityProxy(builder.slave).call_log)
+        return d.addCallback(check_build_started)
+
     def test_slave(self):
         # Builder.slave is a BuilderSlave that points at the actual Builder.
         # The Builder is only ever used in scripts that run outside of the


Follow ups