launchpad-reviewers team mailing list archive
  
  - 
     launchpad-reviewers team launchpad-reviewers team
- 
    Mailing list archive
  
- 
    Message #01867
  
 [Merge] lp:~jameinel/launchpad/lp-service	into lp:launchpad
  
John A Meinel has proposed merging lp:~jameinel/launchpad/lp-service into lp:launchpad.
Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  #660264 bzr+ssh on launchpad should fork, not exec
  https://bugs.launchpad.net/bugs/660264
This adds a --pid-file option to "bzr launchpad-forking-service". Which then causes the service to Daemonize itself, and write its pid to the given file.
This was brought out of the discussion of trying to get the service rolled out into qastaging.
see https://rt.admin.canonical.com/Ticket/Display.html?=42199
It would seem that 'start-stop-daemon' could be told to do the forking and pid-file management (--background and --make-pidfile), but it seems to state that "This is a last resort, and is only meant for programs that either make no sense forking on their own, or where it's not feasible to add the code for them to do this themselves."
The change seems reasonably localized, and hopefully reasonably tested. In general with IPC, I'm not always sure how to avoid both race conditions, hangs, and spurious failures. I chose 2.0s as a way to wait for the process to cleanup properly, but not block forever and hang the test suite.
-- 
https://code.launchpad.net/~jameinel/launchpad/lp-service/+merge/40386
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~jameinel/launchpad/lp-service into lp:launchpad.
=== modified file 'bzrplugins/lpserve/__init__.py'
--- bzrplugins/lpserve/__init__.py	2010-10-07 23:43:40 +0000
+++ bzrplugins/lpserve/__init__.py	2010-11-08 22:00:00 +0000
@@ -568,6 +568,10 @@
                     self.log(client_addr, 'request timeout failure: %s' % (e,))
                     conn.sendall('FAILURE\nrequest timed out\n')
                     conn.close()
+                except Exception, e:
+                    trace.log_exception_quietly()
+                    self.log(client_addr, 'trapped a failure while handling'
+                                          ' connection: %s' % (e,))
             self._poll_children()
 
     def log(self, client_addr, message):
@@ -759,6 +763,8 @@
                         help="Do/don't preload libraries before startup."),
                      Option('children-timeout', type=int, argname='SEC',
                         help="Only wait SEC seconds for children to exit"),
+                     Option('pid-file', type=unicode,
+                        help='Write the process PID to this file.')
                     ]
 
     def _preload_libraries(self):
@@ -768,8 +774,49 @@
             except ImportError, e:
                 trace.mutter('failed to preload %s: %s' % (pyname, e))
 
+
+    def _daemonize(self, pid_filename):
+        """Turn this process into a child-of-init daemon.
+
+        Upon request, we relinquish our control and switch to daemon mode,
+        writing out the final pid of the daemon process.
+        """
+        out = '/dev/null'
+        # If fork fails, it will bubble out naturally and be reported by the
+        # cmd logic
+        pid = os.fork()
+        if pid > 0:
+            # Original process exits cleanly
+            os._exit(0)
+
+        # Do we need to os.chdir('/') or anything else funky like that?
+        os.setsid()
+
+        # fork again, to truly become a daemon.
+        pid = os.fork()
+        if pid > 0:
+            os._exit(0)
+
+        # Redirect file handles
+        stdin = open(out, 'r')
+        os.dup2(stdin.fileno(), sys.stdin.fileno())
+        stdout = open(out, 'a+')
+        os.dup2(stdout.fileno(), sys.stdout.fileno())
+        stderr = open(out, 'a+', 0)
+        os.dup2(stderr.fileno(), sys.stderr.fileno())
+
+        # Now that we are a daemon, let people know what pid is running
+        f = open(pid_filename, 'wb')
+        try:
+            f.write('%d\n' % (os.getpid(),))
+        finally:
+            f.close()
+
     def run(self, path=None, perms=None, preload=True,
-            children_timeout=LPForkingService.WAIT_FOR_CHILDREN_TIMEOUT):
+            children_timeout=LPForkingService.WAIT_FOR_CHILDREN_TIMEOUT,
+            pid_file=None):
+        if pid_file is not None:
+            self._daemonize(pid_file)
         if path is None:
             path = LPForkingService.DEFAULT_PATH
         if perms is None:
@@ -781,6 +828,12 @@
         service = LPForkingService(path, perms)
         service.WAIT_FOR_CHILDREN_TIMEOUT = children_timeout
         service.main_loop()
+        if pid_file is not None:
+            try:
+                os.remove(pid_file)
+            except (OSError, IOError), e:
+                trace.mutter('Failed to cleanup pid_file: %s\n%s'
+                             % (pid_file, e))
 
 register_command(cmd_launchpad_forking_service)
 
=== modified file 'bzrplugins/lpserve/test_lpserve.py'
--- bzrplugins/lpserve/test_lpserve.py	2010-10-07 23:43:40 +0000
+++ bzrplugins/lpserve/test_lpserve.py	2010-11-08 22:00:00 +0000
@@ -1,6 +1,7 @@
 # Copyright 2010 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
+import errno
 import os
 import signal
 import socket
@@ -383,6 +384,18 @@
         self.assertContainsRe(path, '/lp-forking-service-child-')
         return path, pid, sock
 
+    def _start_subprocess(self, path, env_changes):
+        proc = self.start_bzr_subprocess(
+            ['lp-service', '--path', path, '--no-preload',
+             '--children-timeout=1'],
+            env_changes=env_changes)
+        trace.mutter('started lp-service subprocess')
+        expected = 'Listening on socket: %s\n' % (path,)
+        path_line = proc.stderr.readline()
+        trace.mutter(path_line)
+        self.assertEqual(expected, path_line)
+        return proc
+
     def start_service_subprocess(self):
         # Make sure this plugin is exposed to the subprocess
         # SLOOWWW (~2 seconds, which is why we are doing the work anyway)
@@ -406,16 +419,7 @@
         os.remove(path) # service wants create it as a socket
         env_changes = {'BZR_PLUGIN_PATH': lpserve.__path__[0],
                        'BZR_LOG': tempname}
-        proc = self.start_bzr_subprocess(
-            ['lp-service', '--path', path, '--no-preload',
-             '--children-timeout=1'],
-            env_changes=env_changes)
-        trace.mutter('started lp-service subprocess')
-        expected = 'Listening on socket: %s\n' % (path,)
-        path_line = proc.stderr.readline()
-        trace.mutter(path_line)
-        self.assertEqual(expected, path_line)
-        # The process won't delete it, so we do
+        proc = self._start_subprocess(path, env_changes)
         return proc, path
 
     def stop_service(self):
@@ -469,6 +473,9 @@
         code = int(response.split('\n', 1)[1])
         self.assertEqual(expected_code, code)
 
+
+class TestLPServiceInSubprocess(TestCaseWithLPForkingServiceSubprocess):
+
     def test_fork_lp_serve_hello(self):
         path, _, sock = self.send_fork_request('lp-serve --inet 2')
         stdout_content, stderr_content = self.communicate_with_fork(path,
@@ -532,3 +539,126 @@
 
     def test_sigint_exits_nicely(self):
         self._check_exits_nicely(signal.SIGINT)
+
+
+class TestCaseWithLPForkingServiceDaemon(
+    TestCaseWithLPForkingServiceSubprocess):
+    """Test LPForkingService interaction, when run in daemon mode."""
+
+    def _cleanup_daemon(self, pid, pid_filename):
+        try:
+            os.kill(pid, signal.SIGKILL)
+        except (OSError, IOError), e:
+            trace.mutter('failed to kill pid %d, might be already dead: %s'
+                         % (pid, e))
+        try:
+            os.remove(pid_filename)
+        except (OSError, IOError), e:
+            if e.errno != errno.ENOENT:
+                trace.mutter('failed to remove %r: %s'
+                             % (pid_filename, e))
+
+    def _start_subprocess(self, path, env_changes):
+        fd, pid_filename = tempfile.mkstemp(prefix='tmp-lp-forking-service-',
+                                            suffix='.pid')
+        self.service_pid_filename = pid_filename
+        os.close(fd)
+        proc = self.start_bzr_subprocess(
+            ['lp-service', '--path', path, '--no-preload',
+             '--children-timeout=1', '--pid-file', pid_filename],
+            env_changes=env_changes)
+        trace.mutter('started lp-service daemon')
+        # We wait for the spawned process to exit, expecting it to report the
+        # final pid into the pid_filename.
+        tnow = time.time()
+        tstop_waiting = tnow + 1.0
+        proc.wait()
+        while tnow < tstop_waiting:
+            tnow = time.time()
+            f = open(pid_filename, 'rb')
+            try:
+                pid = f.read()
+            finally:
+                f.close()
+            if pid:
+                trace.mutter('found pid: %r' % (pid,))
+                break
+            else:
+                time.sleep(0.1)
+        pid = int(pid.strip())
+        # This is now the pid of the final daemon
+        trace.mutter('lp-forking-service daemon at pid %s' % (pid,))
+        # Because nothing else will clean this up, add this final handler to
+        # clean up if all else fails.
+        self.addCleanup(self._cleanup_daemon, pid, pid_filename)
+        # self.service_process will now be the pid of the daemon, rather than a
+        # Popen object.
+        return pid
+
+    def stop_service(self):
+        if self.service_process is None:
+            # Already stopped
+            return
+        # First, try to stop the service gracefully, by sending a 'quit'
+        # message
+        try:
+            response = self.send_message_to_service('quit\n')
+        except socket.error, e:
+            # Ignore a failure to connect, the service must be stopping/stopped
+            # already
+            response = None
+        if response is not None:
+            self.assertEqual('ok\nquit command requested... exiting\n',
+                             response)
+        # Wait for the process to actually exit, or force it if necessary.
+        tnow = time.time()
+        tend = tnow + 2.0
+        # We'll be nice a couple of times, and then get mean
+        attempts = [None, None, None, signal.SIGTERM, signal.SIGKILL]
+        stopped = False
+        unclean = False
+        while tnow < tend:
+            try:
+                os.kill(self.service_process, 0)
+            except (OSError, IOError), e:
+                if e.errno == errno.ESRCH:
+                    # The process has successfully exited
+                    stopped = True
+                    break
+                raise
+            else:
+                # The process has not exited yet
+                time.sleep(0.1)
+                if attempts:
+                    sig = attempts.pop(0)
+                    if sig is not None:
+                        unclean = True
+                        try:
+                            os.kill(self.service_process, sig)
+                        except (OSError, IOError), e:
+                            if e.errno == errno.ESRCH:
+                                stopped = True
+                                break
+                            raise
+        if not stopped:
+            self.fail('Unable to stop the daemon process (pid %s) after 2.0s'
+                      % (self.service_process,))
+        elif unclean:
+            self.fail('Process (pid %s) had to be shut-down'
+                      % (self.service_process,))
+        self.service_process = None
+
+    def test_simple_start_and_stop(self):
+        pass # All the work is done in setUp()
+
+    def test_starts_and_cleans_up(self):
+        # The service should be up and responsive
+        response = self.send_message_to_service('hello\n')
+        self.assertEqual('ok\nyep, still alive\n', response)
+        self.failUnless(os.path.isfile(self.service_pid_filename))
+        with open(self.service_pid_filename, 'rb') as f:
+            content = f.read()
+        self.assertEqualDiff('%d\n' % (self.service_process,), content)
+        # We're done, shut it down
+        self.stop_service()
+        self.failIf(os.path.isfile(self.service_pid_filename))
Follow ups