← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/loggerhead-shutdown-race into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/loggerhead-shutdown-race into lp:launchpad.

Commit message:
Fix stop-loggerhead to do a two-stage kill.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/loggerhead-shutdown-race/+merge/352884

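This avoids problems during deployments where stop-loggerhead exits before the old process has actually stopped: previously the script sent SIGTERM and returned immediately, whereas it now polls until the process is gone (for roughly 60 seconds) and then falls back to SIGKILL.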
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/loggerhead-shutdown-race into lp:launchpad.
=== modified file 'lib/lp/services/osutils.py'
--- lib/lp/services/osutils.py	2018-06-06 12:46:56 +0000
+++ lib/lp/services/osutils.py	2018-08-10 10:18:30 +0000
@@ -110,13 +110,15 @@
         raise
 
 
-def two_stage_kill(pid, poll_interval=0.1, num_polls=50):
+def two_stage_kill(pid, poll_interval=0.1, num_polls=50, get_status=True):
     """Kill process 'pid' with SIGTERM. If it doesn't die, SIGKILL it.
 
     :param pid: The pid of the process to kill.
     :param poll_interval: The polling interval used to check if the
         process is still around.
     :param num_polls: The number of polls to do before doing a SIGKILL.
+    :param get_status: If True, collect the process' exit status (which
+        requires it to be a child of the process running this function).
     """
     # Kill the process.
     _kill_may_race(pid, SIGTERM)
@@ -124,11 +126,16 @@
     # Poll until the process has ended.
     for i in range(num_polls):
         try:
-            # Reap the child process and get its return value. If it's not
-            # gone yet, continue.
-            new_pid, result = os.waitpid(pid, os.WNOHANG)
-            if new_pid:
-                return result
+            if get_status:
+                # Reap the child process and get its return value. If it's
+                # not gone yet, continue.
+                new_pid, result = os.waitpid(pid, os.WNOHANG)
+                if new_pid:
+                    return result
+            else:
+                # If the process isn't gone yet, continue.
+                if not process_exists(pid):
+                    return
             time.sleep(poll_interval)
         except OSError as e:
             if e.errno in (errno.ESRCH, errno.ECHILD):

=== modified file 'scripts/stop-loggerhead.py'
--- scripts/stop-loggerhead.py	2018-06-06 12:46:56 +0000
+++ scripts/stop-loggerhead.py	2018-08-10 10:18:30 +0000
@@ -8,10 +8,12 @@
 import _pythonpath
 
 from optparse import OptionParser
-import os
-import signal
 import sys
 
+from lp.services.osutils import (
+    process_exists,
+    two_stage_kill,
+    )
 from lp.services.pidfile import get_pid
 
 
@@ -20,9 +22,11 @@
 
 pid = get_pid("codebrowse")
 
-try:
-    os.kill(pid, 0)
-except OSError as e:
+if pid is None:
+    # Already stopped.
+    sys.exit(0)
+
+if not process_exists(pid):
     print('Stale pid file; server is not running.')
     sys.exit(1)
 
@@ -30,4 +34,5 @@
 print('Shutting down previous server @ pid %d.' % (pid,))
 print()
 
-os.kill(pid, signal.SIGTERM)
+# A busy gunicorn can take a while to shut down.
+two_stage_kill(pid, poll_interval=0.5, num_polls=120, get_status=False)


Follow ups