launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #22804
[Merge] lp:~cjwatson/launchpad/loggerhead-shutdown-race into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/loggerhead-shutdown-race into lp:launchpad.
Commit message:
Fix stop-loggerhead to do a two-stage kill.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/loggerhead-shutdown-race/+merge/352884
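This avoids problems during deployments where stop-loggerhead exits before the old process has actually stopped. Previously the script sent SIGTERM and returned immediately; it now uses two_stage_kill to send SIGTERM, poll every 0.5 seconds (up to 120 times, i.e. about a minute) until the process is gone, and fall back to SIGKILL if it is still running.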
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/loggerhead-shutdown-race into lp:launchpad.
=== modified file 'lib/lp/services/osutils.py'
--- lib/lp/services/osutils.py 2018-06-06 12:46:56 +0000
+++ lib/lp/services/osutils.py 2018-08-10 10:18:30 +0000
@@ -110,13 +110,15 @@
raise
-def two_stage_kill(pid, poll_interval=0.1, num_polls=50):
+def two_stage_kill(pid, poll_interval=0.1, num_polls=50, get_status=True):
"""Kill process 'pid' with SIGTERM. If it doesn't die, SIGKILL it.
:param pid: The pid of the process to kill.
:param poll_interval: The polling interval used to check if the
process is still around.
:param num_polls: The number of polls to do before doing a SIGKILL.
+ :param get_status: If True, collect the process' exit status (which
+ requires it to be a child of the process running this function).
"""
# Kill the process.
_kill_may_race(pid, SIGTERM)
@@ -124,11 +126,16 @@
# Poll until the process has ended.
for i in range(num_polls):
try:
- # Reap the child process and get its return value. If it's not
- # gone yet, continue.
- new_pid, result = os.waitpid(pid, os.WNOHANG)
- if new_pid:
- return result
+ if get_status:
+ # Reap the child process and get its return value. If it's
+ # not gone yet, continue.
+ new_pid, result = os.waitpid(pid, os.WNOHANG)
+ if new_pid:
+ return result
+ else:
+ # If the process isn't gone yet, continue.
+ if not process_exists(pid):
+ return
time.sleep(poll_interval)
except OSError as e:
if e.errno in (errno.ESRCH, errno.ECHILD):
=== modified file 'scripts/stop-loggerhead.py'
--- scripts/stop-loggerhead.py 2018-06-06 12:46:56 +0000
+++ scripts/stop-loggerhead.py 2018-08-10 10:18:30 +0000
@@ -8,10 +8,12 @@
import _pythonpath
from optparse import OptionParser
-import os
-import signal
import sys
+from lp.services.osutils import (
+ process_exists,
+ two_stage_kill,
+ )
from lp.services.pidfile import get_pid
@@ -20,9 +22,11 @@
pid = get_pid("codebrowse")
-try:
- os.kill(pid, 0)
-except OSError as e:
+if pid is None:
+ # Already stopped.
+ sys.exit(0)
+
+if not process_exists(pid):
print('Stale pid file; server is not running.')
sys.exit(1)
@@ -30,4 +34,5 @@
print('Shutting down previous server @ pid %d.' % (pid,))
print()
-os.kill(pid, signal.SIGTERM)
+# A busy gunicorn can take a while to shut down.
+two_stage_kill(pid, poll_interval=0.5, num_polls=120, get_status=False)
Follow ups