launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #16342
[Merge] lp:~wgrant/launchpad/bug-966837 into lp:launchpad
William Grant has proposed merging lp:~wgrant/launchpad/bug-966837 into lp:launchpad.
Commit message:
Ignore stale PID files in make_pidfile, so unclean shutdowns don't block subsequent startups.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #966837 in Launchpad itself: "stale pid's on app servers are breaking NDT autodeploys"
https://bugs.launchpad.net/launchpad/+bug/966837
For more details, see:
https://code.launchpad.net/~wgrant/launchpad/bug-966837/+merge/201136
lp.services.pidfile.make_pidfile fails whenever the PID file exists, irrespective of whether the owning PID remains alive. This branch fixes the function to remove the file if the referenced PID can't be found, which usually comes up when an appserver spectacularly dies or after an unclean system shutdown.
--
https://code.launchpad.net/~wgrant/launchpad/bug-966837/+merge/201136
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/bug-966837 into lp:launchpad.
=== modified file 'lib/lp/services/doc/pidfile.txt'
--- lib/lp/services/doc/pidfile.txt 2012-06-29 08:40:05 +0000
+++ lib/lp/services/doc/pidfile.txt 2014-01-10 04:34:46 +0000
@@ -85,22 +85,21 @@
... except KeyboardInterrupt:
... pass'''
>>> def start_process():
- ... cmd = '%s -c "%s"' % (sys.executable, subprocess_code)
- ... subprocess.Popen(cmd, shell=True)
- ... for i in range(100):
+ ... real_pid = subprocess.Popen(
+ ... [sys.executable, '-c', subprocess_code]).pid
+ ... for i in range(10):
... if os.path.exists(pidfile_path('nuts')):
- ... return int(open(pidfile_path('nuts')).read())
+ ... if real_pid == int(open(pidfile_path('nuts')).read()):
+ ... return real_pid
... time.sleep(0.1)
... else:
... print 'Error: pid file was not created'
...
>>> def stop(pid, sig):
... os.kill(pid, sig)
- ... for i in range(300):
- ... if not os.path.exists(pidfile_path('nuts')):
- ... print 'Stopped successfully'
- ... break
- ... time.sleep(0.1)
+ ... os.waitpid(pid, 0)
+ ... if not os.path.exists(pidfile_path('nuts')):
+ ... print 'Stopped successfully'
... else:
... try:
... # Is it still here at all?
@@ -108,7 +107,8 @@
... except OSError as e:
... if e.errno == errno.ESRCH:
... print 'Error: pid file was not removed'
- ... else: raise
+ ... else:
+ ... raise
... else:
... print 'Error: process did not exit'
...
@@ -146,6 +146,28 @@
>>> stop(pid, signal.SIGTERM)
Stopped successfully
+make_pidfile also handles stale PID files, where the owning process
+terminated without removing the file, by removing the old file and
+continuing as normal.
+
+ >>> stale_pid = start_process()
+ >>> make_pidfile('nuts')
+ Traceback (most recent call last):
+ ...
+ RuntimeError: PID file /var/tmp/...nuts.pid already exists.
+ Already running?
+ >>> stop(stale_pid, signal.SIGKILL)
+ Error: pid file was not removed
+ >>> new_pid = start_process()
+ >>> new_pid == stale_pid
+ False
+ >>> new_pid == get_pid('nuts')
+ True
+ >>> stop(new_pid, signal.SIGTERM)
+ Stopped successfully
+ >>> print get_pid('nuts')
+ None
+
`get_pid`
---------
=== modified file 'lib/lp/services/pidfile.py'
--- lib/lp/services/pidfile.py 2012-01-01 02:58:52 +0000
+++ lib/lp/services/pidfile.py 2014-01-10 04:34:46 +0000
@@ -4,6 +4,7 @@
__metaclass__ = type
import atexit
+import errno
import os
from signal import (
signal,
@@ -35,18 +36,19 @@
inside it.
"""
pidfile = pidfile_path(service_name)
- if os.path.exists(pidfile):
- raise RuntimeError("PID file %s already exists. Already running?" %
- pidfile)
+ if is_locked(service_name):
+ raise RuntimeError(
+ "PID file %s already exists. Already running?" % pidfile)
atexit.register(remove_pidfile, service_name)
+
def remove_pidfile_handler(*ignored):
sys.exit(-1 * SIGTERM)
signal(SIGTERM, remove_pidfile_handler)
fd, tempname = tempfile.mkstemp(dir=os.path.dirname(pidfile))
outf = os.fdopen(fd, 'w')
- outf.write(str(os.getpid())+'\n')
+ outf.write(str(os.getpid()) + '\n')
outf.flush()
outf.close()
os.rename(tempname, pidfile)
@@ -85,3 +87,31 @@
return None
except ValueError:
raise ValueError("Invalid PID %s" % repr(pid))
+
+
+def is_locked(service_name, use_config=None):
+ """Check if a PID file is locked.
+
+ Will remove an existing PID file if the owning process no longer exists.
+ """
+ pid = get_pid(service_name, use_config)
+ if pid is None:
+ return False
+
+ # There's PID file with a PID in it. But if the PID no longer exists
+ # we should remove the stale file to unlock things.
+ # This is slightly racy, as another process could conceivably be
+ # right here at the same time. But that's sufficiently unlikely, and
+ # stale PIDs are sufficiently annoying, that it's a reasonable
+ # tradeoff.
+ try:
+ os.kill(pid, 0)
+ except OSError as e:
+ if e.errno == errno.ESRCH:
+ remove_pidfile(service_name, use_config)
+ return False
+ raise
+
+ # There's a PID file and we couldn't definitively say that the
+ # process no longer exists. Assume that we're locked.
+ return True
Follow ups