launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #05172
[Merge] lp:~stub/launchpad/trivial into lp:launchpad
Stuart Bishop has proposed merging lp:~stub/launchpad/trivial into lp:launchpad.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #851686 in Launchpad itself: "buildd-manager needs to connect as a unique database user"
https://bugs.launchpad.net/launchpad/+bug/851686
For more details, see:
https://code.launchpad.net/~stub/launchpad/trivial/+merge/78206
= Summary =
If database backends are slow to shutdown, pgmassacre kills them in a particularly evil way causing the cluster to go into recovery mode. This is fine for dev environments, but this script is also being used on staging and occasionally production.
== Proposed fix ==
Use pg_terminate_backend() instead, which didn't exist when this script was first written.
== Pre-implementation notes ==
== Implementation details ==
== Tests ==
== Demo and Q/A ==
= Launchpad lint =
Checking for conflicts and issues in changed files.
Linting changed files:
utilities/pgmassacre.py
--
https://code.launchpad.net/~stub/launchpad/trivial/+merge/78206
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~stub/launchpad/trivial into lp:launchpad.
=== modified file 'utilities/pgmassacre.py'
--- utilities/pgmassacre.py 2010-04-27 19:48:39 +0000
+++ utilities/pgmassacre.py 2011-10-05 07:07:31 +0000
@@ -1,22 +1,22 @@
#!/usr/bin/python
#
-# Copyright 2009 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
+# This file is mirrored into lp:losa-db-scripts, so please keep that
+# version in sync with the master in the Launchpad tree.
+
"""
dropdb only more so.
-Cut off access, slaughter connections and burn the database to the ground.
+Cut off access, slaughter connections and burn the database to the ground
+(but do nothing that could put the system into recovery mode).
"""
-# Nothing but system installed libraries - this script sometimes
-# gets installed standalone with no Launchpad tree available.
-from distutils.version import LooseVersion
import sys
import time
import psycopg2
import psycopg2.extensions
-from signal import SIGTERM, SIGQUIT, SIGKILL, SIGINT
from optparse import OptionParser
@@ -28,44 +28,6 @@
return psycopg2.connect("dbname=%s" % dbname)
-def send_signal(database, signal):
- con = connect()
- con.set_isolation_level(1) # READ COMMITTED. We rollback changes we make.
- cur = con.cursor()
-
- # Install PL/PythonU if it isn't already.
- cur.execute("SELECT TRUE FROM pg_language WHERE lanname = 'plpythonu'")
- if cur.fetchone() is None:
- cur.execute('CREATE LANGUAGE "plpythonu"')
-
- # Create a stored procedure to kill a backend process.
- qdatabase = str(psycopg2.extensions.QuotedString(database))
- cur.execute("""
- CREATE OR REPLACE FUNCTION _pgmassacre_killall(integer)
- RETURNS Boolean AS $$
- import os
-
- signal = args[0]
- for row in plpy.execute('''
- SELECT procpid FROM pg_stat_activity WHERE datname=%(qdatabase)s
- AND procpid != pg_backend_pid()
- '''):
- try:
- os.kill(row['procpid'], signal)
- except OSError:
- pass
- else:
- return False
-
- return True
- $$ LANGUAGE plpythonu
- """ % vars())
-
- cur.execute("SELECT _pgmassacre_killall(%(signal)s)", vars())
- con.rollback()
- con.close()
-
-
def rollback_prepared_transactions(database):
"""Rollback any prepared transactions.
@@ -73,7 +35,7 @@
transactions.
"""
con = connect(database)
- con.set_isolation_level(0) # Autocommit so we can ROLLBACK PREPARED.
+ con.set_isolation_level(0) # Autocommit so we can ROLLBACK PREPARED.
cur = con.cursor()
# Get a list of outstanding prepared transactions.
@@ -86,15 +48,18 @@
con.close()
-def still_open(database, max_wait=10):
+def still_open(database, max_wait=120):
"""Return True if there are still open connections, apart from our own.
- Waits a while to ensure that connections shutting down have a chance to.
+ Waits a while to ensure that connections shutting down have a chance
+ to. This might take a while if there is a big transaction to
+ rollback.
"""
con = connect()
- con.set_isolation_level(0) # Autocommit.
+ con.set_isolation_level(0) # Autocommit.
cur = con.cursor()
- # Wait for up to 10 seconds, returning True if all backends are gone.
+ # Keep checking until the timeout is reached, returning True if all
+ # of the backends are gone.
start = time.time()
while time.time() < start + max_wait:
cur.execute("""
@@ -106,14 +71,14 @@
""", vars())
if cur.fetchone() is None:
return False
- time.sleep(0.6) # Stats only updated every 500ms.
+ time.sleep(0.6) # Stats only updated every 500ms.
con.close()
return True
def massacre(database):
con = connect()
- con.set_isolation_level(0) # Autocommit
+ con.set_isolation_level(0) # Autocommit
cur = con.cursor()
# Allow connections to the doomed database if something turned this off,
@@ -131,23 +96,24 @@
"UPDATE pg_database SET datallowconn=FALSE WHERE datname=%s",
[database])
+ # New connections are disabled, but pg_stat_activity is only
+ # updated every 500ms. Ensure that pg_stat_activity has
+ # been refreshed to catch any connections that opened
+ # immediately before setting datallowconn.
+ time.sleep(1)
+
+ # Terminate open connections.
+ cur.execute("""
+ SELECT procpid, pg_terminate_backend(procpid)
+ FROM pg_stat_activity
+ WHERE datname=%s AND procpid <> pg_backend_pid()
+ """, [database])
+ for procpid, success in cur.fetchall():
+ if not success:
+ print >> sys.stderr, (
+ "pg_terminate_backend(%s) failed" % procpid)
con.close()
- # Terminate current statements.
- send_signal(database, SIGINT)
-
- # Shutdown current connections normally.
- if still_open(database, 1):
- send_signal(database, SIGTERM)
-
- # Shutdown current connections immediately.
- if still_open(database):
- send_signal(database, SIGQUIT)
-
- # Shutdown current connections nastily.
- if still_open(database):
- send_signal(database, SIGKILL)
-
if still_open(database):
print >> sys.stderr, (
"Unable to kill all backends! Database not destroyed.")
@@ -158,7 +124,7 @@
# AUTOCOMMIT required to execute commands like DROP DATABASE.
con.set_isolation_level(0)
cur = con.cursor()
- cur.execute("DROP DATABASE %s" % database) # Not quoted.
+ cur.execute("DROP DATABASE %s" % database) # Not quoted.
con.close()
return 0
finally:
@@ -184,7 +150,7 @@
now = start
error_msg = None
con = connect()
- con.set_isolation_level(0) # Autocommit required for CREATE DATABASE.
+ con.set_isolation_level(0) # Autocommit required for CREATE DATABASE.
create_db_cmd = """
CREATE DATABASE %s WITH ENCODING='UTF8' TEMPLATE=%s
""" % (database, template)
@@ -193,7 +159,7 @@
# We make use of this feature so we don't have to care what locale
# was used to create the database cluster rather than requiring it
# to be rebuilt in the C locale.
- if pg_version >= LooseVersion("8.4.0") and template == "template0":
+ if template == "template0":
create_db_cmd += "LC_COLLATE='C' LC_CTYPE='C'"
while now < start + 20:
cur = con.cursor()
@@ -203,7 +169,7 @@
return 0
except psycopg2.Error, exception:
error_msg = str(exception)
- time.sleep(0.6) # Stats only updated every 500ms.
+ time.sleep(0.6) # Stats only updated every 500ms.
now = time.time()
con.close()
@@ -228,7 +194,6 @@
options = None
-pg_version = None # LooseVersion - Initialized in main()
def main():
@@ -236,7 +201,8 @@
parser.add_option("-U", "--user", dest="user", default=None,
help="Connect as USER", metavar="USER")
parser.add_option("-t", "--template", dest="template", default=None,
- help="Recreate database using DBNAME as a template database.",
+ help="Recreate database using DBNAME as a template database."
+ " If template0, database will be created in the C locale.",
metavar="DBNAME")
global options
(options, args) = parser.parse_args()
@@ -254,11 +220,6 @@
con = connect()
cur = con.cursor()
- # Store the database version for version specific code.
- global pg_version
- cur.execute("show server_version")
- pg_version = LooseVersion(cur.fetchone()[0])
-
# Ensure the template database exists.
if options.template is not None:
cur.execute(