← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~stub/launchpad/trivial into lp:launchpad

 

Stuart Bishop has proposed merging lp:~stub/launchpad/trivial into lp:launchpad.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #851686 in Launchpad itself: "buildd-manager needs to connect as a unique database user"
  https://bugs.launchpad.net/launchpad/+bug/851686

For more details, see:
https://code.launchpad.net/~stub/launchpad/trivial/+merge/78206

= Summary =

If database backends are slow to shutdown, pgmassacre kills them in a particularly evil way causing the cluster to go into recovery mode. This is fine for dev environments, but this script is also being used on staging and occasionally production.

== Proposed fix ==

Use pg_terminate_backend() instead, which didn't exist when this script was first written.

== Pre-implementation notes ==

== Implementation details ==

== Tests ==

== Demo and Q/A ==


= Launchpad lint =

Checking for conflicts and issues in changed files.

Linting changed files:
  utilities/pgmassacre.py
-- 
https://code.launchpad.net/~stub/launchpad/trivial/+merge/78206
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~stub/launchpad/trivial into lp:launchpad.
=== modified file 'utilities/pgmassacre.py'
--- utilities/pgmassacre.py	2010-04-27 19:48:39 +0000
+++ utilities/pgmassacre.py	2011-10-05 07:07:31 +0000
@@ -1,22 +1,22 @@
 #!/usr/bin/python
 #
-# Copyright 2009 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
+# This file is mirrored into lp:losa-db-scripts, so please keep that
+# version in sync with the master in the Launchpad tree.
+
 """
 dropdb only more so.
 
-Cut off access, slaughter connections and burn the database to the ground.
+Cut off access, slaughter connections and burn the database to the ground
+(but do nothing that could put the system into recovery mode).
 """
 
-# Nothing but system installed libraries - this script sometimes
-# gets installed standalone with no Launchpad tree available.
-from distutils.version import LooseVersion
 import sys
 import time
 import psycopg2
 import psycopg2.extensions
-from signal import SIGTERM, SIGQUIT, SIGKILL, SIGINT
 from optparse import OptionParser
 
 
@@ -28,44 +28,6 @@
         return psycopg2.connect("dbname=%s" % dbname)
 
 
-def send_signal(database, signal):
-    con = connect()
-    con.set_isolation_level(1) # READ COMMITTED. We rollback changes we make.
-    cur = con.cursor()
-
-    # Install PL/PythonU if it isn't already.
-    cur.execute("SELECT TRUE FROM pg_language WHERE lanname = 'plpythonu'")
-    if cur.fetchone() is None:
-        cur.execute('CREATE LANGUAGE "plpythonu"')
-
-    # Create a stored procedure to kill a backend process.
-    qdatabase = str(psycopg2.extensions.QuotedString(database))
-    cur.execute("""
-        CREATE OR REPLACE FUNCTION _pgmassacre_killall(integer)
-        RETURNS Boolean AS $$
-        import os
-
-        signal = args[0]
-        for row in plpy.execute('''
-            SELECT procpid FROM pg_stat_activity WHERE datname=%(qdatabase)s
-                AND procpid != pg_backend_pid()
-            '''):
-            try:
-                os.kill(row['procpid'], signal)
-            except OSError:
-                pass
-        else:
-            return False
-
-        return True
-        $$ LANGUAGE plpythonu
-        """ % vars())
-
-    cur.execute("SELECT _pgmassacre_killall(%(signal)s)", vars())
-    con.rollback()
-    con.close()
-
-
 def rollback_prepared_transactions(database):
     """Rollback any prepared transactions.
 
@@ -73,7 +35,7 @@
     transactions.
     """
     con = connect(database)
-    con.set_isolation_level(0) # Autocommit so we can ROLLBACK PREPARED.
+    con.set_isolation_level(0)  # Autocommit so we can ROLLBACK PREPARED.
     cur = con.cursor()
 
     # Get a list of outstanding prepared transactions.
@@ -86,15 +48,18 @@
     con.close()
 
 
-def still_open(database, max_wait=10):
+def still_open(database, max_wait=120):
     """Return True if there are still open connections, apart from our own.
 
-    Waits a while to ensure that connections shutting down have a chance to.
+    Waits a while to ensure that connections shutting down have a chance
+    to. This might take a while if there is a big transaction to
+    rollback.
     """
     con = connect()
-    con.set_isolation_level(0) # Autocommit.
+    con.set_isolation_level(0)  # Autocommit.
     cur = con.cursor()
-    # Wait for up to 10 seconds, returning True if all backends are gone.
+    # Keep checking until the timeout is reached, returning True if all
+    # of the backends are gone.
     start = time.time()
     while time.time() < start + max_wait:
         cur.execute("""
@@ -106,14 +71,14 @@
             """, vars())
         if cur.fetchone() is None:
             return False
-        time.sleep(0.6) # Stats only updated every 500ms.
+        time.sleep(0.6)  # Stats only updated every 500ms.
     con.close()
     return True
 
 
 def massacre(database):
     con = connect()
-    con.set_isolation_level(0) # Autocommit
+    con.set_isolation_level(0)  # Autocommit
     cur = con.cursor()
 
     # Allow connections to the doomed database if something turned this off,
@@ -131,23 +96,24 @@
             "UPDATE pg_database SET datallowconn=FALSE WHERE datname=%s",
             [database])
 
+        # New connections are disabled, but pg_stat_activity is only
+        # updated every 500ms. Ensure that pg_stat_activity has
+        # been refreshed to catch any connections that opened
+        # immediately before setting datallowconn.
+        time.sleep(1)
+
+        # Terminate open connections.
+        cur.execute("""
+            SELECT procpid, pg_terminate_backend(procpid)
+            FROM pg_stat_activity
+            WHERE datname=%s AND procpid <> pg_backend_pid()
+            """, [database])
+        for procpid, success in cur.fetchall():
+            if not success:
+                print >> sys.stderr, (
+                    "pg_terminate_backend(%s) failed" % procpid)
         con.close()
 
-        # Terminate current statements.
-        send_signal(database, SIGINT)
-
-        # Shutdown current connections normally.
-        if still_open(database, 1):
-            send_signal(database, SIGTERM)
-
-        # Shutdown current connections immediately.
-        if still_open(database):
-            send_signal(database, SIGQUIT)
-
-        # Shutdown current connections nastily.
-        if still_open(database):
-            send_signal(database, SIGKILL)
-
         if still_open(database):
             print >> sys.stderr, (
                     "Unable to kill all backends! Database not destroyed.")
@@ -158,7 +124,7 @@
         # AUTOCOMMIT required to execute commands like DROP DATABASE.
         con.set_isolation_level(0)
         cur = con.cursor()
-        cur.execute("DROP DATABASE %s" % database) # Not quoted.
+        cur.execute("DROP DATABASE %s" % database)  # Not quoted.
         con.close()
         return 0
     finally:
@@ -184,7 +150,7 @@
     now = start
     error_msg = None
     con = connect()
-    con.set_isolation_level(0) # Autocommit required for CREATE DATABASE.
+    con.set_isolation_level(0)  # Autocommit required for CREATE DATABASE.
     create_db_cmd = """
         CREATE DATABASE %s WITH ENCODING='UTF8' TEMPLATE=%s
         """ % (database, template)
@@ -193,7 +159,7 @@
     # We make use of this feature so we don't have to care what locale
     # was used to create the database cluster rather than requiring it
     # to be rebuilt in the C locale.
-    if pg_version >= LooseVersion("8.4.0") and template == "template0":
+    if template == "template0":
         create_db_cmd += "LC_COLLATE='C' LC_CTYPE='C'"
     while now < start + 20:
         cur = con.cursor()
@@ -203,7 +169,7 @@
             return 0
         except psycopg2.Error, exception:
             error_msg = str(exception)
-        time.sleep(0.6) # Stats only updated every 500ms.
+        time.sleep(0.6)  # Stats only updated every 500ms.
         now = time.time()
     con.close()
 
@@ -228,7 +194,6 @@
 
 
 options = None
-pg_version = None # LooseVersion - Initialized in main()
 
 
 def main():
@@ -236,7 +201,8 @@
     parser.add_option("-U", "--user", dest="user", default=None,
         help="Connect as USER", metavar="USER")
     parser.add_option("-t", "--template", dest="template", default=None,
-        help="Recreate database using DBNAME as a template database.",
+        help="Recreate database using DBNAME as a template database."
+            " If template0, database will be created in the C locale.",
         metavar="DBNAME")
     global options
     (options, args) = parser.parse_args()
@@ -254,11 +220,6 @@
     con = connect()
     cur = con.cursor()
 
-    # Store the database version for version specific code.
-    global pg_version
-    cur.execute("show server_version")
-    pg_version = LooseVersion(cur.fetchone()[0])
-
     # Ensure the template database exists.
     if options.template is not None:
         cur.execute(