← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~stub/launchpad/staging into lp:launchpad

 

Stuart Bishop has proposed merging lp:~stub/launchpad/staging into lp:launchpad.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~stub/launchpad/staging/+merge/68529

Updates to full-update.py to do integrate with pgbouncer.

full-update.py now disconnects all clients and locks them out for the duration, per low downtime deploy goals.
-- 
https://code.launchpad.net/~stub/launchpad/staging/+merge/68529
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~stub/launchpad/staging into lp:launchpad.
=== modified file 'database/schema/full-update.py'
--- database/schema/full-update.py	2011-04-26 13:56:48 +0000
+++ database/schema/full-update.py	2011-07-20 11:50:43 +0000
@@ -13,10 +13,15 @@
 
 from canonical.launchpad.scripts import (
     db_options,
+    logger,
     logger_options,
     )
 
 
+# NB. This script doesn't need to be invoked from sudo.
+PGBOUNCER_INITD = ['sudo', '/etc/init.d/pgbouncer']
+
+
 def run_script(script, *extra_args):
     script_path = os.path.join(os.path.dirname(__file__), script)
     return subprocess.call([script_path] + sys.argv[1:] + list(extra_args))
@@ -32,24 +37,92 @@
     if args:
         parser.error("Too many arguments")
 
-    preflight_rc = run_script('preflight.py')
+    log = logger(options)
+
+    #
+    # Preflight checks. Confirm as best we can that the upgrade will
+    # work unattended.
+    #
+
+    # We initially ignore open connections, as they will shortly be
+    # killed.
+    preflight_rc = run_script('preflight.py', '--skip-connection-check')
     if preflight_rc != 0:
         return preflight_rc
 
-    upgrade_rc = run_script('upgrade.py')
-    if upgrade_rc != 0:
-        return upgrade_rc
-
-    fti_rc = run_script('fti.py')
-    if fti_rc != 0:
-        return fti_rc
-
-    security_rc = run_script('security.py', '--cluster')
-    if security_rc != 0:
-        return security_rc
-
-    preflight_rc = run_script('preflight.py')
-    return preflight_rc
+    # Confirm we can invoke PGBOUNCER_INITD
+    pgbouncer_status_cmd = PGBOUNCER_INITD + ['status']
+    pgbouncer_rc = subprocess.call(pgbouncer_status_cmd)
+    sys.stdout.flush()
+    if pgbouncer_rc != 0:
+        log.fatal("Unable to invoke %s", ' '.join(pgbouncer_status_cmd))
+        return pgbouncer_rc
+
+    #
+    # Start the actual upgrade. Failures beyond this point need to
+    # generate informative messages to help with recovery.
+    #
+
+    # status flags
+    pgbouncer_down = False
+    upgrade_run = False
+    fti_run = False
+    security_run = False
+
+    try:
+        # Shutdown pgbouncer
+        pgbouncer_rc = subprocess.call(PGBOUNCER_INITD + ['stop'])
+        sys.stdout.flush()
+        if pgbouncer_rc != 0:
+            log.fatal("pgbouncer not shut down [%s]", pgbouncer_rc)
+            return pgbouncer_rc
+        pgbouncer_down = True
+
+        preflight_rc = run_script('preflight.py', '--kill-connections')
+        if preflight_rc != 0:
+            return preflight_rc
+
+        upgrade_rc = run_script('upgrade.py')
+        if upgrade_rc != 0:
+            log.warning("upgrade.py run may have been partial")
+            return upgrade_rc
+        upgrade_run = True
+
+        fti_rc = run_script('fti.py')
+        if fti_rc != 0:
+            return fti_rc
+        fti_run = True
+
+        security_rc = run_script('security.py', '--cluster')
+        if security_rc != 0:
+            return security_rc
+        security_run = True
+
+        log.info("All database upgrade steps completed")
+
+        pgbouncer_rc = subprocess.call(PGBOUNCER_INITD + ['start'])
+        sys.stdout.flush()
+        if pgbouncer_rc != 0:
+            log.fatal("pgbouncer not restarted [%s]", pgbouncer_rc)
+            return pgbouncer_rc
+        pgbouncer_down = False
+
+        preflight_rc = run_script('preflight.py')
+        if preflight_rc != 0:
+            return preflight_rc
+
+        log.info("All good. All done.")
+        return 0
+
+    finally:
+        if pgbouncer_down:
+            log.warning("pgbouncer is down and will need to be restarted")
+        if not upgrade_run:
+            log.warning("upgrade.py still needs to be run")
+        if not fti_run:
+            log.warning("fti.py still needs to be run")
+        if not security_run:
+            log.warning("security.py still needs to be run")
 
 
 if __name__ == '__main__':

=== modified file 'database/schema/preflight.py'
--- database/schema/preflight.py	2011-05-10 11:20:06 +0000
+++ database/schema/preflight.py	2011-07-20 11:50:43 +0000
@@ -15,6 +15,7 @@
 from canonical.database.sqlbase import (
     connect,
     ISOLATION_LEVEL_AUTOCOMMIT,
+    sqlvalues,
     )
 from canonical.launchpad.scripts import (
     db_options,
@@ -42,27 +43,28 @@
             for node in self.nodes:
                 node.con = psycopg2.connect(node.connection_string)
                 node.con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
+
+            # Create a list of nodes subscribed to the replicated sets we
+            # are modifying.
+            cur = master_con.cursor()
+            cur.execute("""
+                WITH subscriptions AS (
+                    SELECT *
+                    FROM _sl.sl_subscribe
+                    WHERE sub_set = 1 AND sub_active IS TRUE)
+                SELECT sub_provider FROM subscriptions
+                UNION
+                SELECT sub_receiver FROM subscriptions
+                """)
+            lpmain_node_ids = set(row[0] for row in cur.fetchall())
+            self.lpmain_nodes = set(
+                node for node in self.nodes
+                if node.node_id in lpmain_node_ids)
         else:
             node = replication.helpers.Node(None, None, None, True)
             node.con = master_con
             self.nodes = set([node])
-
-        # Create a list of nodes subscribed to the replicated sets we
-        # are modifying.
-        cur = master_con.cursor()
-        cur.execute("""
-            WITH subscriptions AS (
-                SELECT *
-                FROM _sl.sl_subscribe
-                WHERE sub_set = 1 AND sub_active IS TRUE)
-            SELECT sub_provider FROM subscriptions
-            UNION
-            SELECT sub_receiver FROM subscriptions
-            """)
-        lpmain_node_ids = set(row[0] for row in cur.fetchall())
-        self.lpmain_nodes = set(
-            node for node in self.nodes
-            if node.node_id in lpmain_node_ids)
+            self.lpmain_nodes = self.nodes
 
     def check_is_superuser(self):
         """Return True if all the node connections are as superusers."""
@@ -217,20 +219,68 @@
         return success
 
 
+class NoConnectionCheckPreflight(DatabasePreflight):
+    def check_open_connections(self):
+        return True
+
+
+class KillConnectionsPreflight(DatabasePreflight):
+    def check_open_connections(self):
+        """Kill all non-system connections to Launchpad databases.
+
+        We only check on subscribed nodes, as there will be active systems
+        connected to other nodes in the replication cluster (such as the
+        SSO servers).
+
+        System users are defined by SYSTEM_USERS.
+        """
+        for node in self.lpmain_nodes:
+            cur = node.con.cursor()
+            cur.execute("""
+                SELECT procpid, datname, usename, pg_terminate_backend(procpid)
+                FROM pg_stat_activity
+                WHERE
+                    datname=current_database()
+                    AND procpid <> pg_backend_pid()
+                    AND usename NOT IN %s
+                """ % sqlvalues(SYSTEM_USERS))
+            for procpid, datname, usename, ignored in cur.fetchall():
+                self.log.warning(
+                    "Killed %s [%s] on %s", usename, procpid, datname)
+        return True
+
+
 def main():
     parser = OptionParser()
     db_options(parser)
     logger_options(parser)
+    parser.add_option(
+        "--skip-connection-check", dest='skip_connection_check',
+        default=False, action="store_true",
+        help="Don't check open connections.")
+    parser.add_option(
+        "--kill-connections", dest='kill_connections',
+        default=False, action="store_true",
+        help="Kill non-system connections instead of reporting an error.")
     (options, args) = parser.parse_args()
     if args:
         parser.error("Too many arguments")
 
+    if options.kill_connections and options.skip_connection_check:
+        parser.error(
+            "--skip-connection-check conflicts with --kill-connections")
+
     log = logger(options)
 
     master_con = connect(lp.dbuser)
     master_con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
 
-    preflight_check = DatabasePreflight(log, master_con)
+    if options.kill_connections:
+        preflight_check = KillConnectionsPreflight(log, master_con)
+    elif options.skip_connection_check:
+        preflight_check = NoConnectionCheckPreflight(log, master_con)
+    else:
+        preflight_check = DatabasePreflight(log, master_con)
 
     if preflight_check.check_all():
         log.info('Preflight check succeeded. Good to go.')

=== modified file 'lib/canonical/database/sqlbase.py'
--- lib/canonical/database/sqlbase.py	2011-06-29 08:48:31 +0000
+++ lib/canonical/database/sqlbase.py	2011-07-20 11:50:43 +0000
@@ -535,12 +535,15 @@
 
     >>> quote(set([1,2,3]))
     '(1, 2, 3)'
+
+    >>> quote(frozenset([1,2,3]))
+    '(1, 2, 3)'
     """
     if isinstance(x, datetime):
         return "'%s'" % x
     elif ISQLBase(x, None) is not None:
         return str(x.id)
-    elif isinstance(x, set):
+    elif isinstance(x, (set, frozenset)):
         # SQLObject can't cope with sets, so convert to a list, which it
         # /does/ know how to handle.
         x = list(x)


Follow ups