launchpad-reviewers team mailing list archive

Thread
Date

[Merge] lp:~stub/launchpad/bug-778338-transient-schema into lp:launchpad

To: mp+60512@xxxxxxxxxxxxxxxxxx
From: Stuart Bishop <stuart.bishop@xxxxxxxxxxxxx>
Date: Tue, 10 May 2011 14:21:04 -0000
Reply-to: mp+60512@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

Stuart Bishop has proposed merging lp:~stub/launchpad/bug-778338-transient-schema into lp:launchpad with lp:~stub/launchpad/bug-179821-lowercase-tablenames as a prerequisite.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #778338 in Launchpad itself: "restartable multitablecopier interacts badly with replication and upgrade operations"
  https://bugs.launchpad.net/launchpad/+bug/778338

For more details, see:
https://code.launchpad.net/~stub/launchpad/bug-778338-transient-schema/+merge/60512

Multitable should create transient tables in a dedicated schema so they get ignored by the replication system.
-- 
https://code.launchpad.net/~stub/launchpad/bug-778338-transient-schema/+merge/60512
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~stub/launchpad/bug-778338-transient-schema into lp:launchpad.

=== modified file 'database/schema/patch-2208-66-0.sql'
--- database/schema/patch-2208-66-0.sql	2011-05-10 14:18:55 +0000
+++ database/schema/patch-2208-66-0.sql	2011-05-10 14:18:57 +0000
@@ -1,6 +1,14 @@
 SET client_min_messages=ERROR;
 
--- Rename all temp_ names currently in existance to lower(name)
+-- Create a schema for persistent but temporary objects.
+-- This would normally be called temp, but that is a reserved word.
+-- We use this to create temporary tables that need to persist
+-- across database restarts.
+CREATE SCHEMA transient;
+GRANT CREATE, USAGE ON SCHEMA transient TO PUBLIC;
+
+-- Rename all temp_ names currently in existance to
+-- transient.name.
 CREATE OR REPLACE FUNCTION _migrate() RETURNS VOID LANGUAGE plpythonu AS
 $$
     rows = plpy.execute(r"""
@@ -14,13 +22,13 @@
     for row in rows:
         oldname = row['oldname']
         assert oldname.startswith('temp_'), oldname
-        newname = oldname.lower()
+        newname = oldname[5:].lower()
         plpy.execute('ALTER TABLE "%s" RENAME TO "%s"' % (oldname, newname))
+        plpy.execute('ALTER TABLE "%s" SET SCHEMA transient' % newname)
 $$;
 
 SELECT _migrate();
 
 DROP FUNCTION _migrate();
 
-
 INSERT INTO LaunchpadDatabaseRevision VALUES (2208, 66, 0);

=== modified file 'lib/canonical/database/ftests/test_multitablecopy.txt'
--- lib/canonical/database/ftests/test_multitablecopy.txt	2010-10-18 22:24:59 +0000
+++ lib/canonical/database/ftests/test_multitablecopy.txt	2011-05-10 14:18:57 +0000
@@ -99,9 +99,11 @@
 
 We have the data we're copying in holding tables now.
 
-    >>> postgresql.have_table(cur, copier.getRawHoldingTableName('numeric'))
+    >>> schema, name = copier.getRawHoldingTableName('numeric')
+    >>> postgresql.have_table(cur, name, schema)
     True
-    >>> postgresql.have_table(cur, copier.getRawHoldingTableName('textual'))
+    >>> schema, name = copier.getRawHoldingTableName('numeric')
+    >>> postgresql.have_table(cur, name, schema)
     True
 
 
@@ -133,9 +135,11 @@
 
 And the holding tables are gone.
 
-    >>> postgresql.have_table(cur, copier.getRawHoldingTableName('numeric'))
+    >>> schema, name = copier.getRawHoldingTableName('numeric')
+    >>> postgresql.have_table(cur, name, schema)
     False
-    >>> postgresql.have_table(cur, copier.getRawHoldingTableName('textual'))
+    >>> schema, name = copier.getRawHoldingTableName('textual')
+    >>> postgresql.have_table(cur, name, schema)
     False
 
 
@@ -268,15 +272,19 @@
     >>> copier.needsRecovery()
     False
 
-    >>> postgresql.have_table(cur, copier.getRawHoldingTableName('textual'))
+    >>> schema, name = copier.getRawHoldingTableName('textual')
+    >>> postgresql.have_table(cur, name, schema)
     True
-    >>> postgresql.have_table(cur, copier.getRawHoldingTableName('numeric'))
+    >>> schema, name = copier.getRawHoldingTableName('numeric')
+    >>> postgresql.have_table(cur, name, schema)
     True
 
     >>> copier.dropHoldingTables()
-    >>> postgresql.have_table(cur, copier.getRawHoldingTableName('textual'))
+    >>> schema, name = copier.getRawHoldingTableName('textual')
+    >>> postgresql.have_table(cur, name, schema)
     False
-    >>> postgresql.have_table(cur, copier.getRawHoldingTableName('numeric'))
+    >>> schema, name = copier.getRawHoldingTableName('numeric')
+    >>> postgresql.have_table(cur, name, schema)
     False
 
     >>> cur.execute("SELECT t, count(*) FROM textual GROUP BY t ORDER BY t")
@@ -319,9 +327,11 @@
     >>> transaction.begin()
     <transaction...
     >>> cur = cursor()
-    >>> postgresql.have_table(cur, copier.getRawHoldingTableName('textual'))
+    >>> schema, name = copier.getRawHoldingTableName('textual')
+    >>> postgresql.have_table(cur, name, schema)
     False
-    >>> postgresql.have_table(cur, copier.getRawHoldingTableName('numeric'))
+    >>> schema, name = copier.getRawHoldingTableName('numeric')
+    >>> postgresql.have_table(cur, name, schema)
     True
 
 Our textual data has been copied, so the textual table now lists each of its
@@ -358,9 +368,11 @@
 This time we run to completion without problems.
 
     >>> cur = cursor()
-    >>> postgresql.have_table(cur, copier.getRawHoldingTableName('textual'))
+    >>> schema, name = copier.getRawHoldingTableName('textual')
+    >>> postgresql.have_table(cur, name, schema)
     False
-    >>> postgresql.have_table(cur, copier.getRawHoldingTableName('numeric'))
+    >>> schema, name = copier.getRawHoldingTableName('numeric')
+    >>> postgresql.have_table(cur, name, schema)
     False
 
     >>> cur.execute("""
@@ -626,10 +638,10 @@
 
     >>> copier.pour(transaction)
     Pouring textual
-    Pouring text from "temp_textual_holding_test" to textual
+    Pouring text from "transient"."textual_holding_test" to textual
     ...
     Pouring numeric
-    Pouring numbers from "temp_numeric_holding_test" to numeric
+    Pouring numbers from "transient"."numeric_holding_test" to numeric
     ...
 
 

=== modified file 'lib/canonical/database/multitablecopy.py'
--- lib/canonical/database/multitablecopy.py	2011-05-10 14:18:55 +0000
+++ lib/canonical/database/multitablecopy.py	2011-05-10 14:18:57 +0000
@@ -6,6 +6,7 @@
 __all__ = [ 'MultiTableCopy' ]
 
 import logging
+import re
 import time
 
 from zope.interface import implements
@@ -245,28 +246,31 @@
 
     def dropHoldingTables(self):
         """Drop any holding tables that may exist for this MultiTableCopy."""
-        holding_tables = [self.getHoldingTableName(table)
-                          for table in self.tables]
-        postgresql.drop_tables(cursor(), holding_tables)
+        for table in self.tables:
+            schema, raw_table = self.getRawHoldingTableName(table)
+            postgresql.drop_tables(cursor(), raw_table, schema)
 
     def getRawHoldingTableName(self, tablename, suffix=''):
-        """Name for a holding table, but without quotes.  Use with care."""
+        """(schema, name) for a holding table, but without quotes."""
         if suffix:
             suffix = '_%s' % suffix
 
-        raw_name = "temp_%s_holding_%s%s" % (
+        raw_name = "%s_holding_%s%s" % (
             str(tablename), self.name, str(suffix))
 
         # Only lowercase names per Bug #179821.
-        return raw_name.lower()
+        raw_name = raw_name.lower()
+
+        # In a separate schema for transient tables per Bug #778338.
+        return ("transient", raw_name)
 
     def getHoldingTableName(self, tablename, suffix=''):
         """Name for a holding table to hold data being copied in tablename.
 
         Return value is properly quoted for use as an SQL identifier.
         """
-        raw_name = self.getRawHoldingTableName(tablename, suffix)
-        return quoteIdentifier(raw_name)
+        schema, tablename = self.getRawHoldingTableName(tablename, suffix)
+        return "%s.%s" % (quoteIdentifier(schema), quoteIdentifier(tablename))
 
     def _pointsToTable(self, source_table, foreign_key):
         """Name of table that source_table.foreign_key refers to.
@@ -481,14 +485,16 @@
         """Index id column on holding table.
 
         Creates a unique index on "id" column in holding table.  The index
-        gets the name of the holding table with "_id" appended.
+        gets the name of the holding table with "temp_" prepended and
+        "_id" appended.
         """
         source_table = str(source_table)
         self.logger.debug("Indexing %s" % holding_table)
+        index_name = quoteIdentifier('temp_%s_id' % holding_table.lower())
         cur.execute('''
             CREATE UNIQUE INDEX %s
             ON %s (id)
-        ''' % (self.getHoldingTableName(source_table, 'id'), holding_table))
+        ''' % (index_name, holding_table))
 
     def _retargetForeignKeys(self, holding_table, joins, cur):
         """Replace foreign keys in new holding table.
@@ -518,15 +524,18 @@
         # If there are any holding tables to be poured into their source
         # tables, there must at least be one for the last table that pour()
         # processes.
-        last_holding_table = self.getRawHoldingTableName(self.tables[-1])
-        if not postgresql.have_table(cur, last_holding_table):
+        schema, last_holding_table = self.getRawHoldingTableName(
+            self.tables[-1])
+        if not postgresql.have_table(cur, last_holding_table, schema):
             return False
 
         # If the first table in our list also still exists, and it still has
         # its new_id column, then the pouring process had not begun yet.
         # Assume the data was not ready for pouring.
-        first_holding_table = self.getRawHoldingTableName(self.tables[0])
-        if postgresql.table_has_column(cur, first_holding_table, 'new_id'):
+        schema, first_holding_table = self.getRawHoldingTableName(
+            self.tables[0])
+        if postgresql.table_has_column(
+            cur, first_holding_table, 'new_id', schema):
             self.logger.info(
                 "Previous run aborted too early for recovery; redo all")
             return False
@@ -564,9 +573,10 @@
         # there's a matching holding table.  If so, prepare it, pour it back
         # into the source table, and drop.
         for table in self.tables:
-            holding_table_unquoted = self.getRawHoldingTableName(table)
+            schema, holding_table_unquoted = self.getRawHoldingTableName(
+                table)
 
-            if not postgresql.have_table(cur, holding_table_unquoted):
+            if not postgresql.have_table(cur, holding_table_unquoted, schema):
                 # We know we're in a suitable state for pouring.  If this
                 # table does not exist, it must be because it's been poured
                 # out completely and dropped in an earlier instance of this
@@ -580,14 +590,14 @@
             tablestarttime = time.time()
 
             has_new_id = postgresql.table_has_column(
-                cur, holding_table_unquoted, 'new_id')
+                cur, holding_table_unquoted, 'new_id', schema)
 
             self._pourTable(
                 holding_table, table, has_new_id, transaction_manager)
 
             # Drop holding table.  It may still contain rows with id set to
             # null.  Those must not be poured.
-            postgresql.drop_tables(cursor(), holding_table)
+            postgresql.drop_tables(cursor(), holding_table_unquoted, schema)
 
             self.logger.debug(
                 "Pouring %s took %.3f seconds."

=== modified file 'lib/canonical/database/postgresql.py'
--- lib/canonical/database/postgresql.py	2009-09-21 08:13:40 +0000
+++ lib/canonical/database/postgresql.py	2011-05-10 14:18:57 +0000
@@ -307,7 +307,7 @@
     return int(match.group(1))
 
 
-def have_table(cur, table):
+def have_table(cur, table, schema='public'):
     """Is there a table of the given name?
 
     Returns boolean answer.
@@ -324,12 +324,14 @@
     cur.execute('''
         SELECT count(*) > 0
         FROM pg_tables
-        WHERE tablename=%s
-    ''' % str(quote(table)))
+        WHERE
+            schemaname=%s
+            AND tablename=%s
+    ''' % sqlvalues(schema, table))
     return (cur.fetchall()[0][0] != 0)
 
 
-def table_has_column(cur, table, column):
+def table_has_column(cur, table, column, schema='public'):
     """Does a table of the given name exist and have the given column?
 
     Returns boolean answer.
@@ -349,13 +351,16 @@
         SELECT count(*) > 0
         FROM pg_attribute
         JOIN pg_class ON pg_class.oid = attrelid
-        WHERE relname=%s
+        JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid
+        WHERE
+            nspname=%s
+            AND relname=%s
             AND attname=%s
-    ''' % sqlvalues(table, column))
+    ''' % sqlvalues(schema, table, column))
     return (cur.fetchall()[0][0] != 0)
 
 
-def drop_tables(cur, tables):
+def drop_tables(cur, tables, schema='public'):
     """Drop given tables (a list, one name, or None), if they exist.
 
     >>> cur.execute("CREATE TEMP TABLE foo (a integer)")
@@ -381,8 +386,10 @@
     if isinstance(tables, basestring):
         tables = [tables]
 
+    fqns = [fqn(schema, table) for table in tables]
+
     # This syntax requires postgres 8.2 or better
-    cur.execute("DROP TABLE IF EXISTS %s" % ','.join(tables))
+    cur.execute("DROP TABLE IF EXISTS %s" % ', '.join(fqns))
 
 
 def allow_sequential_scans(cur, permission):