launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #03581
[Merge] lp:~stub/launchpad/bug-778338-transient-schema into lp:launchpad
Stuart Bishop has proposed merging lp:~stub/launchpad/bug-778338-transient-schema into lp:launchpad with lp:~stub/launchpad/bug-179821-lowercase-tablenames as a prerequisite.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #778338 in Launchpad itself: "restartable multitablecopier interacts badly with replication and upgrade operations"
https://bugs.launchpad.net/launchpad/+bug/778338
For more details, see:
https://code.launchpad.net/~stub/launchpad/bug-778338-transient-schema/+merge/60652
= Summary =
Multitable Copy creates transient tables. These may be discovered by the replication tools and make them explode because they don't know what to do with them.
== Proposed fix ==
Put the transient tables in their own schema, which will automatically be ignored. This seems cleaner than just white listing 'temp_%' and relying on a well known prefix.
== Pre-implementation notes ==
== Implementation details ==
== Tests ==
== Demo and Q/A ==
= Launchpad lint =
Checking for conflicts and issues in changed files.
Linting changed files:
lib/canonical/database/postgresql.py
lib/canonical/database/ftests/test_multitablecopy.txt
lib/canonical/database/multitablecopy.py
./lib/canonical/database/ftests/test_multitablecopy.txt
1: narrative uses a moin header.
14: narrative uses a moin header.
54: narrative uses a moin header.
84: narrative uses a moin header.
121: narrative uses a moin header.
157: narrative uses a moin header.
199: narrative exceeds 78 characters.
244: narrative uses a moin header.
274: narrative uses a moin header.
276: narrative exceeds 78 characters.
311: narrative exceeds 78 characters.
439: narrative uses a moin header.
486: narrative uses a moin header.
498: narrative has trailing whitespace.
590: narrative uses a moin header.
658: narrative uses a moin header.
--
https://code.launchpad.net/~stub/launchpad/bug-778338-transient-schema/+merge/60652
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~stub/launchpad/bug-778338-transient-schema into lp:launchpad.
=== modified file 'lib/canonical/database/ftests/test_multitablecopy.txt'
--- lib/canonical/database/ftests/test_multitablecopy.txt 2011-05-11 15:51:09 +0000
+++ lib/canonical/database/ftests/test_multitablecopy.txt 2011-05-11 15:51:13 +0000
@@ -111,9 +111,11 @@
We have the data we're copying in holding tables now.
- >>> postgresql.have_table(cur, copier.getRawHoldingTableName('numeric'))
+ >>> schema, name = copier.getRawHoldingTableName('numeric')
+ >>> postgresql.have_table(cur, name, schema)
True
- >>> postgresql.have_table(cur, copier.getRawHoldingTableName('textual'))
+ >>> schema, name = copier.getRawHoldingTableName('numeric')
+ >>> postgresql.have_table(cur, name, schema)
True
@@ -145,9 +147,11 @@
And the holding tables are gone.
- >>> postgresql.have_table(cur, copier.getRawHoldingTableName('numeric'))
+ >>> schema, name = copier.getRawHoldingTableName('numeric')
+ >>> postgresql.have_table(cur, name, schema)
False
- >>> postgresql.have_table(cur, copier.getRawHoldingTableName('textual'))
+ >>> schema, name = copier.getRawHoldingTableName('textual')
+ >>> postgresql.have_table(cur, name, schema)
False
@@ -280,15 +284,19 @@
>>> copier.needsRecovery()
False
- >>> postgresql.have_table(cur, copier.getRawHoldingTableName('textual'))
+ >>> schema, name = copier.getRawHoldingTableName('textual')
+ >>> postgresql.have_table(cur, name, schema)
True
- >>> postgresql.have_table(cur, copier.getRawHoldingTableName('numeric'))
+ >>> schema, name = copier.getRawHoldingTableName('numeric')
+ >>> postgresql.have_table(cur, name, schema)
True
>>> copier.dropHoldingTables()
- >>> postgresql.have_table(cur, copier.getRawHoldingTableName('textual'))
+ >>> schema, name = copier.getRawHoldingTableName('textual')
+ >>> postgresql.have_table(cur, name, schema)
False
- >>> postgresql.have_table(cur, copier.getRawHoldingTableName('numeric'))
+ >>> schema, name = copier.getRawHoldingTableName('numeric')
+ >>> postgresql.have_table(cur, name, schema)
False
>>> cur.execute("SELECT t, count(*) FROM textual GROUP BY t ORDER BY t")
@@ -331,9 +339,11 @@
>>> transaction.begin()
<transaction...
>>> cur = cursor()
- >>> postgresql.have_table(cur, copier.getRawHoldingTableName('textual'))
+ >>> schema, name = copier.getRawHoldingTableName('textual')
+ >>> postgresql.have_table(cur, name, schema)
False
- >>> postgresql.have_table(cur, copier.getRawHoldingTableName('numeric'))
+ >>> schema, name = copier.getRawHoldingTableName('numeric')
+ >>> postgresql.have_table(cur, name, schema)
True
Our textual data has been copied, so the textual table now lists each of its
@@ -370,9 +380,11 @@
This time we run to completion without problems.
>>> cur = cursor()
- >>> postgresql.have_table(cur, copier.getRawHoldingTableName('textual'))
+ >>> schema, name = copier.getRawHoldingTableName('textual')
+ >>> postgresql.have_table(cur, name, schema)
False
- >>> postgresql.have_table(cur, copier.getRawHoldingTableName('numeric'))
+ >>> schema, name = copier.getRawHoldingTableName('numeric')
+ >>> postgresql.have_table(cur, name, schema)
False
>>> cur.execute("""
@@ -638,10 +650,10 @@
>>> copier.pour(transaction)
Pouring textual
- Pouring text from "temp_textual_holding_test" to textual
+ Pouring text from "transient"."textual_holding_test" to textual
...
Pouring numeric
- Pouring numbers from "temp_numeric_holding_test" to numeric
+ Pouring numbers from "transient"."numeric_holding_test" to numeric
...
=== modified file 'lib/canonical/database/multitablecopy.py'
--- lib/canonical/database/multitablecopy.py 2011-05-11 15:51:09 +0000
+++ lib/canonical/database/multitablecopy.py 2011-05-11 15:51:13 +0000
@@ -247,30 +247,31 @@
def dropHoldingTables(self):
"""Drop any holding tables that may exist for this MultiTableCopy."""
- holding_tables = [self.getHoldingTableName(table)
- for table in self.tables]
- postgresql.drop_tables(cursor(), holding_tables)
+ for table in self.tables:
+ schema, raw_table = self.getRawHoldingTableName(table)
+ postgresql.drop_tables(cursor(), raw_table, schema)
def getRawHoldingTableName(self, tablename, suffix=''):
- """Name for a holding table, but without quotes. Use with care."""
+ """(schema, name) for a holding table, but without quotes."""
if suffix:
suffix = '_%s' % suffix
assert re.search(r'[^a-z_]', tablename + suffix) is None, (
'Unsupported characters in table name per Bug #179821')
- raw_name = "temp_%s_holding_%s%s" % (
+ raw_name = "%s_holding_%s%s" % (
str(tablename), self.name, str(suffix))
- return raw_name
+ # In a separate schema for transient tables per Bug #778338.
+ return ("transient", raw_name)
def getHoldingTableName(self, tablename, suffix=''):
"""Name for a holding table to hold data being copied in tablename.
Return value is properly quoted for use as an SQL identifier.
"""
- raw_name = self.getRawHoldingTableName(tablename, suffix)
- return quoteIdentifier(raw_name)
+ schema, tablename = self.getRawHoldingTableName(tablename, suffix)
+ return "%s.%s" % (quoteIdentifier(schema), quoteIdentifier(tablename))
def _pointsToTable(self, source_table, foreign_key):
"""Name of table that source_table.foreign_key refers to.
@@ -485,14 +486,16 @@
"""Index id column on holding table.
Creates a unique index on "id" column in holding table. The index
- gets the name of the holding table with "_id" appended.
+ gets the name of the holding table with "temp_" prepended and
+ "_id" appended.
"""
source_table = str(source_table)
self.logger.debug("Indexing %s" % holding_table)
+ index_name = quoteIdentifier('temp_%s_id' % holding_table.lower())
cur.execute('''
CREATE UNIQUE INDEX %s
ON %s (id)
- ''' % (self.getHoldingTableName(source_table, 'id'), holding_table))
+ ''' % (index_name, holding_table))
def _retargetForeignKeys(self, holding_table, joins, cur):
"""Replace foreign keys in new holding table.
@@ -522,15 +525,18 @@
# If there are any holding tables to be poured into their source
# tables, there must at least be one for the last table that pour()
# processes.
- last_holding_table = self.getRawHoldingTableName(self.tables[-1])
- if not postgresql.have_table(cur, last_holding_table):
+ schema, last_holding_table = self.getRawHoldingTableName(
+ self.tables[-1])
+ if not postgresql.have_table(cur, last_holding_table, schema):
return False
# If the first table in our list also still exists, and it still has
# its new_id column, then the pouring process had not begun yet.
# Assume the data was not ready for pouring.
- first_holding_table = self.getRawHoldingTableName(self.tables[0])
- if postgresql.table_has_column(cur, first_holding_table, 'new_id'):
+ schema, first_holding_table = self.getRawHoldingTableName(
+ self.tables[0])
+ if postgresql.table_has_column(
+ cur, first_holding_table, 'new_id', schema):
self.logger.info(
"Previous run aborted too early for recovery; redo all")
return False
@@ -568,9 +574,10 @@
# there's a matching holding table. If so, prepare it, pour it back
# into the source table, and drop.
for table in self.tables:
- holding_table_unquoted = self.getRawHoldingTableName(table)
+ schema, holding_table_unquoted = self.getRawHoldingTableName(
+ table)
- if not postgresql.have_table(cur, holding_table_unquoted):
+ if not postgresql.have_table(cur, holding_table_unquoted, schema):
# We know we're in a suitable state for pouring. If this
# table does not exist, it must be because it's been poured
# out completely and dropped in an earlier instance of this
@@ -584,14 +591,14 @@
tablestarttime = time.time()
has_new_id = postgresql.table_has_column(
- cur, holding_table_unquoted, 'new_id')
+ cur, holding_table_unquoted, 'new_id', schema)
self._pourTable(
holding_table, table, has_new_id, transaction_manager)
# Drop holding table. It may still contain rows with id set to
# null. Those must not be poured.
- postgresql.drop_tables(cursor(), holding_table)
+ postgresql.drop_tables(cursor(), holding_table_unquoted, schema)
self.logger.debug(
"Pouring %s took %.3f seconds."
=== modified file 'lib/canonical/database/postgresql.py'
--- lib/canonical/database/postgresql.py 2009-09-21 08:13:40 +0000
+++ lib/canonical/database/postgresql.py 2011-05-11 15:51:13 +0000
@@ -85,7 +85,7 @@
for t in cur.fetchall():
# t == (src_table, src_column, dest_table, dest_column, upd, del)
- if t not in _state: # Avoid loops
+ if t not in _state: # Avoid loops
_state.append(t)
# Recurse, Locating references to the reference we just found.
listReferences(cur, t[0], t[1], _state)
@@ -93,6 +93,7 @@
# from the original (table, column), making it easier to change keys
return _state
+
def listUniques(cur, table, column):
'''Return a list of unique indexes on `table` that include the `column`
@@ -175,6 +176,7 @@
rv.append(tuple(keys))
return rv
+
def listSequences(cur):
"""Return a list of (schema, sequence, table, column) tuples.
@@ -206,7 +208,7 @@
for schema, sequence in list(cur.fetchall()):
match = re.search('^(\w+)_(\w+)_seq$', sequence)
if match is None:
- rv.append( (schema, sequence, None, None) )
+ rv.append((schema, sequence, None, None))
else:
table = match.group(1)
column = match.group(2)
@@ -225,11 +227,12 @@
cur.execute(sql, dict(schema=schema, table=table, column=column))
num = cur.fetchone()[0]
if num == 1:
- rv.append( (schema, sequence, table, column) )
+ rv.append((schema, sequence, table, column))
else:
- rv.append( (schema, sequence, None, None) )
+ rv.append((schema, sequence, None, None))
return rv
+
def generateResetSequencesSQL(cur):
"""Return SQL that will reset table sequences to match the data in them.
"""
@@ -257,6 +260,7 @@
else:
return ''
+
def resetSequences(cur):
"""Reset table sequences to match the data in them.
@@ -278,9 +282,11 @@
if sql:
cur.execute(sql)
+
# Regular expression used to parse row count estimate from EXPLAIN output
_rows_re = re.compile("rows=(\d+)\swidth=")
+
def estimateRowCount(cur, query):
"""Ask the PostgreSQL query optimizer for an estimated rowcount.
@@ -307,7 +313,7 @@
return int(match.group(1))
-def have_table(cur, table):
+def have_table(cur, table, schema='public'):
"""Is there a table of the given name?
Returns boolean answer.
@@ -324,12 +330,14 @@
cur.execute('''
SELECT count(*) > 0
FROM pg_tables
- WHERE tablename=%s
- ''' % str(quote(table)))
+ WHERE
+ schemaname=%s
+ AND tablename=%s
+ ''' % sqlvalues(schema, table))
return (cur.fetchall()[0][0] != 0)
-def table_has_column(cur, table, column):
+def table_has_column(cur, table, column, schema='public'):
"""Does a table of the given name exist and have the given column?
Returns boolean answer.
@@ -349,13 +357,16 @@
SELECT count(*) > 0
FROM pg_attribute
JOIN pg_class ON pg_class.oid = attrelid
- WHERE relname=%s
+ JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid
+ WHERE
+ nspname=%s
+ AND relname=%s
AND attname=%s
- ''' % sqlvalues(table, column))
+ ''' % sqlvalues(schema, table, column))
return (cur.fetchall()[0][0] != 0)
-def drop_tables(cur, tables):
+def drop_tables(cur, tables, schema='public'):
"""Drop given tables (a list, one name, or None), if they exist.
>>> cur.execute("CREATE TEMP TABLE foo (a integer)")
@@ -381,8 +392,10 @@
if isinstance(tables, basestring):
tables = [tables]
+ fqns = [fqn(schema, table) for table in tables]
+
# This syntax requires postgres 8.2 or better
- cur.execute("DROP TABLE IF EXISTS %s" % ','.join(tables))
+ cur.execute("DROP TABLE IF EXISTS %s" % ', '.join(fqns))
def allow_sequential_scans(cur, permission):
@@ -422,9 +435,10 @@
cur.execute("SET enable_seqscan=%s" % permission_value)
+
def all_tables_in_schema(cur, schema):
"""Return a set of all tables in the given schema.
-
+
:returns: A set of quoted, fully qualified table names.
"""
cur.execute("""
@@ -569,4 +583,3 @@
for table, column in listReferences(cur, 'person', 'id'):
print '%32s %32s' % (table, column)
-
Follow ups