Message #08907
[Merge] lp:~stub/launchpad/staging into lp:launchpad
Stuart Bishop has proposed merging lp:~stub/launchpad/staging into lp:launchpad.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~stub/launchpad/staging/+merge/110818
= Summary =
We are switching from Slony to PostgreSQL 9.1 streaming replication, and the staging rebuild scripts need to be refactored.
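For reference, a minimal sketch of what replacing the Slony development setup with stock PostgreSQL 9.1 streaming replication could look like (the hostname, data directory and replication role below are hypothetical and not part of this branch):

    $ pg_basebackup -D /srv/postgres/standby -h primary.example.com -U replicator -x
    $ cat > /srv/postgres/standby/recovery.conf <<EOF
    standby_mode = 'on'
    primary_conninfo = 'host=primary.example.com user=replicator'
    EOF
    $ pg_ctl -D /srv/postgres/standby start

The primary additionally needs wal_level = hot_standby, max_wal_senders > 0 and a replication entry in pg_hba.conf, per the PostgreSQL 9.1 documentation.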
== Proposed fix ==
== Pre-implementation notes ==
== LOC Rationale ==
== Implementation details ==
== Tests ==
== Demo and Q/A ==
= Launchpad lint =
Checking for conflicts and issues in changed files.
Linting changed files:
database/schema/upgrade.py
database/replication/Makefile
./database/schema/upgrade.py
12: '_pythonpath' imported but unused
--
https://code.launchpad.net/~stub/launchpad/staging/+merge/110818
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~stub/launchpad/staging into lp:launchpad.
=== modified file 'database/replication/Makefile'
--- database/replication/Makefile 2012-04-24 14:11:52 +0000
+++ database/replication/Makefile 2012-06-18 13:30:16 +0000
@@ -1,21 +1,15 @@
# Copyright 2009-2012 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
#
-# This makefile is used for two task:
-#
-# - Build a replicated development environment
-# - Build and replicate the staging database
-#
-# To build a replicated development environment:
-#
-# $ make -C database/replication devsetup
-# $ make run
+# This makefile is used to build and replicate the staging database.
+# To build a replicated development environment, replicate your 9.1 main
+# cluster with standard PG 9.1 streaming replication per the PG docs.
#
# To test the staging rebuild script:
#
# $ mkdir -p /srv/staging.launchpad.net/staging-logs
# $ cd database/replication
-# $ pg_dump --exclude-schema=_sl --format=c launchpad_dev > launchpad.dump
+# $ pg_dump --format=c launchpad_dev > launchpad.dump
# $ make stagingsetup \
# STAGING_CONFIG=dev-staging STAGING_DUMP=launchpad.dump
# $ make stagingswitch STAGING_CONFIG=dev-staging
@@ -26,18 +20,10 @@
# $ make dogfood DOGFOOD_DBNAME=launchpad_dogfood DOGFOOD_DUMP=launchpad.dump
#
-# This used to be 10 seconds, so we always ran staging lagged to detect
-# replication glitches more easily. However, this does not play well
-# with DBLoopTuner, as it correctly blocks when it detects lag.
-# We should put this back when DBLoopTuner is more intelligent, perhaps
-# pulling the allowable lag setting from a config file.
-LAG=0 seconds
-
# For real restores, this is being run on an 8 core system. Let pg_restore
# run multiprocess.
MULTIPROC=-j 4
-DEV_CONFIG=replicated-development
NEW_STAGING_CONFIG=staging-setup # For building the db with a different name.
STAGING_CONFIG=staging-db # For swapping fresh db into place.
STAGING_DUMP=launchpad.dump # Dumpfile to build new staging from.
@@ -46,18 +32,9 @@
DOGFOOD_DBNAME=launchpad_dogfood
DOGFOOD_DUMP=launchpad.dump
-_CONFIG=overridden-on-command-line
-_SLAVE_TABLESPACE=pg_default
-
PGMASSACRE=../../utilities/pgmassacre.py
-CREATEDB_83=createdb --encoding=UTF8
-CREATEDB_84=createdb --encoding=UTF8 --locale=C --template=template0
-CREATEDB=${CREATEDB_84}
-
-# Set this to --exit-on-error once we have migrated Launchpad production
-# to Slony-I 2.0.x.
-EXIT_ON_ERROR=
+CREATEDB=createdb --encoding=UTF8 --locale=C --template=template0
# Turn off output silencing so we can see details of staging deployments.
# Without the timestamps, we are unable to estimate production deployment
@@ -66,161 +43,39 @@
SHHH=
default:
- echo Usage: make [start|stop|restart]
-
-start:
- ./slon_ctl.py --lag="${LAG}" start
-
-stop:
- ./slon_ctl.py stop
-
-restart: stop start
-
-devsetup:
- make _prelim LPCONFIG=${DEV_CONFIG} \
- _MASTER=launchpad_dev _SLAVE=launchpad_dev_slave LAG="0 seconds"
-
- # Build the master database
- LPCONFIG=${DEV_CONFIG} make -C ../schema
-
- # Replicate it
- make _replicate LPCONFIG=${DEV_CONFIG} \
- _MASTER=launchpad_dev _SLAVE=launchpad_dev_slave LAG="0 seconds"
-
- # Replicate it again, so we can test with multiple slaves.
- -${PGMASSACRE} launchpad_dev_slave2
- ${CREATEDB} launchpad_dev_slave2
- LPCONFIG=${DEV_CONFIG} ./slon_ctl.py start \
- node3_node 'dbname=launchpad_dev_slave2 user=slony'
- LPCONFIG=${DEV_CONFIG} ./new-slave.py 3 launchpad_dev_slave2
- LPCONFIG=${DEV_CONFIG} ../schema/security.py -d launchpad_dev_slave2
-
- # Regenerate the preamble for manual slonik(1) usage.
- LPCONFIG=${DEV_CONFIG} ./preamble.py > preamble.sk
-
- # Restart slon daemons with default lag setting.
- LPCONFIG=${DEV_CONFIG} ./slon_ctl.py stop
- LPCONFIG=${DEV_CONFIG} ./slon_ctl.py --lag="${LAG}" start
-
+ echo Need a target
# Build _new staging databases from a production dump.
-stagingsetup: DUMPLIST:=$(shell tempfile --suffix=.lst --prefix=lpstag)
stagingsetup:
- make _prelim LPCONFIG=${NEW_STAGING_CONFIG} \
- _MASTER=lpmain_staging_new _SLAVE=lpmain_staging_slave_new \
- LAG="0 seconds"
+ # Kill the existing temporary staging database if it exists from an
+ # aborted run
+ -${PGMASSACRE} lpmain_staging_new
# Create the DB with the desired default tablespace.
${CREATEDB} --tablespace ${STAGING_TABLESPACE} lpmain_staging_new
- # Drop the plpgsql language if it was installed, as it will be
- # recreated with the dump from PostgreSQL 8.4
- -droplang plpgsql lpmain_staging_new
# Restore the database. We need to restore permissions, despite
# later running security.py, to pull in permissions granted on
# production to users not maintained by security.py.
- # Unfortunately, we need to dump production databases using
- # --exclude-schema=_sl, or the read locks opened by pg_dump will
- # cause the replication to appear lagged and set off alerts.
- # This makes the restoration process more complex - a human being
- # can just restore the dump, scan the errors and confirm it worked.
- # For this automation, we need to massage the list of data to
- # restore from the dump to avoid the spurious errors and get a
- # valid error code.
- pg_restore --list ${STAGING_DUMP} | grep -v 'TRIGGER public _sl_' \
- > ${DUMPLIST}
- pg_restore --dbname=lpmain_staging_new --no-owner ${EXIT_ON_ERROR} \
- --use-list=${DUMPLIST} ${MULTIPROC} ${STAGING_DUMP}
- rm ${DUMPLIST}
- # Uninstall Slony-I if it is installed - a pg_dump of a DB with
- # Slony-I installed isn't usable without this step. This is likely
- # irrelevant, as if our dump was created with --exclude-schema=_sl
- # it will not contain anything to remove.
- LPCONFIG=${NEW_STAGING_CONFIG} ./repair-restored-db.py
- # Apply database patches now. This is unfortunate, as it means
- # the patches are not being applied to a replicated database.
- # With this update, we get incorrect patch application times and
- # won't catch patches incompatible with Slony. We do this now,
- # because we are running short of disk space on the staging
- # database server. With the patches being applied before
- # replication, we can swap the slave database into place before
- # it has finished building and we end up using only 3*production
- # amount of disk space rather than 4*production.
- @echo Running upgrade.py `date` non-replicated. Times need adjustment.
+ pg_restore --dbname=lpmain_staging_new ${EXIT_ON_ERROR} \
+ ${MULTIPROC} ${STAGING_DUMP}
+ # Apply database patches.
+ @echo Running upgrade.py `date`.
LPCONFIG=${NEW_STAGING_CONFIG} ${SHHH} ../schema/upgrade.py \
- --log-file=INFO:${STAGING_LOGDIR}/dbupgrade.log
+ --ignore-slony --log-file=INFO:${STAGING_LOGDIR}/dbupgrade.log
@echo Running security.py `date`
- LPCONFIG=${NEW_STAGING_CONFIG} ${SHHH} ../schema/security.py -U slony \
+ LPCONFIG=${NEW_STAGING_CONFIG} ${SHHH} ../schema/security.py \
--log-file=INFO:${STAGING_LOGDIR}/dbupgrade.log
- # Setup replication
- make _replicate LPCONFIG=${NEW_STAGING_CONFIG} LAG="0 seconds" \
- _MASTER=lpmain_staging_new _SLAVE=lpmain_staging_slave_new \
- _SLAVE_TABLESPACE=${STAGING_TABLESPACE}
# Switch the _new staging databases into place.
stagingswitch:
- # Stop Slony-I daemons - don't confuse the poor darlings.
- -LPCONFIG=${NEW_STAGING_CONFIG} ./slon_ctl.py stop
- -LPCONFIG=${STAGING_CONFIG} ./slon_ctl.py stop
# Kill the existing staging database if it exists.
-${PGMASSACRE} lpmain_staging
- -${PGMASSACRE} lpmain_staging_slave
# Rename the newly build staging databases.
psql -d template1 -c \
"ALTER DATABASE lpmain_staging_new RENAME TO lpmain_staging;"
- psql -d template1 -c "\
- ALTER DATABASE lpmain_staging_slave_new \
- RENAME TO lpmain_staging_slave;"
- # Fix the paths to match.
- psql -d lpmain_staging -U slony -c \
- "UPDATE _sl.sl_path SET pa_conninfo=replace(pa_conninfo, '_new', '')"
- psql -d lpmain_staging_slave -U slony -c \
- "UPDATE _sl.sl_path SET pa_conninfo=replace(pa_conninfo, '_new', '')"
- # Start the slon daemons, with requested lag.
- LPCONFIG=${STAGING_CONFIG} ./slon_ctl.py --lag="${LAG}" start
-dogfood: DUMPLIST:=$(shell tempfile --suffix=.lst --prefix=lpdogf)
dogfood:
${CREATEDB} ${DOGFOOD_DBNAME}
- # May be needed with PG 9.1 if the dump is from PG 8.4.
- -droplang plpgsql ${DOGFOOD_DBNAME}
- pg_restore --list ${DOGFOOD_DUMP} | grep -v 'TRIGGER public _sl_' \
- > ${DUMPLIST}
pg_restore --dbname=${DOGFOOD_DBNAME} --no-acl --no-owner \
- --use-list=${DUMPLIST} ${EXIT_ON_ERROR} ${DOGFOOD_DUMP}
- rm ${DUMPLIST}
- ./repair-restored-db.py -d ${DOGFOOD_DBNAME}
- ../schema/upgrade.py -d ${DOGFOOD_DBNAME}
+ ${EXIT_ON_ERROR} ${DOGFOOD_DUMP}
+ ../schema/upgrade.py --ignore-slony -d ${DOGFOOD_DBNAME}
../schema/security.py -d ${DOGFOOD_DBNAME}
-
-_prelim:
- @echo LPCONFIG currently ${LPCONFIG}
- # Create the slony PostgreSQL superuser if necessary.
- -createuser --superuser slony
- # Stop the slon daemons and wait a bit for connections to drop.
- -./slon_ctl.py stop
- sleep 5
- # Drop any existing databases if they exist
- ${PGMASSACRE} ${_MASTER}
- ${PGMASSACRE} ${_SLAVE}
-
-_replicate:
- @echo LPCONFIG currently ${LPCONFIG}
- # Start the slon daemon for the master.
- ./slon_ctl.py --lag="0 seconds" start \
- node1_node "dbname=${_MASTER} user=slony"
- # Initialize the cluster and create replication sets.
- ./initialize.py \
- --log-file=INFO:${STAGING_LOGDIR}/dbupgrade.log
- # Create the soon-to-be-slave database, empty at this point.
- ${CREATEDB} --tablespace=${_SLAVE_TABLESPACE} ${_SLAVE}
- # Start the slon daemon for the slave
- ./slon_ctl.py --lag="0 seconds" start \
- node2_node "dbname=${_SLAVE} user=slony"
- # Setup the slave
- ./new-slave.py 2 "dbname=${_SLAVE}" \
- --log-file=INFO:${STAGING_LOGDIR}/dbupgrade.log
- # Restart slon daemons with default lag setting.
- ./slon_ctl.py stop
- ./slon_ctl.py --lag="${LAG}" start
- # Generate a preamble for manual slonik(1) usage.
- ./preamble.py > preamble.sk
-
=== modified file 'database/schema/upgrade.py'
--- database/schema/upgrade.py 2012-05-31 09:54:57 +0000
+++ database/schema/upgrade.py 2012-06-18 13:30:16 +0000
@@ -44,7 +44,7 @@
con = connect()
patches = get_patchlist(con)
- if replication.helpers.slony_installed(con):
+ if not options.ignore_slony and replication.helpers.slony_installed(con):
con.close()
if options.commit is False:
parser.error("--dry-run does not make sense with replicated db")
@@ -609,14 +609,14 @@
db_options(parser)
logger_options(parser)
parser.add_option(
- "-n", "--dry-run", dest="commit", default=True,
- action="store_false", help="Don't actually commit changes"
- )
- parser.add_option(
- "--partial", dest="partial", default=False,
- action="store_true",
- help="Commit after applying each patch",
- )
+ "-n", "--dry-run", dest="commit", default=True,
+ action="store_false", help="Don't actually commit changes")
+ parser.add_option(
+ "--partial", dest="partial", default=False,
+ action="store_true", help="Commit after applying each patch",)
+ parser.add_option(
+ "--ignore-slony", dest="ignore_slony", default=False,
+ action="store_true", help="Ignore any Slony installations",)
(options, args) = parser.parse_args()
if args:
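As a usage note (a sketch of the intended invocation, not output from this branch): with the new --ignore-slony option, the staging rebuild can apply schema patches directly to the freshly restored, non-replicated database, roughly

    $ LPCONFIG=staging-setup database/schema/upgrade.py \
          --ignore-slony \
          --log-file=INFO:/srv/staging.launchpad.net/staging-logs/dbupgrade.log

using the staging-setup config and the staging log directory referred to in the Makefile above.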