launchpad-reviewers team mailing list archive

Thread
Date

[Merge] lp:~lifeless/launchpad/storylayers into lp:launchpad

To: mp+66737@xxxxxxxxxxxxxxxxxx
From: Robert Collins <robertc@xxxxxxxxxxxxxxxxx>
Date: Mon, 04 Jul 2011 02:14:35 -0000
Reply-to: mp+66737@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

Robert Collins has proposed merging lp:~lifeless/launchpad/storylayers into lp:launchpad.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~lifeless/launchpad/storylayers/+merge/66737

This aids parallel testing: we had tests that required test A, B, C to be run by the same test runner strictly in series. These tests existed to test a layer facility that was not needed: since we started wrapping story tests in an opaque layer for testr - so that they report as one test as far as test discovery and zope testrunner layer planning are concerned.

So this branch just nukes all of that, making all code assume that the reset_between_tests rule is True, and consolidates the tests that resets happen to each run as one test manually calling the layer per-test tear down and setup logic.
-- 
https://code.launchpad.net/~lifeless/launchpad/storylayers/+merge/66737
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~lifeless/launchpad/storylayers into lp:launchpad.

=== modified file 'lib/canonical/database/ftests/test_postgresql.py'
--- lib/canonical/database/ftests/test_postgresql.py	2011-02-19 13:50:19 +0000
+++ lib/canonical/database/ftests/test_postgresql.py	2011-07-04 02:14:32 +0000
@@ -5,6 +5,7 @@
 
 from doctest import DocTestSuite
 
+from canonical.testing.layers import BaseLayer
 from lp.testing.pgsql import PgTestSetup
 
 def setUp(test):
@@ -63,5 +64,6 @@
             "canonical.database.postgresql",
             setUp=setUp, tearDown=tearDown
             )
+    suite.layer = BaseLayer
     return suite
 

=== modified file 'lib/canonical/launchpad/testing/pages.py'
--- lib/canonical/launchpad/testing/pages.py	2011-05-27 21:12:25 +0000
+++ lib/canonical/launchpad/testing/pages.py	2011-07-04 02:14:32 +0000
@@ -883,14 +883,7 @@
     def run(self, result=None):
         if result is None:
             result = self.defaultTestResult()
-        PageTestLayer.startStory()
-        try:
-            # XXX Robert Collins 2006-01-17: we can hook in pre and post
-            # story actions here much more tidily (and in self.debug too)
-            # - probably via self.setUp and self.tearDown
-            self._suite.run(result)
-        finally:
-            PageTestLayer.endStory()
+        return self._suite.run(result)
 
     def debug(self):
         self._suite.debug()

=== modified file 'lib/canonical/testing/ftests/test_layers.py'
--- lib/canonical/testing/ftests/test_layers.py	2011-06-21 12:38:23 +0000
+++ lib/canonical/testing/ftests/test_layers.py	2011-07-04 02:14:32 +0000
@@ -337,21 +337,17 @@
             self.assertFalse(os.path.exists(active_root))
 
 
-class LibrarianNoResetTestCase(testtools.TestCase):
+class LibrarianResetTestCase(testtools.TestCase):
     """Our page tests need to run multple tests without destroying
     the librarian database in between.
     """
-    layer = LaunchpadLayer
+    layer = LibrarianLayer
 
     sample_data = 'This is a test'
 
-    def testNoReset1(self):
-        # Inform the librarian not to reset the library until we say
-        # otherwise
-        LibrarianLayer._reset_between_tests = False
-
-        # Add a file for testNoReset2. We use remoteAddFile because
-        # it does not need the CA loaded to work.
+    def test_librarian_is_reset(self):
+        # Add a file. We use remoteAddFile because it does not need the CA
+        # loaded to work.
         client = LibrarianClient()
         LibrarianTestCase.url = client.remoteAddFile(
                 self.sample_data, len(self.sample_data),
@@ -360,20 +356,10 @@
         self.failUnlessEqual(
                 urlopen(LibrarianTestCase.url).read(), self.sample_data
                 )
-
-    def testNoReset2(self):
-        # The file added by testNoReset1 should be there
-        self.failUnlessEqual(
-                urlopen(LibrarianTestCase.url).read(), self.sample_data
-                )
-        # Restore this - keeping state is our responsibility
-        LibrarianLayer._reset_between_tests = True
-        # The database was committed to, but not by this process, so we need
-        # to ensure that it is fully torn down and recreated.
-        DatabaseLayer.force_dirty_database()
-
-    def testNoReset3(self):
-        # The file added by testNoReset1 should be gone
+        # Perform the librarian specific between-test code:
+        LibrarianLayer.testTearDown()
+        LibrarianLayer.testSetUp()
+        # Which should have nuked the old file.
         # XXX: StuartBishop 2006-06-30 Bug=51370:
         # We should get a DownloadFailed exception here.
         data = urlopen(LibrarianTestCase.url).read()
@@ -423,36 +409,18 @@
         num = cur.fetchone()[0]
         return num
 
-    # XXX: Parallel-fail: because layers are not cleanly integrated with
-    # unittest, what should be one test is expressed as three distinct
-    # tests here. We need to either write enough glue to push/pop the
-    # global state of zope.testing.runner or we need to stop using layers,
-    # before these tests will pass in a parallel run. Robert Collins
-    # 2010-11-01
-    def testNoReset1(self):
-        # Ensure that we can switch off database resets between tests
-        # if necessary, such as used by the page tests
-        DatabaseLayer._reset_between_tests = False
+    def test_db_is_reset(self):
         con = DatabaseLayer.connect()
         cur = con.cursor()
         cur.execute("DELETE FROM Wikiname")
         self.failUnlessEqual(self.getWikinameCount(con), 0)
         con.commit()
-
-    def testNoReset2(self):
-        # Wikiname table was emptied by testNoReset1 and should still
-        # contain nothing.
-        con = DatabaseLayer.connect()
-        self.failUnlessEqual(self.getWikinameCount(con), 0)
-        # Note we don't need to commit, but we do need to force
-        # a reset!
-        DatabaseLayer._reset_between_tests = True
-        DatabaseLayer.force_dirty_database()
-
-    def testNoReset3(self):
-        # Wikiname table should contain data again
-        con = DatabaseLayer.connect()
-        self.failIfEqual(self.getWikinameCount(con), 0)
+        # Run the per-test code for the Database layer.
+        DatabaseLayer.testTearDown()
+        DatabaseLayer.testSetUp()
+        # Wikiname table should have been restored.
+        con = DatabaseLayer.connect()
+        self.assertNotEqual(0, self.getWikinameCount(con))
 
 
 class LaunchpadTestCase(BaseTestCase):

=== modified file 'lib/canonical/testing/layers.py'
--- lib/canonical/testing/layers.py	2011-06-30 16:18:11 +0000
+++ lib/canonical/testing/layers.py	2011-07-04 02:14:32 +0000
@@ -575,7 +575,6 @@
     ZopelessLayer, FunctionalLayer or sublayer as they will be accessing
     memcached using a utility.
     """
-    _reset_between_tests = True
 
     # A memcache.Client instance.
     client = None
@@ -644,9 +643,8 @@
     @classmethod
     @profiled
     def testSetUp(cls):
-        if MemcachedLayer._reset_between_tests:
-            MemcachedLayer.client.forget_dead_hosts()
-            MemcachedLayer.client.flush_all()
+        MemcachedLayer.client.forget_dead_hosts()
+        MemcachedLayer.client.flush_all()
 
     @classmethod
     @profiled
@@ -665,7 +663,6 @@
 
 class RabbitMQLayer(BaseLayer):
     """Provides tests access to a rabbitMQ instance."""
-    _reset_between_tests = True
 
     rabbit = RabbitServer()
 
@@ -711,13 +708,11 @@
 class DatabaseLayer(BaseLayer):
     """Provides tests access to the Launchpad sample database."""
 
-    # If set to False, database will not be reset between tests. It is
-    # your responsibility to set it back to True and call
-    # Database.force_dirty_database() when you do so.
-    _reset_between_tests = True
-
     _is_setup = False
     _db_fixture = None
+    # For parallel testing, we allocate a temporary template to prevent worker
+    # contention.
+    _db_template_fixture = None
 
     @classmethod
     @profiled
@@ -726,7 +721,16 @@
         # Read the sequences we'll need from the test template database.
         reset_sequences_sql = LaunchpadTestSetup(
             dbname='launchpad_ftest_template').generateResetSequencesSQL()
-        cls._db_fixture = LaunchpadTestSetup(
+        # Allocate a template for this test instance
+        if os.environ.get('LP_TEST_INSTANCE'):
+            template_name = '_'.join([LaunchpadTestSetup.template,
+                os.environ.get('LP_TEST_INSTANCE')])
+            cls._db_template_fixture = LaunchpadTestSetup(dbname=template_name,
+                reset_sequences_sql=reset_sequences_sql)
+            cls._db_template_fixture.setUp()
+        else:
+            template_name = LaunchpadTestSetup.template
+        cls._db_fixture = LaunchpadTestSetup(template=template_name,
             reset_sequences_sql=reset_sequences_sql)
         cls.force_dirty_database()
         # Nuke any existing DB (for persistent-test-services) [though they
@@ -756,6 +760,8 @@
         cls.force_dirty_database()
         cls._db_fixture.tearDown()
         cls._db_fixture = None
+        cls._db_template_fixture.tearDown()
+        cls._db_template_fixture = None
 
     @classmethod
     @profiled
@@ -767,8 +773,7 @@
 
     @classmethod
     def _ensure_db(cls):
-        if cls._reset_between_tests:
-            cls._db_fixture.setUp()
+        cls._db_fixture.setUp()
         # Ensure that the database is connectable. Because we might have
         # just created it, keep trying for a few seconds incase PostgreSQL
         # is taking its time getting its house in order.
@@ -792,8 +797,7 @@
         # Ensure that the database is connectable
         cls.connect().close()
 
-        if cls._reset_between_tests:
-            cls._db_fixture.tearDown()
+        cls._db_fixture.tearDown()
 
         # Fail tests that forget to uninstall their database policies.
         from canonical.launchpad.webapp.adapter import StoreSelector
@@ -880,17 +884,12 @@
     Calls to the Librarian will fail unless there is also a Launchpad
     database available.
     """
-    _reset_between_tests = True
 
     librarian_fixture = None
 
     @classmethod
     @profiled
     def setUp(cls):
-        if not cls._reset_between_tests:
-            raise LayerInvariantError(
-                    "_reset_between_tests changed before LibrarianLayer "
-                    "was actually used.")
         cls.librarian_fixture = LibrarianServerFixture(
             BaseLayer.config_fixture)
         cls.librarian_fixture.setUp()
@@ -913,13 +912,7 @@
         finally:
             librarian = cls.librarian_fixture
             cls.librarian_fixture = None
-            try:
-                if not cls._reset_between_tests:
-                    raise LayerInvariantError(
-                        "_reset_between_tests not reset before "
-                        "LibrarianLayer shutdown")
-            finally:
-                librarian.cleanUp()
+            librarian.cleanUp()
 
     @classmethod
     @profiled
@@ -937,8 +930,7 @@
                     "LibrarianLayer.reveal() where possible, and ensure "
                     "the Librarian is restarted if it absolutely must be "
                     "shutdown: " + str(e))
-        if cls._reset_between_tests:
-            cls.librarian_fixture.clear()
+        cls.librarian_fixture.clear()
 
     @classmethod
     @profiled
@@ -1642,13 +1634,6 @@
 
     @classmethod
     @profiled
-    def resetBetweenTests(cls, flag):
-        LibrarianLayer._reset_between_tests = flag
-        DatabaseLayer._reset_between_tests = flag
-        MemcachedLayer._reset_between_tests = flag
-
-    @classmethod
-    @profiled
     def setUp(cls):
         if os.environ.get('PROFILE_PAGETESTS_REQUESTS'):
             PageTestLayer.profiler = Profile()
@@ -1678,12 +1663,10 @@
         PageTestLayer.orig__call__ = (
                 zope.app.testing.functional.HTTPCaller.__call__)
         zope.app.testing.functional.HTTPCaller.__call__ = my__call__
-        PageTestLayer.resetBetweenTests(True)
 
     @classmethod
     @profiled
     def tearDown(cls):
-        PageTestLayer.resetBetweenTests(True)
         zope.app.testing.functional.HTTPCaller.__call__ = (
                 PageTestLayer.orig__call__)
         if PageTestLayer.profiler:
@@ -1692,25 +1675,8 @@
 
     @classmethod
     @profiled
-    def startStory(cls):
-        MemcachedLayer.testSetUp()
-        DatabaseLayer.testSetUp()
-        LibrarianLayer.testSetUp()
+    def testSetUp(cls):
         LaunchpadLayer.resetSessionDb()
-        PageTestLayer.resetBetweenTests(False)
-
-    @classmethod
-    @profiled
-    def endStory(cls):
-        PageTestLayer.resetBetweenTests(True)
-        LibrarianLayer.testTearDown()
-        DatabaseLayer.testTearDown()
-        MemcachedLayer.testTearDown()
-
-    @classmethod
-    @profiled
-    def testSetUp(cls):
-        pass
 
     @classmethod
     @profiled

=== modified file 'lib/lp/bugs/model/tests/test_bugtask.py'
=== modified file 'lib/lp/testing/pgsql.py'
--- lib/lp/testing/pgsql.py	2011-04-08 00:43:30 +0000
+++ lib/lp/testing/pgsql.py	2011-07-04 02:14:32 +0000
@@ -11,7 +11,10 @@
 import os
 import random
 import time
+import sys
 
+from bzrlib.lock import WriteLock
+from bzrlib.errors import LockContention
 import psycopg2
 
 from canonical.config import config
@@ -261,35 +264,92 @@
             return
         self.dropDb()
 
-        # Create the database from the template. We might need to keep
-        # trying for a few seconds in case there are connections to the
-        # template database that are slow in dropping off.
-        attempts = 60
-        for counter in range(0, attempts):
-            con = self.superuser_connection(self.template)
+        # Take out an external lock on the template to avoid causing contention
+        # and impeding other processes (pg performs poorly when performing
+        # concurrent create db from a single template).
+        pid = os.getpid()
+        start = time.time()
+        # try for up to 10 seconds:
+        debug = False
+        if debug:
+            sys.stderr.write('%0.2f starting %s\n' % (start, pid,))
+        l = None
+        lockname = '/tmp/lp.createdb.%s' % (self.template,)
+        # Wait for the external lock. Most LP tests use the DatabaseLayer which
+        # does a double-indirect: it clones the launchpad_ftest_template into a
+        # per-test runner template, so we don't have much template contention.
+        # However there are a few tests in LP which do use template1 and will
+        # contend a lot. Cloning template1 takes 0.2s on a modern machine, so
+        # even a modest 8-way server will trivially backlog on db cloning.
+        # The 30 second time is enough to deal with the backlog on the known
+        # template1 using tests.
+        while time.time() - start < 30.0:
             try:
-                con.set_isolation_level(0)
-                cur = con.cursor()
-                try:
-                    cur.execute(
-                        "CREATE DATABASE %s TEMPLATE=%s "
-                        "ENCODING='UNICODE'" % (
-                            self.dbname, self.template))
-                    # Try to ensure our cleanup gets invoked, even in
-                    # the face of adversity such as the test suite
-                    # aborting badly.
-                    atexit.register(self.dropDb)
-                    break
-                except psycopg2.DatabaseError, x:
-                    if counter == attempts - 1:
-                        raise
-                    x = str(x)
-                    if 'being accessed by other users' not in x:
-                        raise
-            finally:
-                con.close()
-            # Let the other user complete their copying of the template DB.
+                if debug:
+                    sys.stderr.write('taking %s\n' % (pid,))
+                l = WriteLock(lockname)
+                if debug:
+                    sys.stderr.write('%0.2f taken %s\n' % (time.time(), pid,))
+                break
+            except LockContention:
+                if debug:
+                    sys.stderr.write('blocked %s\n' % (pid,))
             time.sleep(random.random())
+        if l is None:
+            raise LockContention(lockname)
+        try:
+            # The clone may be delayed if gc has not disconnected other
+            # processes which have done a recent clone. So provide a spin
+            # with an exponential backoff.
+            attempts = 10
+            for counter in range(0, attempts):
+                if debug:
+                    sys.stderr.write('%0.2f connecting %s %s\n' % (
+                        time.time(), pid, self.template))
+                con = self.superuser_connection(self.template)
+                try:
+                    con.set_isolation_level(0)
+                    cur = con.cursor()
+                    try:
+                        _start = time.time()
+                        try:
+                            cur.execute(
+                                "CREATE DATABASE %s TEMPLATE=%s "
+                                "ENCODING='UNICODE'" % (
+                                    self.dbname, self.template))
+                            # Try to ensure our cleanup gets invoked, even in
+                            # the face of adversity such as the test suite
+                            # aborting badly.
+                            atexit.register(self.dropDb)
+                            if debug:
+                                sys.stderr.write('create db in %0.2fs\n' % (
+                                    time.time()-_start,))
+                            break
+                        except psycopg2.DatabaseError, x:
+                            if counter == attempts - 1:
+                                raise
+                            x = str(x)
+                            if 'being accessed by other users' not in x:
+                                raise
+                    finally:
+                        cur.close()
+                finally:
+                    con.close()
+                duration = (2**counter)*random.random()
+                if debug:
+                    sys.stderr.write(
+                        '%0.2f busy:sleeping (%d retries) %s %s %s\n' % (
+                        time.time(), counter, pid, self.template, duration))
+                # Let the server wrap up whatever was blocking the copy of the template.
+                time.sleep(duration)
+            end = time.time()
+            if debug:
+                sys.stderr.write('%0.2f (%0.2f) completed (%d retries) %s %s\n' % (
+                    end, end-start, counter, pid, self.template))
+        finally:
+            l.unlock()
+            if debug:
+                sys.stderr.write('released %s\n' % (pid,))
         ConnectionWrapper.committed = False
         ConnectionWrapper.dirty = False
         PgTestSetup._last_db = (self.template, self.dbname)
@@ -328,7 +388,7 @@
             try:
                 con = self.superuser_connection(self.template)
             except psycopg2.OperationalError, x:
-                if 'does not exist' in x:
+                if 'does not exist' in str(x):
                     return
                 raise
             try: