← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~stub/launchpad/branch-rewrite into lp:launchpad

 

Stuart Bishop has proposed merging lp:~stub/launchpad/branch-rewrite into lp:launchpad with lp:~stub/launchpad/pgbouncer-fixture as a prerequisite.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~stub/launchpad/branch-rewrite/+merge/73284

= Summary =

branch-rewrite.py does not reconnect after a database outage.

== Proposed fix ==

Make it reconnect after a database outage.
-- 
https://code.launchpad.net/~stub/launchpad/branch-rewrite/+merge/73284
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~stub/launchpad/branch-rewrite into lp:launchpad.
=== modified file 'lib/lp/codehosting/tests/test_rewrite.py'
--- lib/lp/codehosting/tests/test_rewrite.py	2011-08-12 11:37:08 +0000
+++ lib/lp/codehosting/tests/test_rewrite.py	2011-08-29 19:48:49 +0000
@@ -14,7 +14,10 @@
 from zope.security.proxy import removeSecurityProxy
 
 from canonical.config import config
-from canonical.testing.layers import DatabaseFunctionalLayer
+from canonical.testing.layers import (
+    DatabaseFunctionalLayer,
+    LaunchpadScriptLayer,
+    )
 from lp.code.interfaces.codehosting import branch_id_alias
 from lp.codehosting.rewrite import BranchRewriter
 from lp.codehosting.vfs import branch_id_to_path
@@ -23,6 +26,7 @@
     FakeTime,
     TestCaseWithFactory,
     )
+from lp.testing.fixture import PGBouncerFixture
 
 
 class TestBranchRewriter(TestCaseWithFactory):
@@ -177,7 +181,8 @@
         transaction.commit()
         rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README')
         rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README')
-        logging_output_lines = self.getLoggerOutput(rewriter).strip().split('\n')
+        logging_output_lines = self.getLoggerOutput(
+            rewriter).strip().split('\n')
         self.assertEqual(2, len(logging_output_lines))
         self.assertIsNot(
             None,
@@ -194,7 +199,8 @@
         self.fake_time.advance(
             config.codehosting.branch_rewrite_cache_lifetime + 1)
         rewriter.rewriteLine('/' + branch.unique_name + '/.bzr/README')
-        logging_output_lines = self.getLoggerOutput(rewriter).strip().split('\n')
+        logging_output_lines = self.getLoggerOutput(
+            rewriter).strip().split('\n')
         self.assertEqual(2, len(logging_output_lines))
         self.assertIsNot(
             None,
@@ -274,3 +280,62 @@
         # The script produces logging output, but not to stderr.
         self.assertEqual('', err)
         self.assertEqual(expected_lines, output_lines)
+
+
+class TestBranchRewriterScriptHandlesDisconnects(TestCaseWithFactory):
+    """Ensure branch-rewrite.py survives fastdowntime deploys."""
+    layer = LaunchpadScriptLayer
+
+    def setUp(self):
+        super(TestBranchRewriterScriptHandlesDisconnects, self).setUp()
+        self.pgbouncer = PGBouncerFixture()
+        self.addCleanup(self.pgbouncer.cleanUp)
+        self.pgbouncer.setUp()
+
+    def spawn(self):
+        script_file = os.path.join(
+            config.root, 'scripts', 'branch-rewrite.py')
+
+        self.rewriter_proc = subprocess.Popen(
+            [script_file], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE, bufsize=0)
+
+    def request(self, query):
+        self.rewriter_proc.stdin.write(query + '\n')
+        return self.rewriter_proc.stdout.readline().rstrip('\n')
+
+    def test_disconnect(self):
+        self.spawn()
+
+        # Everything should be working, and we get valid output.
+        out = self.request('foo')
+        assert out.endswith('/foo'), out
+
+        self.pgbouncer.stop()
+
+        # Now with pgbouncer down, we should get NULL messages and
+        # stderr spam, and this keeps happening. We test more than
+        # once to ensure that we will keep trying to reconnect even
+        # after several failures.
+        for count in range(5):
+            out = self.request('foo')
+            assert out == 'NULL', out
+
+        self.pgbouncer.start()
+
+        # Everything should be working, and we get valid output.
+        out = self.request('foo')
+        assert out.endswith('/foo'), out
+
+    def test_starts_with_db_down(self):
+        self.pgbouncer.stop()
+        self.spawn()
+
+        for count in range(5):
+            out = self.request('foo')
+            assert out == 'NULL', out
+
+        self.pgbouncer.start()
+
+        out = self.request('foo')
+        assert out.endswith('/foo'), out

=== modified file 'scripts/branch-rewrite.py'
--- scripts/branch-rewrite.py	2010-11-06 12:50:22 +0000
+++ scripts/branch-rewrite.py	2011-08-29 19:48:49 +0000
@@ -19,6 +19,8 @@
 
 from canonical.database.sqlbase import ISOLATION_LEVEL_AUTOCOMMIT
 from canonical.config import config
+from canonical.launchpad.interfaces.lpstorm import ISlaveStore
+from lp.code.model.branch import Branch
 from lp.codehosting.rewrite import BranchRewriter
 from lp.services.log.loglevels import INFO, WARNING
 from lp.services.scripts.base import LaunchpadScript
@@ -58,11 +60,18 @@
                 else:
                     # Standard input has been closed, so die.
                     return
-            except KeyboardInterrupt:
-                sys.exit()
-            except:
+            except Exception:
                 self.logger.exception('Exception occurred:')
                 print "NULL"
+                # The exception might have been a DisconnectionError or
+                # similar. Cleanup such as database reconnection will
+                # not happen until the transaction is rolled back. We
+                # are explicitly rolling back the store here instead of
+                # using transaction.abort() due to Bug #819282.
+                try:
+                    ISlaveStore(Branch).rollback()
+                except Exception:
+                    self.logger.exception('Exception occurred in rollback:')
 
 
 if __name__ == '__main__':