← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~rvb/maas/file-lock-start-up into lp:maas

 

Raphaël Badin has proposed merging lp:~rvb/maas/file-lock-start-up into lp:maas.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~rvb/maas/file-lock-start-up/+merge/119543

The start_up method is called multiple times by the WSGI script when multiple WSGI process are started.  Since this happens outside of a transaction-managed scope we need to make sure that the runs are not concurrent in order to avoid race conditions.

= Pre-imp =

This was mostly discussed with Jeroen.

= Notes =

Note that the race condition I'm talking about is everything but hypothetical.  Andres, while testing a package he was creating, saw it happen and showed me the stacktrace.


-- 
https://code.launchpad.net/~rvb/maas/file-lock-start-up/+merge/119543
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~rvb/maas/file-lock-start-up into lp:maas.
=== modified file 'src/maasserver/start_up.py'
--- src/maasserver/start_up.py	2012-08-08 14:05:45 +0000
+++ src/maasserver/start_up.py	2012-08-14 14:21:20 +0000
@@ -15,11 +15,25 @@
     ]
 
 
+from lockfile import (
+    FileLock,
+    LockFailed,
+    LockTimeout,
+    NotMyLock,
+    )
 from maasserver.dns import write_full_dns_config
 from maasserver.maasavahi import setup_maas_avahi_service
 from maasserver.models import NodeGroup
 
 
+# Lock file used to prevent concurrent runs of the start_up() method.
+LOCK_FILE_NAME = '/var/lock/maas.start-up'
+
+
+# Timeout used to grab the filed-based lock used by the start_up() method.
+LOCK_TIMEOUT = 60
+
+
 def start_up():
     """Start up this MAAS server.
 
@@ -30,8 +44,24 @@
     This method is called when the MAAS application starts up.
     In production, it's called from the WSGI script so this shouldn't block
     at any costs.  It should simply call very simple methods or Celery tasks.
+
+    The method will be executed multiple times if multiple processes are used
+    but this method uses file-based locking to ensure that the methods it calls
+    internally are not ran concurrently.
     """
-
+    lock = FileLock(LOCK_FILE_NAME)
+    try:
+        lock.acquire(timeout=LOCK_TIMEOUT)
+    except (LockTimeout, NotMyLock, LockFailed):
+        raise
+    else:
+        try:
+            inner_start_up()
+        finally:
+            lock.release()
+
+
+def inner_start_up():
     # Publish the MAAS server existence over Avahi.
     setup_maas_avahi_service()
 

=== modified file 'src/maasserver/tests/test_start_up.py'
--- src/maasserver/tests/test_start_up.py	2012-08-08 14:40:21 +0000
+++ src/maasserver/tests/test_start_up.py	2012-08-14 14:21:20 +0000
@@ -12,6 +12,15 @@
 __metaclass__ = type
 __all__ = []
 
+from multiprocessing import (
+    Process,
+    Value,
+    )
+
+from lockfile import (
+    FileLock,
+    LockTimeout,
+    )
 from maasserver import start_up
 from maasserver.models.nodegroup import NodeGroup
 from maastesting.fakemethod import FakeMethod
@@ -21,6 +30,11 @@
 class TestStartUp(TestCase):
     """Testing for the method `start_up`."""
 
+    def setUp(self):
+        super(TestStartUp, self).setUp()
+        self.TEST_LOCK_FILE_NAME = self.make_file()
+        self.patch(start_up, 'LOCK_FILE_NAME', self.TEST_LOCK_FILE_NAME)
+
     def test_start_up_calls_setup_maas_avahi_service(self):
         recorder = FakeMethod()
         self.patch(start_up, 'setup_maas_avahi_service', recorder)
@@ -40,3 +54,30 @@
     def test_start_up_creates_master_nodegroup(self):
         start_up.start_up()
         self.assertEqual(1, NodeGroup.objects.all().count())
+
+    def test_start_up_runs_in_exclusion(self):
+        called = Value('b', False)
+
+        def temp_start_up():
+            def call(called):
+                called.value = True
+                lock = FileLock(start_up.LOCK_FILE_NAME)
+                self.assertRaises(LockTimeout, lock.acquire, timeout=0.1)
+            proc = Process(target=call, args=(called, ))
+            proc.start()
+            proc.join()
+
+        self.patch(start_up, 'inner_start_up', temp_start_up)
+        start_up.start_up()
+        self.assertTrue(called.value)
+
+    def test_start_up_respects_timeout_to_acquire_lock(self):
+        recorder = FakeMethod()
+        self.patch(start_up, 'inner_start_up', recorder)
+        # Use a timeout more suitable for automated testing.
+        self.patch(start_up, 'LOCK_TIMEOUT', 0.1)
+        # Manually create a lock.
+        self.make_file(FileLock(self.TEST_LOCK_FILE_NAME).lock_file)
+
+        self.assertRaises(LockTimeout, start_up.start_up)
+        self.assertEqual(0, recorder.call_count)