← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/copy-lock-archive into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/copy-lock-archive into lp:launchpad.

Commit message:
Take an advisory lock on the target archive when copying packages.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/copy-lock-archive/+merge/279275

Take an advisory lock on the target archive when copying packages; otherwise it's possible for multiple copies of the same package into different series in the same archive to race with the conflict checker and create conflicting builds.

The main potential downside that I can think of is that a large mass sync into the Ubuntu archive could potentially result in quite a few copies hitting the lock and needing to be retried repeatedly, taking those jobs over the retry limit.  I *think* that the ten-minute delay on retries means that it's unlikely we'll hit the retry limit in practice, but it's possible.  I'd welcome advice on whether this is a problem and if so what we might do about it.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/copy-lock-archive into lp:launchpad.
=== modified file 'lib/lp/services/database/locking.py'
--- lib/lp/services/database/locking.py	2015-03-16 16:22:19 +0000
+++ lib/lp/services/database/locking.py	2015-12-02 13:59:35 +0000
@@ -39,6 +39,11 @@
         Git repository reference scan.
         """)
 
+    PACKAGE_COPY = DBItem(2, """Package copy.
+
+        Package copy.
+        """)
+
 
 @contextmanager
 def try_advisory_lock(lock_type, lock_id, store):

=== modified file 'lib/lp/soyuz/model/packagecopyjob.py'
--- lib/lp/soyuz/model/packagecopyjob.py	2015-07-09 20:06:17 +0000
+++ lib/lp/soyuz/model/packagecopyjob.py	2015-12-02 13:59:35 +0000
@@ -49,6 +49,11 @@
     IMasterStore,
     IStore,
     )
+from lp.services.database.locking import (
+    AdvisoryLockHeld,
+    LockType,
+    try_advisory_lock,
+    )
 from lp.services.database.stormbase import StormBase
 from lp.services.job.interfaces.job import JobStatus
 from lp.services.job.model.job import (
@@ -266,7 +271,7 @@
     user_error_types = (CannotCopy,)
     # Raised when closing bugs ends up hitting another process and
     # deadlocking.
-    retry_error_types = (TransactionRollbackError,)
+    retry_error_types = (TransactionRollbackError, AdvisoryLockHeld)
     max_retries = 5
 
     @classmethod
@@ -575,7 +580,10 @@
     def run(self):
         """See `IRunnableJob`."""
         try:
-            self.attemptCopy()
+            with try_advisory_lock(
+                    LockType.PACKAGE_COPY, self.target_archive_id,
+                    IStore(Archive)):
+                self.attemptCopy()
         except CannotCopy as e:
             # Remember the target archive purpose, as otherwise aborting the
             # transaction will forget it.
@@ -600,7 +608,7 @@
                 # the job.  We will normally have a DistroSeriesDifference
                 # in this case.
                 pass
-        except SuspendJobException:
+        except (SuspendJobException, AdvisoryLockHeld):
             raise
         except:
             # Abort work done so far, but make sure that we commit the

=== modified file 'lib/lp/soyuz/tests/test_packagecopyjob.py'
--- lib/lp/soyuz/tests/test_packagecopyjob.py	2015-09-03 15:14:07 +0000
+++ lib/lp/soyuz/tests/test_packagecopyjob.py	2015-12-02 13:59:35 +0000
@@ -4,8 +4,11 @@
 """Tests for sync package jobs."""
 
 import operator
+import os
+import signal
 from textwrap import dedent
 
+from fixtures import FakeLogger
 from storm.store import Store
 from testtools.content import text_content
 from testtools.matchers import (
@@ -26,6 +29,10 @@
     )
 from lp.services.config import config
 from lp.services.database.interfaces import IStore
+from lp.services.database.locking import (
+    LockType,
+    try_advisory_lock,
+    )
 from lp.services.features.testing import FeatureFixture
 from lp.services.job.interfaces.job import JobStatus
 from lp.services.job.runner import JobRunner
@@ -56,6 +63,7 @@
 from lp.soyuz.interfaces.sourcepackageformat import (
     ISourcePackageFormatSelectionSet,
     )
+from lp.soyuz.model.archive import Archive
 from lp.soyuz.model.packagecopyjob import PackageCopyJob
 from lp.soyuz.model.queue import PackageUpload
 from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
@@ -326,6 +334,39 @@
 
         self.assertRaises(Boom, job.run)
 
+    def test_run_tries_advisory_lock(self):
+        # A job is retried if an advisory lock for the same archive is held.
+        logger = self.useFixture(FakeLogger())
+        job = create_proper_job(self.factory)
+        switch_dbuser(self.dbuser)
+        # Fork so that we can take an advisory lock from a different
+        # PostgreSQL session.
+        read, write = os.pipe()
+        pid = os.fork()
+        if pid == 0:  # child
+            os.close(read)
+            with try_advisory_lock(
+                    LockType.PACKAGE_COPY, job.target_archive_id,
+                    IStore(Archive)):
+                os.write(write, b"1")
+                try:
+                    signal.pause()
+                except KeyboardInterrupt:
+                    pass
+            os._exit(0)
+        else:  # parent
+            try:
+                os.close(write)
+                os.read(read, 1)
+                runner = JobRunner([job])
+                runner.runAll()
+                self.assertEqual(JobStatus.WAITING, job.status)
+                self.assertEqual([], runner.oops_ids)
+                self.assertIn(
+                    "Scheduling retry due to AdvisoryLockHeld", logger.output)
+            finally:
+                os.kill(pid, signal.SIGINT)
+
     def test_run_posts_copy_failure_as_comment(self):
         # If the job fails with a CannotCopy exception, it swallows the
         # exception and posts a DistroSeriesDifferenceComment with the


Follow ups