← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/optimise-merge-detection into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/optimise-merge-detection into lp:launchpad.

Commit message:
Optimise merge detection when the branch has no landing candidates.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/optimise-merge-detection/+merge/342975

I noticed an OOPS when scanning ~vcs-imports/gcc/git-mirror which appears to be due to a fair amount of non-SQL time somewhere around here; this seems like a likely candidate.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/optimise-merge-detection into lp:launchpad.
=== modified file 'lib/lp/codehosting/scanner/mergedetection.py'
--- lib/lp/codehosting/scanner/mergedetection.py	2016-04-06 12:14:54 +0000
+++ lib/lp/codehosting/scanner/mergedetection.py	2018-04-10 23:11:40 +0000
@@ -92,6 +92,7 @@
     #
     # XXX: JonathanLange 2009-05-11 spec=package-branches: This assumes that
     # merge detection only works with product branches.
+    logger.info("Looking for merged branches.")
     branches = getUtility(IAllBranches).inProduct(db_branch.product)
     branches = branches.withLifecycleStatus(
         BranchLifecycleStatus.DEVELOPMENT,
@@ -151,12 +152,16 @@
     # which introduced the change, that will either be set through the web
     # ui by a person, or by PQM once it is integrated.
 
+    logger.info("Looking for merged proposals.")
     if scan_completed.bzr_branch is None:
         # Only happens in tests.
         merge_sorted = []
     else:
+        # This is potentially slow for deep histories; we defer even
+        # initialising it until we need it, and we cache the iterator's
+        # results.
         merge_sorted = CachingIterator(
-            scan_completed.bzr_branch.iter_merge_sorted_revisions())
+            scan_completed.bzr_branch.iter_merge_sorted_revisions)
     for proposal in db_branch.landing_candidates:
         tip_rev_id = proposal.source_branch.last_scanned_id
         if tip_rev_id in new_ancestry:

=== modified file 'lib/lp/codehosting/scanner/tests/test_mergedetection.py'
--- lib/lp/codehosting/scanner/tests/test_mergedetection.py	2015-10-05 17:03:27 +0000
+++ lib/lp/codehosting/scanner/tests/test_mergedetection.py	2018-04-10 23:11:40 +0000
@@ -32,6 +32,7 @@
     )
 from lp.services.config import config
 from lp.services.database.interfaces import IStore
+from lp.services.log.logger import DevNullLogger
 from lp.services.osutils import override_environ
 from lp.testing import (
     TestCase,
@@ -199,7 +200,7 @@
         mergedetection.auto_merge_branches(
             events.ScanCompleted(
                 db_branch=db_branch, bzr_branch=None,
-                logger=None, new_ancestry=new_ancestry))
+                logger=DevNullLogger(), new_ancestry=new_ancestry))
 
     def mergeDetected(self, logger, source, target):
         # Record the merged branches

=== modified file 'lib/lp/services/tests/test_utils.py'
--- lib/lp/services/tests/test_utils.py	2018-03-23 12:26:35 +0000
+++ lib/lp/services/tests/test_utils.py	2018-04-10 23:11:40 +0000
@@ -7,6 +7,7 @@
 
 from contextlib import contextmanager
 from datetime import datetime
+from functools import partial
 import hashlib
 import itertools
 import os
@@ -145,7 +146,7 @@
 
     def test_reuse(self):
         # The same iterator can be used multiple times.
-        iterator = CachingIterator(itertools.count())
+        iterator = CachingIterator(itertools.count)
         self.assertEqual(
             [0, 1, 2, 3, 4], list(itertools.islice(iterator, 0, 5)))
         self.assertEqual(
@@ -154,7 +155,7 @@
     def test_more_values(self):
         # If a subsequent call to iter causes more values to be fetched, they
         # are also cached.
-        iterator = CachingIterator(itertools.count())
+        iterator = CachingIterator(itertools.count)
         self.assertEqual(
             [0, 1, 2], list(itertools.islice(iterator, 0, 3)))
         self.assertEqual(
@@ -162,14 +163,14 @@
 
     def test_limited_iterator(self):
         # Make sure that StopIteration is handled correctly.
-        iterator = CachingIterator(iter([0, 1, 2, 3, 4]))
+        iterator = CachingIterator(partial(iter, [0, 1, 2, 3, 4]))
         self.assertEqual(
             [0, 1, 2], list(itertools.islice(iterator, 0, 3)))
         self.assertEqual([0, 1, 2, 3, 4], list(iterator))
 
     def test_parallel_iteration(self):
         # There can be parallel iterators over the CachingIterator.
-        ci = CachingIterator(iter([0, 1, 2, 3, 4]))
+        ci = CachingIterator(partial(iter, [0, 1, 2, 3, 4]))
         i1 = iter(ci)
         i2 = iter(ci)
         self.assertEqual(0, i1.next())
@@ -177,6 +178,22 @@
         self.assertEqual([1, 2, 3, 4], list(i2))
         self.assertEqual([1, 2, 3, 4], list(i1))
 
+    def test_deferred_initialisation(self):
+        # Initialising the iterator may be expensive, so CachingIterator
+        # defers this until it needs it.
+        self.initialised = False
+
+        def iterator():
+            self.initialised = True
+            return iter([0, 1, 2])
+
+        ci = CachingIterator(iterator)
+        self.assertFalse(self.initialised)
+        self.assertEqual([0, 1, 2], list(ci))
+        self.assertTrue(self.initialised)
+        self.assertEqual([0, 1, 2], list(ci))
+        self.assertTrue(self.initialised)
+
 
 class TestDecorateWith(TestCase):
     """Tests for `decorate_with`."""

=== modified file 'lib/lp/services/utils.py'
--- lib/lp/services/utils.py	2018-03-23 12:26:35 +0000
+++ lib/lp/services/utils.py	2018-04-10 23:11:40 +0000
@@ -209,10 +209,13 @@
     iterator if necessary.
     """
 
-    def __init__(self, iterator):
-        self.iterator = iterator
+    def __init__(self, iterator_factory):
+        self.iterator_factory = iterator_factory
+        self.iterator = None
 
     def __iter__(self):
+        if self.iterator is None:
+            self.iterator = self.iterator_factory()
         # Teeing an iterator previously returned by tee won't cause heat
         # death. See tee_copy in itertoolsmodule.c in the Python source.
         self.iterator, iterator = tee(self.iterator)


Follow ups