← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/turnip:detect-merges-stop into turnip:master

 

Colin Watson has proposed merging ~cjwatson/turnip:detect-merges-stop into turnip:master.

Commit message:
Add stop commits to detect-merges

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1955040 in turnip: "Merge detection times out during ref scans in repositories with deep history"
  https://bugs.launchpad.net/turnip/+bug/1955040

For more details, see:
https://code.launchpad.net/~cjwatson/turnip/+git/turnip/+merge/445341

The `detect-merges` API can be very slow for repositories with deep history, because as currently defined it has to walk through the whole ancestry of a given target commit looking for "mainline" (first parent only) commits that introduce a merge of any of the given source commits.  In particular, this often times out for Linux kernel repositories.

However, we can do much better.  Merge proposals are normally only created in Launchpad once the target branch has been scanned, and we can reasonably assume that when a merge proposal is created it won't yet have been merged.  This means that Launchpad could tell turnip's `detect-merges` API to stop walking through history whenever it encounters the commit that it had previously scanned on the target branch, and then we'd only have to look through the changes introduced by a push rather than the entire history of the branch.  This should be much less likely to time out.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/turnip:detect-merges-stop into turnip:master.
diff --git a/turnip/api/store.py b/turnip/api/store.py
index 7a2f883..72554ac 100644
--- a/turnip/api/store.py
+++ b/turnip/api/store.py
@@ -703,7 +703,7 @@ def get_commits(repo_store, repo_name, commit_oids):
         return commits
 
 
-def detect_merges(repo_store, repo_name, target_oid, source_oids):
+def detect_merges(repo_store, repo_name, target_oid, source_oids, stop_oids):
     """Check whether each of the requested commits has been merged."""
     with open_repo(repo_store, repo_name) as repo:
         target = repo.get(target_oid)
@@ -717,7 +717,10 @@ def detect_merges(repo_store, repo_name, target_oid, source_oids):
         merge_info = {}
         last_mainline = target_oid
         next_mainline = target_oid
-        for commit in repo.walk(target_oid, GIT_SORT_TOPOLOGICAL):
+        walker = repo.walk(target_oid, GIT_SORT_TOPOLOGICAL)
+        for stop_oid in stop_oids:
+            walker.hide(stop_oid)
+        for commit in walker:
             if commit.id.hex == next_mainline:
                 last_mainline = commit.id.hex
                 if commit.parent_ids:
diff --git a/turnip/api/tests/test_api.py b/turnip/api/tests/test_api.py
index 0e566b5..1a6f195 100644
--- a/turnip/api/tests/test_api.py
+++ b/turnip/api/tests/test_api.py
@@ -982,6 +982,36 @@ class ApiTestCase(TestCase, ApiRepoStoreMixin):
         self.assertEqual(200, resp.status_code)
         self.assertEqual({b.hex: c.hex, e.hex: g.hex}, resp.json)
 
+    def test_repo_detect_merges_stop(self):
+        """detect-merges stops walking at specified stop commits.
+
+        This reduces the depth of history it needs to scan, which can be
+        important for large repositories.
+        """
+        factory = RepoFactory(self.repo_store)
+        # A---C---D---G---H
+        #  \ /       /
+        #   B---E---F---I
+        a = factory.add_commit('a\n', 'file')
+        b = factory.add_commit('b\n', 'file', parents=[a])
+        c = factory.add_commit('c\n', 'file', parents=[a, b])
+        d = factory.add_commit('d\n', 'file', parents=[c])
+        e = factory.add_commit('e\n', 'file', parents=[b])
+        f = factory.add_commit('f\n', 'file', parents=[e])
+        g = factory.add_commit('g\n', 'file', parents=[d, f])
+        h = factory.add_commit('h\n', 'file', parents=[g])
+        i = factory.add_commit('i\n', 'file', parents=[f])
+        resp = self.app.post_json('/repo/{}/detect-merges/{}'.format(
+            self.repo_path, h),
+            {'sources': [b.hex, e.hex, i.hex], 'stop': [c.hex]})
+        self.assertEqual(200, resp.status_code)
+        self.assertEqual({e.hex: g.hex}, resp.json)
+        resp = self.app.post_json('/repo/{}/detect-merges/{}'.format(
+            self.repo_path, h),
+            {'sources': [b.hex, e.hex, i.hex], 'stop': [c.hex, g.hex]})
+        self.assertEqual(200, resp.status_code)
+        self.assertEqual({}, resp.json)
+
     def test_repo_blob(self):
         """Getting an existing blob works."""
         factory = RepoFactory(self.repo_store)
diff --git a/turnip/api/views.py b/turnip/api/views.py
index 2f199af..db98f05 100644
--- a/turnip/api/views.py
+++ b/turnip/api/views.py
@@ -426,10 +426,12 @@ class DetectMergesAPI(BaseAPI):
         omitted from the response.
         """
         target = self.request.matchdict['target']
-        sources = extract_cstruct(self.request)['body'].get('sources')
+        payload = extract_cstruct(self.request)['body']
+        sources = payload.get('sources')
+        stops = payload.get('stop', [])
         try:
             merges = store.detect_merges(
-                repo_store, repo_name, target, sources)
+                repo_store, repo_name, target, sources, stops)
         except GitError:
             return exc.HTTPNotFound()
         return merges