← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:git-filter-paths into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:git-filter-paths into launchpad:master.

Commit message:
Add support for filtering git commits by paths

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/414503

This is the other end of https://code.launchpad.net/~jugmac00/turnip/+git/turnip/+merge/414251.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:git-filter-paths into launchpad:master.
diff --git a/lib/lp/code/interfaces/githosting.py b/lib/lp/code/interfaces/githosting.py
index cfed027..35e5868 100644
--- a/lib/lp/code/interfaces/githosting.py
+++ b/lib/lp/code/interfaces/githosting.py
@@ -1,4 +1,4 @@
-# Copyright 2015-2020 Canonical Ltd.  This software is licensed under the
+# Copyright 2015-2022 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Interface for communication with the Git hosting service."""
@@ -47,11 +47,14 @@ class IGitHostingClient(Interface):
             they point to.
         """
 
-    def getCommits(path, commit_oids, logger=None):
+    def getCommits(path, commit_oids, filter_paths=None, logger=None):
         """Get details of a list of commits.
 
         :param path: Physical path of the repository on the hosting service.
         :param commit_oids: A list of commit OIDs.
+        :param filter_paths: If given, only return commits that modify paths
+            in this list, and include the contents of the files at those
+            paths in the response.
         :param logger: An optional logger.
         :return: A list of dicts each of which represents one of the
             requested commits.  Non-existent commits will be omitted.
diff --git a/lib/lp/code/interfaces/gitrepository.py b/lib/lp/code/interfaces/gitrepository.py
index ef25343..4396096 100644
--- a/lib/lp/code/interfaces/gitrepository.py
+++ b/lib/lp/code/interfaces/gitrepository.py
@@ -1,4 +1,4 @@
-# Copyright 2015-2021 Canonical Ltd.  This software is licensed under the
+# Copyright 2015-2022 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Git repository interfaces."""
@@ -354,13 +354,15 @@ class IGitRepositoryView(IHasRecipes):
             paths to remove.
         """
 
-    def fetchRefCommits(hosting_path, refs, logger=None):
+    def fetchRefCommits(refs, filter_paths=None, logger=None):
         """Fetch commit information from the hosting service for a set of refs.
 
-        :param hosting_path: A path on the hosting service.
         :param refs: A dict mapping ref paths to dictionaries of their
             fields; the field dictionaries will be updated with any detailed
             commit information that is available.
+        :param filter_paths: If given, only return commits that modify paths
+            in this list, and include the contents of the files at those
+            paths in the response.
         :param logger: An optional logger.
         """
 
diff --git a/lib/lp/code/model/githosting.py b/lib/lp/code/model/githosting.py
index e498138..c5ade39 100644
--- a/lib/lp/code/model/githosting.py
+++ b/lib/lp/code/model/githosting.py
@@ -1,4 +1,4 @@
-# Copyright 2015-2020 Canonical Ltd.  This software is licensed under the
+# Copyright 2015-2022 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Communication with the Git hosting service."""
@@ -153,18 +153,42 @@ class GitHostingClient:
             raise GitRepositoryScanFault(
                 "Failed to get refs from Git repository: %s" % str(e))
 
-    def getCommits(self, path, commit_oids, logger=None):
+    def _decodeBlob(self, encoded_blob):
+        """Blobs are base64-decoded for transport.  Decode them."""
+        try:
+            blob = base64.b64decode(encoded_blob["data"].encode("UTF-8"))
+            if len(blob) != encoded_blob["size"]:
+                raise GitRepositoryScanFault(
+                    "Unexpected size (%s vs %s)" % (
+                        len(blob), encoded_blob["size"]))
+            return blob
+        except Exception as e:
+            raise GitRepositoryScanFault(
+                "Failed to get file from Git repository: %s" % str(e))
+
+    def getCommits(self, path, commit_oids, filter_paths=None, logger=None):
         """See `IGitHostingClient`."""
         commit_oids = list(commit_oids)
         try:
             if logger is not None:
-                logger.info("Requesting commit details for %s" % commit_oids)
-            return self._post(
-                "/repo/%s/commits" % path, json={"commits": commit_oids})
+                message = "Requesting commit details for %s" % commit_oids
+                if filter_paths:
+                    message += " (with paths: %s)" % filter_paths
+                logger.info(message)
+            json_data = {"commits": commit_oids}
+            if filter_paths:
+                json_data["filter_paths"] = filter_paths
+            response = self._post("/repo/%s/commits" % path, json=json_data)
         except requests.RequestException as e:
             raise GitRepositoryScanFault(
                 "Failed to get commit details from Git repository: %s" %
                 str(e))
+        if filter_paths:
+            for commit in response:
+                blobs = commit.get("blobs", {})
+                for path, encoded_blob in blobs.items():
+                    blobs[path] = self._decodeBlob(encoded_blob)
+        return response
 
     def getLog(self, path, start, limit=None, stop=None, logger=None):
         """See `IGitHostingClient`."""
@@ -250,16 +274,7 @@ class GitHostingClient:
             else:
                 raise GitRepositoryScanFault(
                     "Failed to get file from Git repository: %s" % str(e))
-        try:
-            blob = base64.b64decode(response["data"].encode("UTF-8"))
-            if len(blob) != response["size"]:
-                raise GitRepositoryScanFault(
-                    "Unexpected size (%s vs %s)" % (
-                        len(blob), response["size"]))
-            return blob
-        except Exception as e:
-            raise GitRepositoryScanFault(
-                "Failed to get file from Git repository: %s" % str(e))
+        return self._decodeBlob(response)
 
     def copyRefs(self, path, operations, logger=None):
         """See `IGitHostingClient`."""
diff --git a/lib/lp/code/model/gitrepository.py b/lib/lp/code/model/gitrepository.py
index 8a3b08c..1e9cef2 100644
--- a/lib/lp/code/model/gitrepository.py
+++ b/lib/lp/code/model/gitrepository.py
@@ -1,4 +1,4 @@
-# Copyright 2015-2021 Canonical Ltd.  This software is licensed under the
+# Copyright 2015-2022 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 __all__ = [
@@ -242,8 +242,8 @@ def parse_git_commits(commits):
     :param commits: A list of turnip-formatted commit object dicts.
     :return: A dict mapping sha1 identifiers of commits to parsed commit
         dicts: keys may include "sha1", "author_date", "author_addr",
-        "author", "committer_date", "committer_addr", "committer", and
-        "commit_message".
+        "author", "committer_date", "committer_addr", "committer",
+        "commit_message", and "blobs".
     """
     parsed = {}
     authors_to_acquire = []
@@ -288,6 +288,8 @@ def parse_git_commits(commits):
                     committers_to_acquire.append(committer_addr)
         if "message" in commit:
             info["commit_message"] = commit["message"]
+        if "blobs" in commit:
+            info["blobs"] = commit["blobs"]
         parsed[commit["sha1"]] = info
     revision_authors = getUtility(IRevisionSet).acquireRevisionAuthors(
         authors_to_acquire + committers_to_acquire)
@@ -877,15 +879,15 @@ class GitRepository(StormBase, WebhookTargetMixin, AccessTokenTargetMixin,
         refs_to_remove = set(current_refs) - set(new_refs)
         return refs_to_upsert, refs_to_remove
 
-    @staticmethod
-    def fetchRefCommits(hosting_path, refs, logger=None):
+    def fetchRefCommits(self, refs, filter_paths=None, logger=None):
         """See `IGitRepository`."""
         oids = sorted({info["sha1"] for info in refs.values()})
         if not oids:
             return
         commits = parse_git_commits(
             getUtility(IGitHostingClient).getCommits(
-                hosting_path, oids, logger=logger))
+                self.getInternalPath(), oids, filter_paths=filter_paths,
+                logger=logger))
         for info in refs.values():
             commit = commits.get(info["sha1"])
             if commit is not None:
@@ -904,8 +906,7 @@ class GitRepository(StormBase, WebhookTargetMixin, AccessTokenTargetMixin,
         hosting_path = self.getInternalPath()
         refs_to_upsert, refs_to_remove = (
             self.planRefChanges(hosting_path, logger=log))
-        self.fetchRefCommits(
-            hosting_path, refs_to_upsert, logger=log)
+        self.fetchRefCommits(refs_to_upsert, logger=log)
         self.synchroniseRefs(
             refs_to_upsert, refs_to_remove, logger=log)
         props = getUtility(IGitHostingClient).getProperties(
diff --git a/lib/lp/code/model/tests/test_githosting.py b/lib/lp/code/model/tests/test_githosting.py
index aa3c671..ea1ebbd 100644
--- a/lib/lp/code/model/tests/test_githosting.py
+++ b/lib/lp/code/model/tests/test_githosting.py
@@ -1,4 +1,4 @@
-# Copyright 2016-2020 Canonical Ltd.  This software is licensed under the
+# Copyright 2016-2022 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Unit tests for `GitHostingClient`.
@@ -207,6 +207,85 @@ class TestGitHostingClient(TestCase):
         self.assertRequest(
             "repo/123/commits", method="POST", json_data={"commits": ["0"]})
 
+    def test_getCommits_filter_paths(self):
+        commit_json = {
+            "sha1": "0",
+            "blobs": {
+                ".launchpad.yaml": b"foo",
+                "debian/.launchpad.yaml": b"bar",
+                },
+            }
+        encoded_commit_json = {
+            "sha1": "0",
+            "blobs": {
+                ".launchpad.yaml": {"size": 3, "data": "Zm9v"},
+                "debian/.launchpad.yaml": {"size": 3, "data": "YmFy"},
+                },
+            }
+        with self.mockRequests("POST", json=[encoded_commit_json]):
+            commits = self.client.getCommits(
+                "123", ["0"],
+                filter_paths=[".launchpad.yaml", "debian/.launchpad.yaml"])
+        self.assertEqual([commit_json], commits)
+        self.assertRequest(
+            "repo/123/commits", method="POST",
+            json_data={
+                "commits": ["0"],
+                "filter_paths": [".launchpad.yaml", "debian/.launchpad.yaml"],
+                })
+
+    def test_getCommits_filter_paths_no_data(self):
+        commit_json = {"sha1": "0", "blobs": {".launchpad.yaml": {"size": 1}}}
+        with self.mockRequests("POST", json=[commit_json]):
+            self.assertRaisesWithContent(
+                GitRepositoryScanFault,
+                "Failed to get file from Git repository: 'data'",
+                self.client.getCommits, "123", ["0"],
+                filter_paths=[".launchpad.yaml"])
+
+    def test_getCommits_filter_paths_no_size(self):
+        commit_json = {
+            "sha1": "0",
+            "blobs": {".launchpad.yaml": {"data": "data"}},
+            }
+        with self.mockRequests("POST", json=[commit_json]):
+            self.assertRaisesWithContent(
+                GitRepositoryScanFault,
+                "Failed to get file from Git repository: 'size'",
+                self.client.getCommits, "123", ["0"],
+                filter_paths=[".launchpad.yaml"])
+
+    def test_getCommits_filter_paths_bad_encoding(self):
+        commit_json = {
+            "sha1": "0",
+            "blobs": {".launchpad.yaml": {"data": "x", "size": 1}},
+            }
+        with self.mockRequests("POST", json=[commit_json]):
+            self.assertRaisesWithContent(
+                GitRepositoryScanFault,
+                "Failed to get file from Git repository: Incorrect padding",
+                self.client.getCommits, "123", ["0"],
+                filter_paths=[".launchpad.yaml"])
+
+    def test_getCommits_filter_paths_wrong_size(self):
+        blob = b"".join(bytes((i,)) for i in range(256))
+        commit_json = {
+            "sha1": "0",
+            "blobs": {
+                ".launchpad.yaml": {
+                    "data": base64.b64encode(blob).decode(),
+                    "size": 0,
+                    },
+                },
+            }
+        with self.mockRequests("POST", json=[commit_json]):
+            self.assertRaisesWithContent(
+                GitRepositoryScanFault,
+                "Failed to get file from Git repository: Unexpected size"
+                " (256 vs 0)",
+                self.client.getCommits, "123", ["0"],
+                filter_paths=[".launchpad.yaml"])
+
     def test_getCommits_failure(self):
         with self.mockRequests("POST", status=400):
             self.assertRaisesWithContent(
diff --git a/lib/lp/code/model/tests/test_gitrepository.py b/lib/lp/code/model/tests/test_gitrepository.py
index 39a46d5..cc0fc62 100644
--- a/lib/lp/code/model/tests/test_gitrepository.py
+++ b/lib/lp/code/model/tests/test_gitrepository.py
@@ -1,4 +1,4 @@
-# Copyright 2015-2021 Canonical Ltd.  This software is licensed under the
+# Copyright 2015-2022 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Tests for Git repositories."""
@@ -2058,6 +2058,7 @@ class TestGitRepositoryRefs(TestCaseWithFactory):
     def test_fetchRefCommits(self):
         # fetchRefCommits fetches detailed tip commit metadata for the
         # requested refs.
+        repository = self.factory.makeGitRepository()
         master_sha1 = six.ensure_text(
             hashlib.sha1(b"refs/heads/master").hexdigest())
         foo_sha1 = six.ensure_text(hashlib.sha1(b"refs/heads/foo").hexdigest())
@@ -2093,11 +2094,14 @@ class TestGitRepositoryRefs(TestCaseWithFactory):
                 "type": GitObjectType.COMMIT,
                 },
             }
-        GitRepository.fetchRefCommits("dummy", refs)
+        repository.fetchRefCommits(refs)
 
         expected_oids = [master_sha1, foo_sha1]
         [(_, observed_oids)] = hosting_fixture.getCommits.extract_args()
         self.assertContentEqual(expected_oids, observed_oids)
+        self.assertEqual(
+            [{"filter_paths": None, "logger": None}],
+            hosting_fixture.getCommits.extract_kwargs())
         expected_author_addr = "%s <%s>" % (author.displayname, author_email)
         [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
             [expected_author_addr]).values()
@@ -2126,10 +2130,74 @@ class TestGitRepositoryRefs(TestCaseWithFactory):
     def test_fetchRefCommits_empty(self):
         # If given an empty refs dictionary, fetchRefCommits returns early
         # without contacting the hosting service.
+        repository = self.factory.makeGitRepository()
         hosting_fixture = self.useFixture(GitHostingFixture())
-        GitRepository.fetchRefCommits("dummy", {})
+        repository.fetchRefCommits({})
         self.assertEqual([], hosting_fixture.getCommits.calls)
 
+    def test_fetchRefCommits_filter_paths(self):
+        # fetchRefCommits can be asked to filter commits to include only
+        # those containing the specified paths, and to return the contents
+        # of those paths.
+        repository = self.factory.makeGitRepository()
+        master_sha1 = six.ensure_text(
+            hashlib.sha1(b"refs/heads/master").hexdigest())
+        foo_sha1 = six.ensure_text(hashlib.sha1(b"refs/heads/foo").hexdigest())
+        author = self.factory.makePerson()
+        with person_logged_in(author):
+            author_email = author.preferredemail.email
+        author_date = datetime(2015, 1, 1, tzinfo=pytz.UTC)
+        hosting_fixture = self.useFixture(GitHostingFixture(commits=[
+            {
+                "sha1": master_sha1,
+                "message": "tip of master",
+                "author": {
+                    "name": author.displayname,
+                    "email": author_email,
+                    "time": int(seconds_since_epoch(author_date)),
+                    },
+                "parents": [],
+                "tree": six.ensure_text(hashlib.sha1(b"").hexdigest()),
+                "blobs": {".launchpad.yaml": b"foo"},
+                }]))
+        refs = {
+            "refs/heads/master": {
+                "sha1": master_sha1,
+                "type": GitObjectType.COMMIT,
+                },
+            "refs/heads/foo": {
+                "sha1": foo_sha1,
+                "type": GitObjectType.COMMIT,
+                },
+            }
+        repository.fetchRefCommits(refs, filter_paths=[".launchpad.yaml"])
+
+        expected_oids = [master_sha1, foo_sha1]
+        [(_, observed_oids)] = hosting_fixture.getCommits.extract_args()
+        self.assertContentEqual(expected_oids, observed_oids)
+        self.assertEqual(
+            [{"filter_paths": [".launchpad.yaml"], "logger": None}],
+            hosting_fixture.getCommits.extract_kwargs())
+        expected_author_addr = "%s <%s>" % (author.displayname, author_email)
+        [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
+            [expected_author_addr]).values()
+        expected_refs = {
+            "refs/heads/master": {
+                "sha1": master_sha1,
+                "type": GitObjectType.COMMIT,
+                "author": expected_author,
+                "author_addr": expected_author_addr,
+                "author_date": author_date,
+                "commit_message": "tip of master",
+                "blobs": {".launchpad.yaml": b"foo"},
+                },
+            "refs/heads/foo": {
+                "sha1": foo_sha1,
+                "type": GitObjectType.COMMIT,
+                },
+            }
+        self.assertEqual(expected_refs, refs)
+
     def test_synchroniseRefs(self):
         # synchroniseRefs copes with synchronising a repository where some
         # refs have been created, some deleted, and some changed.