launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #28010
[Merge] ~cjwatson/launchpad:git-filter-paths into launchpad:master
Colin Watson has proposed merging ~cjwatson/launchpad:git-filter-paths into launchpad:master.
Commit message:
Add support for filtering git commits by paths
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/414503
This is the other end of https://code.launchpad.net/~jugmac00/turnip/+git/turnip/+merge/414251.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:git-filter-paths into launchpad:master.
diff --git a/lib/lp/code/interfaces/githosting.py b/lib/lp/code/interfaces/githosting.py
index cfed027..35e5868 100644
--- a/lib/lp/code/interfaces/githosting.py
+++ b/lib/lp/code/interfaces/githosting.py
@@ -1,4 +1,4 @@
-# Copyright 2015-2020 Canonical Ltd. This software is licensed under the
+# Copyright 2015-2022 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Interface for communication with the Git hosting service."""
@@ -47,11 +47,14 @@ class IGitHostingClient(Interface):
they point to.
"""
- def getCommits(path, commit_oids, logger=None):
+ def getCommits(path, commit_oids, filter_paths=None, logger=None):
"""Get details of a list of commits.
:param path: Physical path of the repository on the hosting service.
:param commit_oids: A list of commit OIDs.
+ :param filter_paths: If given, only return commits that modify paths
+ in this list, and include the contents of the files at those
+ paths in the response.
:param logger: An optional logger.
:return: A list of dicts each of which represents one of the
requested commits. Non-existent commits will be omitted.
diff --git a/lib/lp/code/interfaces/gitrepository.py b/lib/lp/code/interfaces/gitrepository.py
index ef25343..4396096 100644
--- a/lib/lp/code/interfaces/gitrepository.py
+++ b/lib/lp/code/interfaces/gitrepository.py
@@ -1,4 +1,4 @@
-# Copyright 2015-2021 Canonical Ltd. This software is licensed under the
+# Copyright 2015-2022 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Git repository interfaces."""
@@ -354,13 +354,15 @@ class IGitRepositoryView(IHasRecipes):
paths to remove.
"""
- def fetchRefCommits(hosting_path, refs, logger=None):
+ def fetchRefCommits(refs, filter_paths=None, logger=None):
"""Fetch commit information from the hosting service for a set of refs.
- :param hosting_path: A path on the hosting service.
:param refs: A dict mapping ref paths to dictionaries of their
fields; the field dictionaries will be updated with any detailed
commit information that is available.
+ :param filter_paths: If given, only return commits that modify paths
+ in this list, and include the contents of the files at those
+ paths in the response.
:param logger: An optional logger.
"""
diff --git a/lib/lp/code/model/githosting.py b/lib/lp/code/model/githosting.py
index e498138..c5ade39 100644
--- a/lib/lp/code/model/githosting.py
+++ b/lib/lp/code/model/githosting.py
@@ -1,4 +1,4 @@
-# Copyright 2015-2020 Canonical Ltd. This software is licensed under the
+# Copyright 2015-2022 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Communication with the Git hosting service."""
@@ -153,18 +153,42 @@ class GitHostingClient:
raise GitRepositoryScanFault(
"Failed to get refs from Git repository: %s" % str(e))
- def getCommits(self, path, commit_oids, logger=None):
+ def _decodeBlob(self, encoded_blob):
+ """Blobs are base64-decoded for transport. Decode them."""
+ try:
+ blob = base64.b64decode(encoded_blob["data"].encode("UTF-8"))
+ if len(blob) != encoded_blob["size"]:
+ raise GitRepositoryScanFault(
+ "Unexpected size (%s vs %s)" % (
+ len(blob), encoded_blob["size"]))
+ return blob
+ except Exception as e:
+ raise GitRepositoryScanFault(
+ "Failed to get file from Git repository: %s" % str(e))
+
+ def getCommits(self, path, commit_oids, filter_paths=None, logger=None):
"""See `IGitHostingClient`."""
commit_oids = list(commit_oids)
try:
if logger is not None:
- logger.info("Requesting commit details for %s" % commit_oids)
- return self._post(
- "/repo/%s/commits" % path, json={"commits": commit_oids})
+ message = "Requesting commit details for %s" % commit_oids
+ if filter_paths:
+ message += " (with paths: %s)" % filter_paths
+ logger.info(message)
+ json_data = {"commits": commit_oids}
+ if filter_paths:
+ json_data["filter_paths"] = filter_paths
+ response = self._post("/repo/%s/commits" % path, json=json_data)
except requests.RequestException as e:
raise GitRepositoryScanFault(
"Failed to get commit details from Git repository: %s" %
str(e))
+ if filter_paths:
+ for commit in response:
+ blobs = commit.get("blobs", {})
+ for path, encoded_blob in blobs.items():
+ blobs[path] = self._decodeBlob(encoded_blob)
+ return response
def getLog(self, path, start, limit=None, stop=None, logger=None):
"""See `IGitHostingClient`."""
@@ -250,16 +274,7 @@ class GitHostingClient:
else:
raise GitRepositoryScanFault(
"Failed to get file from Git repository: %s" % str(e))
- try:
- blob = base64.b64decode(response["data"].encode("UTF-8"))
- if len(blob) != response["size"]:
- raise GitRepositoryScanFault(
- "Unexpected size (%s vs %s)" % (
- len(blob), response["size"]))
- return blob
- except Exception as e:
- raise GitRepositoryScanFault(
- "Failed to get file from Git repository: %s" % str(e))
+ return self._decodeBlob(response)
def copyRefs(self, path, operations, logger=None):
"""See `IGitHostingClient`."""
diff --git a/lib/lp/code/model/gitrepository.py b/lib/lp/code/model/gitrepository.py
index 8a3b08c..1e9cef2 100644
--- a/lib/lp/code/model/gitrepository.py
+++ b/lib/lp/code/model/gitrepository.py
@@ -1,4 +1,4 @@
-# Copyright 2015-2021 Canonical Ltd. This software is licensed under the
+# Copyright 2015-2022 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
__all__ = [
@@ -242,8 +242,8 @@ def parse_git_commits(commits):
:param commits: A list of turnip-formatted commit object dicts.
:return: A dict mapping sha1 identifiers of commits to parsed commit
dicts: keys may include "sha1", "author_date", "author_addr",
- "author", "committer_date", "committer_addr", "committer", and
- "commit_message".
+ "author", "committer_date", "committer_addr", "committer",
+ "commit_message", and "blobs".
"""
parsed = {}
authors_to_acquire = []
@@ -288,6 +288,8 @@ def parse_git_commits(commits):
committers_to_acquire.append(committer_addr)
if "message" in commit:
info["commit_message"] = commit["message"]
+ if "blobs" in commit:
+ info["blobs"] = commit["blobs"]
parsed[commit["sha1"]] = info
revision_authors = getUtility(IRevisionSet).acquireRevisionAuthors(
authors_to_acquire + committers_to_acquire)
@@ -877,15 +879,15 @@ class GitRepository(StormBase, WebhookTargetMixin, AccessTokenTargetMixin,
refs_to_remove = set(current_refs) - set(new_refs)
return refs_to_upsert, refs_to_remove
- @staticmethod
- def fetchRefCommits(hosting_path, refs, logger=None):
+ def fetchRefCommits(self, refs, filter_paths=None, logger=None):
"""See `IGitRepository`."""
oids = sorted({info["sha1"] for info in refs.values()})
if not oids:
return
commits = parse_git_commits(
getUtility(IGitHostingClient).getCommits(
- hosting_path, oids, logger=logger))
+ self.getInternalPath(), oids, filter_paths=filter_paths,
+ logger=logger))
for info in refs.values():
commit = commits.get(info["sha1"])
if commit is not None:
@@ -904,8 +906,7 @@ class GitRepository(StormBase, WebhookTargetMixin, AccessTokenTargetMixin,
hosting_path = self.getInternalPath()
refs_to_upsert, refs_to_remove = (
self.planRefChanges(hosting_path, logger=log))
- self.fetchRefCommits(
- hosting_path, refs_to_upsert, logger=log)
+ self.fetchRefCommits(refs_to_upsert, logger=log)
self.synchroniseRefs(
refs_to_upsert, refs_to_remove, logger=log)
props = getUtility(IGitHostingClient).getProperties(
diff --git a/lib/lp/code/model/tests/test_githosting.py b/lib/lp/code/model/tests/test_githosting.py
index aa3c671..ea1ebbd 100644
--- a/lib/lp/code/model/tests/test_githosting.py
+++ b/lib/lp/code/model/tests/test_githosting.py
@@ -1,4 +1,4 @@
-# Copyright 2016-2020 Canonical Ltd. This software is licensed under the
+# Copyright 2016-2022 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Unit tests for `GitHostingClient`.
@@ -207,6 +207,85 @@ class TestGitHostingClient(TestCase):
self.assertRequest(
"repo/123/commits", method="POST", json_data={"commits": ["0"]})
+ def test_getCommits_filter_paths(self):
+ commit_json = {
+ "sha1": "0",
+ "blobs": {
+ ".launchpad.yaml": b"foo",
+ "debian/.launchpad.yaml": b"bar",
+ },
+ }
+ encoded_commit_json = {
+ "sha1": "0",
+ "blobs": {
+ ".launchpad.yaml": {"size": 3, "data": "Zm9v"},
+ "debian/.launchpad.yaml": {"size": 3, "data": "YmFy"},
+ },
+ }
+ with self.mockRequests("POST", json=[encoded_commit_json]):
+ commits = self.client.getCommits(
+ "123", ["0"],
+ filter_paths=[".launchpad.yaml", "debian/.launchpad.yaml"])
+ self.assertEqual([commit_json], commits)
+ self.assertRequest(
+ "repo/123/commits", method="POST",
+ json_data={
+ "commits": ["0"],
+ "filter_paths": [".launchpad.yaml", "debian/.launchpad.yaml"],
+ })
+
+ def test_getCommits_filter_paths_no_data(self):
+ commit_json = {"sha1": "0", "blobs": {".launchpad.yaml": {"size": 1}}}
+ with self.mockRequests("POST", json=[commit_json]):
+ self.assertRaisesWithContent(
+ GitRepositoryScanFault,
+ "Failed to get file from Git repository: 'data'",
+ self.client.getCommits, "123", ["0"],
+ filter_paths=[".launchpad.yaml"])
+
+ def test_getCommits_filter_paths_no_size(self):
+ commit_json = {
+ "sha1": "0",
+ "blobs": {".launchpad.yaml": {"data": "data"}},
+ }
+ with self.mockRequests("POST", json=[commit_json]):
+ self.assertRaisesWithContent(
+ GitRepositoryScanFault,
+ "Failed to get file from Git repository: 'size'",
+ self.client.getCommits, "123", ["0"],
+ filter_paths=[".launchpad.yaml"])
+
+ def test_getCommits_filter_paths_bad_encoding(self):
+ commit_json = {
+ "sha1": "0",
+ "blobs": {".launchpad.yaml": {"data": "x", "size": 1}},
+ }
+ with self.mockRequests("POST", json=[commit_json]):
+ self.assertRaisesWithContent(
+ GitRepositoryScanFault,
+ "Failed to get file from Git repository: Incorrect padding",
+ self.client.getCommits, "123", ["0"],
+ filter_paths=[".launchpad.yaml"])
+
+ def test_getCommits_filter_paths_wrong_size(self):
+ blob = b"".join(bytes((i,)) for i in range(256))
+ commit_json = {
+ "sha1": "0",
+ "blobs": {
+ ".launchpad.yaml": {
+ "data": base64.b64encode(blob).decode(),
+ "size": 0,
+ },
+ },
+ }
+ with self.mockRequests("POST", json=[commit_json]):
+ self.assertRaisesWithContent(
+ GitRepositoryScanFault,
+ "Failed to get file from Git repository: Unexpected size"
+ " (256 vs 0)",
+ self.client.getCommits, "123", ["0"],
+ filter_paths=[".launchpad.yaml"])
+
def test_getCommits_failure(self):
with self.mockRequests("POST", status=400):
self.assertRaisesWithContent(
diff --git a/lib/lp/code/model/tests/test_gitrepository.py b/lib/lp/code/model/tests/test_gitrepository.py
index 39a46d5..cc0fc62 100644
--- a/lib/lp/code/model/tests/test_gitrepository.py
+++ b/lib/lp/code/model/tests/test_gitrepository.py
@@ -1,4 +1,4 @@
-# Copyright 2015-2021 Canonical Ltd. This software is licensed under the
+# Copyright 2015-2022 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Tests for Git repositories."""
@@ -2058,6 +2058,7 @@ class TestGitRepositoryRefs(TestCaseWithFactory):
def test_fetchRefCommits(self):
# fetchRefCommits fetches detailed tip commit metadata for the
# requested refs.
+ repository = self.factory.makeGitRepository()
master_sha1 = six.ensure_text(
hashlib.sha1(b"refs/heads/master").hexdigest())
foo_sha1 = six.ensure_text(hashlib.sha1(b"refs/heads/foo").hexdigest())
@@ -2093,11 +2094,14 @@ class TestGitRepositoryRefs(TestCaseWithFactory):
"type": GitObjectType.COMMIT,
},
}
- GitRepository.fetchRefCommits("dummy", refs)
+ repository.fetchRefCommits(refs)
expected_oids = [master_sha1, foo_sha1]
[(_, observed_oids)] = hosting_fixture.getCommits.extract_args()
self.assertContentEqual(expected_oids, observed_oids)
+ self.assertEqual(
+ [{"filter_paths": None, "logger": None}],
+ hosting_fixture.getCommits.extract_kwargs())
expected_author_addr = "%s <%s>" % (author.displayname, author_email)
[expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
[expected_author_addr]).values()
@@ -2126,10 +2130,74 @@ class TestGitRepositoryRefs(TestCaseWithFactory):
def test_fetchRefCommits_empty(self):
# If given an empty refs dictionary, fetchRefCommits returns early
# without contacting the hosting service.
+ repository = self.factory.makeGitRepository()
hosting_fixture = self.useFixture(GitHostingFixture())
- GitRepository.fetchRefCommits("dummy", {})
+ repository.fetchRefCommits({})
self.assertEqual([], hosting_fixture.getCommits.calls)
+ def test_fetchRefCommits_filter_paths(self):
+ # fetchRefCommits can be asked to filter commits to include only
+ # those containing the specified paths, and to return the contents
+ # of those paths.
+ repository = self.factory.makeGitRepository()
+ master_sha1 = six.ensure_text(
+ hashlib.sha1(b"refs/heads/master").hexdigest())
+ foo_sha1 = six.ensure_text(hashlib.sha1(b"refs/heads/foo").hexdigest())
+ author = self.factory.makePerson()
+ with person_logged_in(author):
+ author_email = author.preferredemail.email
+ author_date = datetime(2015, 1, 1, tzinfo=pytz.UTC)
+ hosting_fixture = self.useFixture(GitHostingFixture(commits=[
+ {
+ "sha1": master_sha1,
+ "message": "tip of master",
+ "author": {
+ "name": author.displayname,
+ "email": author_email,
+ "time": int(seconds_since_epoch(author_date)),
+ },
+ "parents": [],
+ "tree": six.ensure_text(hashlib.sha1(b"").hexdigest()),
+ "blobs": {".launchpad.yaml": b"foo"},
+ }]))
+ refs = {
+ "refs/heads/master": {
+ "sha1": master_sha1,
+ "type": GitObjectType.COMMIT,
+ },
+ "refs/heads/foo": {
+ "sha1": foo_sha1,
+ "type": GitObjectType.COMMIT,
+ },
+ }
+ repository.fetchRefCommits(refs, filter_paths=[".launchpad.yaml"])
+
+ expected_oids = [master_sha1, foo_sha1]
+ [(_, observed_oids)] = hosting_fixture.getCommits.extract_args()
+ self.assertContentEqual(expected_oids, observed_oids)
+ self.assertEqual(
+ [{"filter_paths": [".launchpad.yaml"], "logger": None}],
+ hosting_fixture.getCommits.extract_kwargs())
+ expected_author_addr = "%s <%s>" % (author.displayname, author_email)
+ [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
+ [expected_author_addr]).values()
+ expected_refs = {
+ "refs/heads/master": {
+ "sha1": master_sha1,
+ "type": GitObjectType.COMMIT,
+ "author": expected_author,
+ "author_addr": expected_author_addr,
+ "author_date": author_date,
+ "commit_message": "tip of master",
+ "blobs": {".launchpad.yaml": b"foo"},
+ },
+ "refs/heads/foo": {
+ "sha1": foo_sha1,
+ "type": GitObjectType.COMMIT,
+ },
+ }
+ self.assertEqual(expected_refs, refs)
+
def test_synchroniseRefs(self):
# synchroniseRefs copes with synchronising a repository where some
# refs have been created, some deleted, and some changed.