launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #32892
[Merge] ~ines-almeida/turnip:add-diff-stats-endpoint into turnip:master
Ines Almeida has proposed merging ~ines-almeida/turnip:add-diff-stats-endpoint into turnip:master.
Commit message:
Add diff stats endpoint
This endpoint will list which files changed on a diff between 2 commits, allowing also cross-repo (using the same mechanisms as the already existing diff endpoints) and a diff from an empty tree
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~ines-almeida/turnip/+git/turnip/+merge/491273
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~ines-almeida/turnip:add-diff-stats-endpoint into turnip:master.
diff --git a/turnip/api/store.py b/turnip/api/store.py
index 346c1f3..43e9078 100644
--- a/turnip/api/store.py
+++ b/turnip/api/store.py
@@ -13,6 +13,10 @@ from collections import defaultdict
import six
from contextlib2 import ExitStack, contextmanager
from pygit2 import (
+ GIT_DELTA_ADDED,
+ GIT_DELTA_DELETED,
+ GIT_DELTA_MODIFIED,
+ GIT_DELTA_RENAMED,
GIT_OBJ_COMMIT,
GIT_OBJ_TAG,
GIT_REF_OID,
@@ -1183,6 +1187,68 @@ def merge_async(
)
+def _parse_diff(diff):
+ """Parse pygit2.Diff object and return stats
+
+ Currently, only returning file stats (modified, added, deleted, renamed)
+ """
+
+ result = {
+ "added": [],
+ "modified": [],
+ "deleted": [],
+ "renamed": [],
+ }
+
+ for delta in diff.deltas:
+ old_file = delta.old_file.path
+ new_file = delta.new_file.path
+ if delta.status == GIT_DELTA_ADDED:
+ result["added"].append(new_file)
+ elif delta.status == GIT_DELTA_DELETED:
+ result["deleted"].append(old_file)
+ elif delta.status == GIT_DELTA_MODIFIED:
+ result["modified"].append(new_file)
+ elif delta.status == GIT_DELTA_RENAMED:
+ result["renamed"].append({"old": old_file, "new": new_file})
+
+ return result
+
+
+def get_diff_stats(repo_store, repo_name, sha1_from, sha1_to, diff_type):
+ """Get diff stats and associated commits of two sha1s.
+
+ :param sha1_from: diff from sha1. If empty, diffs against empty tree.
+ :param sha1_to: diff to sha1.
+ :param diff_type: type of diff, either '..' or '...'
+ """
+
+ with open_repo(repo_store, repo_name) as repo:
+ # If we want to compare against the base (no source)
+ if sha1_from is None or sha1_from == "":
+ empty_tree_id = repo.TreeBuilder().write()
+ from_tree = repo[empty_tree_id]
+
+ else:
+ # If we want to compare against a common ancester
+ if diff_type == "...":
+ common_ancestor = repo.merge_base(sha1_from, sha1_to)
+ if common_ancestor is not None:
+ # We have a merge base
+ sha1_from = common_ancestor
+
+ from_tree = repo[sha1_from].tree
+
+ to_tree = repo[sha1_to].tree
+
+ diff = repo.diff(from_tree, to_tree)
+ # Enable rename/copy similarity detection so we can classify renames
+ diff.find_similar()
+
+ return _parse_diff(diff)
+ return {}
+
+
def get_diff(repo_store, repo_name, sha1_from, sha1_to, context_lines=3):
"""Get patch and associated commits of two sha1s.
diff --git a/turnip/api/tests/test_api.py b/turnip/api/tests/test_api.py
index dcb2d68..91c6ca8 100644
--- a/turnip/api/tests/test_api.py
+++ b/turnip/api/tests/test_api.py
@@ -1927,6 +1927,164 @@ class ApiTestCase(TestCase, ApiRepoStoreMixin):
self.assertEqual(400, resp.status_code)
mock_apply_async.assert_not_called()
+ def test_diff_stats_basic(self):
+ """Test diff stats for cases of adding, modifying and deleting files"""
+ repo = RepoFactory(self.repo_store)
+ c1 = repo.add_commit("a", "file.txt")
+ c2 = repo.add_commit("b", "file.txt", parents=[c1])
+ path = f"/repo/{self.repo_path}/compare/{c1}..{c2}/stats"
+ resp = self.app.get(path)
+ self.assertEqual(200, resp.status_code)
+ self.assertIn("modified", resp.json)
+ self.assertIn("file.txt", resp.json["modified"])
+
+ c3 = repo.add_commit("x", "new.txt", parents=[c2])
+ resp2 = self.app.get(
+ f"/repo/{self.repo_path}/compare/{c2}..{c3}/stats"
+ )
+ self.assertIn("new.txt", resp2.json["added"])
+
+ # delete file.txt (remove then commit another file)
+ repo.repo.index.remove("file.txt")
+ c4 = repo.add_commit("y", "another.txt", parents=[c3])
+ resp3 = self.app.get(
+ f"/repo/{self.repo_path}/compare/{c3}..{c4}/stats"
+ )
+ self.assertIn("file.txt", resp3.json["deleted"])
+
+ def test_diff_stats_renamed(self):
+ """Test diff stats for renaming files"""
+ repo = RepoFactory(self.repo_store)
+ c1 = repo.add_commit("foo\n", "foo.txt")
+ repo.repo.index.remove("foo.txt")
+ c2 = repo.add_commit("foo\n", "bar.txt", parents=[c1])
+ resp = self.app.get(f"/repo/{self.repo_path}/compare/{c1}..{c2}/stats")
+ self.assertEqual(200, resp.status_code)
+ self.assertEqual(1, len(resp.json["renamed"]))
+ self.assertEqual(
+ {"old": "foo.txt", "new": "bar.txt"}, resp.json["renamed"][0]
+ )
+
+ def test_diff_stats_triple_dot(self):
+ """Test triple dot diff stats"""
+ repo = RepoFactory(self.repo_store)
+ base = repo.add_commit("base", "base.txt")
+ left = repo.add_commit("left", "left.txt", parents=[base])
+ repo.repo.index.remove("left.txt")
+ right = repo.add_commit("right", "right.txt", parents=[base])
+ resp = self.app.get(
+ f"/repo/{self.repo_path}/compare/{left}...{right}/stats"
+ )
+ self.assertEqual(200, resp.status_code)
+ self.assertIn("right.txt", resp.json["added"])
+ self.assertNotIn("left.txt", resp.json["added"])
+
+ def test_diff_stats_invalid_separator(self):
+ """Test invalid separator returns 400 BadRequest"""
+ # invalid separator or invalid shas should result in 400/404
+ RepoFactory(self.repo_store).build()
+ resp = self.app.get(
+ f"/repo/{self.repo_path}/compare/1++2/stats", expect_errors=True
+ )
+ self.assertEqual(400, resp.status_code)
+
+ def test_diff_stats_nonexistent_from_sha(self):
+ """Test diff stats with non-existent 'from' SHA"""
+ repo = RepoFactory(self.repo_store)
+ c1 = repo.add_commit("test", "file.txt")
+
+ # Use a non-existent SHA for the 'from' parameter
+ resp = self.app.get(
+ f"/repo/{self.repo_path}/compare/bar..{c1.hex}/stats",
+ expect_errors=True,
+ )
+ self.assertEqual(404, resp.status_code)
+
+ def test_diff_stats_nonexistent_to_sha(self):
+ """Test diff stats with non-existent 'to' SHA"""
+ repo = RepoFactory(self.repo_store)
+ c1 = repo.add_commit("test", "file.txt")
+
+ # Use a non-existent SHA for the 'to' parameter
+ resp = self.app.get(
+ f"/repo/{self.repo_path}/compare/{c1.hex}..foo/stats",
+ expect_errors=True,
+ )
+ self.assertEqual(404, resp.status_code)
+
+ def test_diff_stats_empty_to_sha(self):
+ """Test diff stats with empty 'to' SHA"""
+ repo = RepoFactory(self.repo_store)
+ c1 = repo.add_commit("a", "file.txt")
+
+ resp = self.app.get(
+ f"/repo/{self.repo_path}/compare/{c1.hex}../stats",
+ expect_errors=True,
+ )
+ self.assertEqual(400, resp.status_code)
+
+ def test_diff_stats_empty_from_sha(self):
+ """Test diff stats with empty 'from' SHA"""
+ repo = RepoFactory(self.repo_store)
+ c1 = repo.add_commit("a", "file.txt")
+
+ resp2 = self.app.get(
+ f"/repo/{self.repo_path}/compare/..{c1.hex}/stats",
+ expect_errors=True,
+ )
+ self.assertEqual(200, resp2.status_code)
+
+ def test_diff_stats_nonexistent_repo(self):
+ """Test diff stats with non-existent repository"""
+ resp = self.app.get(
+ "/repo/nonexistent/compare/abc123..def456/stats",
+ expect_errors=True,
+ )
+ self.assertEqual(404, resp.status_code)
+
+ def test_diff_stats_cross_repo_basic(self):
+ """Test diff stats across two repos via ephemeral alternates."""
+ # Create target repo under expected name
+ target = RepoFactory(self.repo_store)
+ base = target.add_commit("base", "base.txt")
+
+ # Create source repo under a different name and clone from target
+ source_repo_path = os.path.join(self.repo_root, "source")
+ source = RepoFactory(source_repo_path, clone_from=target)
+ source_commit = source.add_commit(
+ "source change", "right.txt", parents=[base]
+ )
+
+ # Cross-repo name format: <target>:<source>
+ path = (
+ f"/repo/{self.repo_path}:source/compare/"
+ f"{base.hex}..{source_commit.hex}/stats"
+ )
+ resp = self.app.get(path)
+ self.assertEqual(200, resp.status_code)
+ self.assertIn("right.txt", resp.json["added"])
+
+ def test_diff_stats_cross_repo_empty_from(self):
+ """Test cross-repo stats when 'from' is empty (diff from empty tree)"""
+ target = RepoFactory(self.repo_store)
+ base = target.add_commit("base", "base.txt")
+
+ source_repo_path = os.path.join(self.repo_root, "source")
+ source = RepoFactory(source_repo_path, clone_from=target)
+ source_commit = source.add_commit(
+ "source change", "right.txt", parents=[base]
+ )
+
+ path = (
+ f"/repo/{self.repo_path}:source/compare/"
+ f"..{source_commit.hex}/stats"
+ )
+ resp = self.app.get(path)
+ self.assertEqual(200, resp.status_code)
+ # Entire tree of the 'to' commit should appear as added
+ self.assertIn("base.txt", resp.json["added"])
+ self.assertIn("right.txt", resp.json["added"])
+
class AsyncRepoCreationAPI(TestCase, ApiRepoStoreMixin):
def setUp(self):
diff --git a/turnip/api/tests/test_store.py b/turnip/api/tests/test_store.py
index c5242f6..3f819f2 100644
--- a/turnip/api/tests/test_store.py
+++ b/turnip/api/tests/test_store.py
@@ -1815,3 +1815,215 @@ class RequestMergeTestCase(TestCase):
commit = self.target_repo.get(ref.target)
# No merge happended
self.assertEqual(commit.hex, self.initial_commit.hex)
+
+
+class DiffStatsStoreTestCase(TestCase):
+ """Test cases for the get_diff_stats function in the store module."""
+
+ def setUp(self):
+ super().setUp()
+ self.repo_store = self.useFixture(TempDir()).path
+ self.useFixture(EnvironmentVariable("REPO_STORE", self.repo_store))
+ self.repo_path = os.path.join(self.repo_store, uuid.uuid1().hex)
+ self.factory = RepoFactory(self.repo_path)
+
+ # Create base commits
+ c1 = self.factory.add_commit("d", "to_delete.txt")
+ c2 = self.factory.add_commit("a", "file.txt", parents=[c1])
+ self.base = self.factory.add_commit("r", "to_rename.txt", parents=[c2])
+
+ def test_get_diff_stats_non_existing_sha1_from(self):
+ """Test file modifications are correctly identified in diff stats"""
+ c1 = self.factory.add_commit("b", "file.txt", parents=[self.base])
+ self.assertRaises(
+ ValueError,
+ store.get_diff_stats,
+ self.repo_store,
+ self.repo_path,
+ "unkonwn",
+ c1.hex,
+ "..",
+ )
+
+ def test_get_diff_stats_non_existing_sha1_to(self):
+ """Test file modifications are correctly identified in diff stats"""
+ self.assertRaises(
+ ValueError,
+ store.get_diff_stats,
+ self.repo_store,
+ self.repo_path,
+ self.base,
+ "unkonwn",
+ "..",
+ )
+
+ def test_get_diff_stats_basic_modified(self):
+ """Test file modifications are correctly identified in diff stats"""
+ c1 = self.factory.add_commit("b", "file.txt", parents=[self.base])
+ stats_mod = store.get_diff_stats(
+ self.repo_store, self.repo_path, self.base.hex, c1.hex, ".."
+ )
+ self.assertEqual([], stats_mod["added"])
+ self.assertIn("file.txt", stats_mod["modified"])
+ self.assertEqual([], stats_mod["deleted"])
+ self.assertEqual([], stats_mod["renamed"])
+
+ def test_get_diff_stats_basic_added(self):
+ """Test file additions are correctly identified in diff stats"""
+ c1 = self.factory.add_commit("n", "new.txt", parents=[self.base])
+ stats_add = store.get_diff_stats(
+ self.repo_store, self.repo_path, self.base.hex, c1.hex, ".."
+ )
+ self.assertIn("new.txt", stats_add["added"])
+ self.assertEqual([], stats_add["modified"])
+ self.assertEqual([], stats_add["deleted"])
+ self.assertEqual([], stats_add["renamed"])
+
+ def test_get_diff_stats_basic_deleted(self):
+ """Test file deletions are correctly identified in diff stats"""
+ # Delete file.txt in next commit (by removing then committing a change)
+ self.factory.repo.index.remove("file.txt")
+ c1 = self.factory.add_commit("y", "another.txt", parents=[self.base])
+ stats_del = store.get_diff_stats(
+ self.repo_store, self.repo_path, self.base.hex, c1.hex, ".."
+ )
+ self.assertIn("another.txt", stats_del["added"])
+ self.assertEqual([], stats_del["modified"])
+ self.assertIn("file.txt", stats_del["deleted"])
+ self.assertEqual([], stats_del["renamed"])
+
+ def test_get_diff_stats_renamed(self):
+ """Test file renames are correctly identified in diff stats"""
+ self.factory.repo.index.remove("to_rename.txt")
+ c1 = self.factory.add_commit("r", "renamed.txt", parents=[self.base])
+
+ stats = store.get_diff_stats(
+ self.repo_store, self.repo_path, self.base.hex, c1.hex, ".."
+ )
+ self.assertEqual([], stats["added"])
+ self.assertEqual([], stats["deleted"])
+ self.assertEqual([], stats["modified"])
+ self.assertEqual(1, len(stats["renamed"]))
+ self.assertEqual(
+ {"old": "to_rename.txt", "new": "renamed.txt"}, stats["renamed"][0]
+ )
+
+ def test_get_diff_stats_multiple_added_modified_deleted(self):
+ """Test diff stats with multiple changes across several commits"""
+ # Create commits to compare against
+ c1 = self.factory.add_commit("f", "file.txt", parents=[self.base])
+ c2 = self.factory.add_commit("n", "new.txt", parents=[c1])
+ self.factory.repo.index.remove("to_delete.txt")
+ c3 = self.factory.add_commit("a", "another.txt", parents=[c2])
+
+ self.factory.repo.index.remove("to_rename.txt")
+ c4 = self.factory.add_commit("r", "renamed.txt", parents=[c3])
+
+ stats = store.get_diff_stats(
+ self.repo_store, self.repo_path, self.base.hex, c4.hex, ".."
+ )
+ self.assertIn("new.txt", stats["added"])
+ self.assertIn("another.txt", stats["added"])
+ self.assertIn("file.txt", stats["modified"])
+ self.assertIn("to_delete.txt", stats["deleted"])
+ self.assertIn(
+ {"old": "to_rename.txt", "new": "renamed.txt"}, stats["renamed"]
+ )
+
+ def test_get_diff_stats_no_from_sha(self):
+ """Test diff stats when comparing against no source commit"""
+ c1 = self.factory.add_commit("b", "file.txt", parents=[self.base])
+ stats = store.get_diff_stats(
+ self.repo_store, self.repo_path, None, c1.hex, "..."
+ )
+ self.assertIn("file.txt", stats["added"])
+ self.assertEqual([], stats["modified"])
+ self.assertEqual([], stats["deleted"])
+ self.assertEqual([], stats["renamed"])
+
+ stats = store.get_diff_stats(
+ self.repo_store, self.repo_path, "", c1.hex, "..."
+ )
+ self.assertIn("file.txt", stats["added"])
+ self.assertEqual([], stats["modified"])
+ self.assertEqual([], stats["deleted"])
+ self.assertEqual([], stats["renamed"])
+
+ def test_get_diff_stats_triple_dot_uses_merge_base(self):
+ """Test that triple-dot diff notation correctly uses common base"""
+ left = self.factory.add_commit("left", "left.txt", parents=[self.base])
+ self.factory.repo.index.remove("left.txt")
+ right = self.factory.add_commit(
+ "right", "right.txt", parents=[self.base]
+ )
+
+ # Compare left...right should use merge-base (base) vs right
+ stats = store.get_diff_stats(
+ self.repo_store, self.repo_path, left.hex, right.hex, "..."
+ )
+ self.assertIn("right.txt", stats["added"])
+ self.assertNotIn("left.txt", stats["added"])
+
+ def test_get_diff_stats_empty_diff(self):
+ """Test diff stats when comparing identical commits"""
+ stats = store.get_diff_stats(
+ self.repo_store, self.repo_path, self.base.hex, self.base.hex, ".."
+ )
+ self.assertEqual([], stats["added"])
+ self.assertEqual([], stats["modified"])
+ self.assertEqual([], stats["deleted"])
+ self.assertEqual([], stats["renamed"])
+
+ def test_cross_repo_basic_diff_stats(self):
+ """Compare commits across repos using ephemeral alternates."""
+ # Create target repo
+ target_path = os.path.join(self.repo_store, "target")
+ target_factory = RepoFactory(target_path)
+ target_repo = target_factory.build()
+ shared_base = target_factory.add_commit("base", "base.txt")
+ target_repo.create_branch("main", target_repo.get(shared_base))
+ target_repo.set_head("refs/heads/main")
+
+ # Create source repo as a clone of target (shares history up to base)
+ source_path = os.path.join(self.repo_store, "source")
+ source_factory = RepoFactory(source_path, clone_from=target_factory)
+ source_change = source_factory.add_commit(
+ "source change", "right.txt", parents=[shared_base]
+ )
+
+ stats = store.get_diff_stats(
+ self.repo_store,
+ "target:source",
+ shared_base.hex,
+ source_change.hex,
+ "..",
+ )
+ self.assertIn("right.txt", stats["added"])
+ self.assertEqual([], stats["modified"])
+
+ def test_cross_repo_empty_from_diff_stats(self):
+ """Empty 'from' should diff against empty tree across repos."""
+ # Create target repo
+ target_path = os.path.join(self.repo_store, "target")
+ target_factory = RepoFactory(target_path)
+ target_repo = target_factory.build()
+ shared_base = target_factory.add_commit("base", "base.txt")
+ target_repo.create_branch("main", target_repo.get(shared_base))
+ target_repo.set_head("refs/heads/main")
+
+ # Create source repo as a clone of target (shares history up to base)
+ source_path = os.path.join(self.repo_store, "source")
+ source_factory = RepoFactory(source_path, clone_from=target_factory)
+ source_change = source_factory.add_commit(
+ "source change", "right.txt", parents=[shared_base]
+ )
+
+ stats = store.get_diff_stats(
+ self.repo_store,
+ "target:source",
+ None,
+ source_change.hex,
+ "..",
+ )
+ self.assertIn("base.txt", stats["added"]) # from initial commit
+ self.assertIn("right.txt", stats["added"]) # from source commit
diff --git a/turnip/api/views.py b/turnip/api/views.py
index 6931d17..116e0f4 100644
--- a/turnip/api/views.py
+++ b/turnip/api/views.py
@@ -414,6 +414,53 @@ class DiffAPI(BaseAPI):
return patch
+@resource(path="/repo/{name}/compare/{commits}/stats")
+class DiffStatsAPI(BaseAPI):
+ """Provides HTTP API for rev-rev 'double' and 'triple dot' diff stats.
+
+ {commits} can be in the form sha1..sha1 or sha1...sha1.
+ Two dots provides a simple diff, equivalent to `git diff A B`.
+ Three dots provides the symmetric or common ancestor diff, equivalent
+ to `git diff $(git-merge-base A B) B`.
+ The first sha1 can be empty, meaning we want the diff against an empty tree
+ {name} can be two : separated repositories, for a cross repository diff.
+ """
+
+ def __init__(self, request, context=None):
+ super().__init__()
+ self.request = request
+
+ @validate_path
+ def get(self, repo_store, repo_name):
+ """Returns diff statas of two commits."""
+ commits = re.split(r"(\.{2,3})", self.request.matchdict["commits"])
+ if not len(commits) == 3:
+ return exc.HTTPBadRequest()
+
+ sha1_from = commits[0]
+ diff_type = commits[1]
+ sha1_to = commits[2]
+
+ # sha1_to shouldn't be empty
+ if sha1_to == "" or sha1_to is None:
+ return exc.HTTPBadRequest(
+ "You need to set the last commit against which to compare"
+ )
+
+ try:
+ diff_stats = store.get_diff_stats(
+ repo_store,
+ repo_name,
+ sha1_from,
+ sha1_to,
+ diff_type,
+ )
+ except (KeyError, ValueError, GitError):
+ # invalid pygit2 sha1's return ValueError: 1: Ambiguous lookup
+ return exc.HTTPNotFound()
+ return diff_stats
+
+
@resource(path="/repo/{name}/compare-merge/{base}:{head}")
class DiffMergeAPI(BaseAPI):
"""Provides an HTTP API for merge previews.