← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~ines-almeida/turnip:add-diff-stats-endpoint into turnip:master

 

Ines Almeida has proposed merging ~ines-almeida/turnip:add-diff-stats-endpoint into turnip:master.

Commit message:
Add diff stats endpoint

This endpoint will list which files changed on a diff between 2 commits, allowing also cross-repo (using the same mechanisms as the already existing diff endpoints) and a diff from an empty tree

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~ines-almeida/turnip/+git/turnip/+merge/491273
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~ines-almeida/turnip:add-diff-stats-endpoint into turnip:master.
diff --git a/turnip/api/store.py b/turnip/api/store.py
index 346c1f3..43e9078 100644
--- a/turnip/api/store.py
+++ b/turnip/api/store.py
@@ -13,6 +13,10 @@ from collections import defaultdict
 import six
 from contextlib2 import ExitStack, contextmanager
 from pygit2 import (
+    GIT_DELTA_ADDED,
+    GIT_DELTA_DELETED,
+    GIT_DELTA_MODIFIED,
+    GIT_DELTA_RENAMED,
     GIT_OBJ_COMMIT,
     GIT_OBJ_TAG,
     GIT_REF_OID,
@@ -1183,6 +1187,68 @@ def merge_async(
             )
 
 
+def _parse_diff(diff):
+    """Parse pygit2.Diff object and return stats
+
+    Currently, only returning file stats (modified, added, deleted, renamed)
+    """
+
+    result = {
+        "added": [],
+        "modified": [],
+        "deleted": [],
+        "renamed": [],
+    }
+
+    for delta in diff.deltas:
+        old_file = delta.old_file.path
+        new_file = delta.new_file.path
+        if delta.status == GIT_DELTA_ADDED:
+            result["added"].append(new_file)
+        elif delta.status == GIT_DELTA_DELETED:
+            result["deleted"].append(old_file)
+        elif delta.status == GIT_DELTA_MODIFIED:
+            result["modified"].append(new_file)
+        elif delta.status == GIT_DELTA_RENAMED:
+            result["renamed"].append({"old": old_file, "new": new_file})
+
+    return result
+
+
+def get_diff_stats(repo_store, repo_name, sha1_from, sha1_to, diff_type):
+    """Get diff stats and associated commits of two sha1s.
+
+    :param sha1_from: diff from sha1. If empty, diffs against empty tree.
+    :param sha1_to: diff to sha1.
+    :param diff_type: type of diff, either '..' or '...'
+    """
+
+    with open_repo(repo_store, repo_name) as repo:
+        # If we want to compare against the base (no source)
+        if sha1_from is None or sha1_from == "":
+            empty_tree_id = repo.TreeBuilder().write()
+            from_tree = repo[empty_tree_id]
+
+        else:
+            # If we want to compare against a common ancester
+            if diff_type == "...":
+                common_ancestor = repo.merge_base(sha1_from, sha1_to)
+                if common_ancestor is not None:
+                    # We have a merge base
+                    sha1_from = common_ancestor
+
+            from_tree = repo[sha1_from].tree
+
+        to_tree = repo[sha1_to].tree
+
+        diff = repo.diff(from_tree, to_tree)
+        # Enable rename/copy similarity detection so we can classify renames
+        diff.find_similar()
+
+        return _parse_diff(diff)
+    return {}
+
+
 def get_diff(repo_store, repo_name, sha1_from, sha1_to, context_lines=3):
     """Get patch and associated commits of two sha1s.
 
diff --git a/turnip/api/tests/test_api.py b/turnip/api/tests/test_api.py
index dcb2d68..91c6ca8 100644
--- a/turnip/api/tests/test_api.py
+++ b/turnip/api/tests/test_api.py
@@ -1927,6 +1927,164 @@ class ApiTestCase(TestCase, ApiRepoStoreMixin):
         self.assertEqual(400, resp.status_code)
         mock_apply_async.assert_not_called()
 
+    def test_diff_stats_basic(self):
+        """Test diff stats for cases of adding, modifying and deleting files"""
+        repo = RepoFactory(self.repo_store)
+        c1 = repo.add_commit("a", "file.txt")
+        c2 = repo.add_commit("b", "file.txt", parents=[c1])
+        path = f"/repo/{self.repo_path}/compare/{c1}..{c2}/stats"
+        resp = self.app.get(path)
+        self.assertEqual(200, resp.status_code)
+        self.assertIn("modified", resp.json)
+        self.assertIn("file.txt", resp.json["modified"])
+
+        c3 = repo.add_commit("x", "new.txt", parents=[c2])
+        resp2 = self.app.get(
+            f"/repo/{self.repo_path}/compare/{c2}..{c3}/stats"
+        )
+        self.assertIn("new.txt", resp2.json["added"])
+
+        # delete file.txt (remove then commit another file)
+        repo.repo.index.remove("file.txt")
+        c4 = repo.add_commit("y", "another.txt", parents=[c3])
+        resp3 = self.app.get(
+            f"/repo/{self.repo_path}/compare/{c3}..{c4}/stats"
+        )
+        self.assertIn("file.txt", resp3.json["deleted"])
+
+    def test_diff_stats_renamed(self):
+        """Test diff stats for renaming files"""
+        repo = RepoFactory(self.repo_store)
+        c1 = repo.add_commit("foo\n", "foo.txt")
+        repo.repo.index.remove("foo.txt")
+        c2 = repo.add_commit("foo\n", "bar.txt", parents=[c1])
+        resp = self.app.get(f"/repo/{self.repo_path}/compare/{c1}..{c2}/stats")
+        self.assertEqual(200, resp.status_code)
+        self.assertEqual(1, len(resp.json["renamed"]))
+        self.assertEqual(
+            {"old": "foo.txt", "new": "bar.txt"}, resp.json["renamed"][0]
+        )
+
+    def test_diff_stats_triple_dot(self):
+        """Test triple dot diff stats"""
+        repo = RepoFactory(self.repo_store)
+        base = repo.add_commit("base", "base.txt")
+        left = repo.add_commit("left", "left.txt", parents=[base])
+        repo.repo.index.remove("left.txt")
+        right = repo.add_commit("right", "right.txt", parents=[base])
+        resp = self.app.get(
+            f"/repo/{self.repo_path}/compare/{left}...{right}/stats"
+        )
+        self.assertEqual(200, resp.status_code)
+        self.assertIn("right.txt", resp.json["added"])
+        self.assertNotIn("left.txt", resp.json["added"])
+
+    def test_diff_stats_invalid_separator(self):
+        """Test invalid separator returns 400 BadRequest"""
+        # invalid separator or invalid shas should result in 400/404
+        RepoFactory(self.repo_store).build()
+        resp = self.app.get(
+            f"/repo/{self.repo_path}/compare/1++2/stats", expect_errors=True
+        )
+        self.assertEqual(400, resp.status_code)
+
+    def test_diff_stats_nonexistent_from_sha(self):
+        """Test diff stats with non-existent 'from' SHA"""
+        repo = RepoFactory(self.repo_store)
+        c1 = repo.add_commit("test", "file.txt")
+
+        # Use a non-existent SHA for the 'from' parameter
+        resp = self.app.get(
+            f"/repo/{self.repo_path}/compare/bar..{c1.hex}/stats",
+            expect_errors=True,
+        )
+        self.assertEqual(404, resp.status_code)
+
+    def test_diff_stats_nonexistent_to_sha(self):
+        """Test diff stats with non-existent 'to' SHA"""
+        repo = RepoFactory(self.repo_store)
+        c1 = repo.add_commit("test", "file.txt")
+
+        # Use a non-existent SHA for the 'to' parameter
+        resp = self.app.get(
+            f"/repo/{self.repo_path}/compare/{c1.hex}..foo/stats",
+            expect_errors=True,
+        )
+        self.assertEqual(404, resp.status_code)
+
+    def test_diff_stats_empty_to_sha(self):
+        """Test diff stats with empty 'to' SHA"""
+        repo = RepoFactory(self.repo_store)
+        c1 = repo.add_commit("a", "file.txt")
+
+        resp = self.app.get(
+            f"/repo/{self.repo_path}/compare/{c1.hex}../stats",
+            expect_errors=True,
+        )
+        self.assertEqual(400, resp.status_code)
+
+    def test_diff_stats_empty_from_sha(self):
+        """Test diff stats with empty 'from' SHA"""
+        repo = RepoFactory(self.repo_store)
+        c1 = repo.add_commit("a", "file.txt")
+
+        resp2 = self.app.get(
+            f"/repo/{self.repo_path}/compare/..{c1.hex}/stats",
+            expect_errors=True,
+        )
+        self.assertEqual(200, resp2.status_code)
+
+    def test_diff_stats_nonexistent_repo(self):
+        """Test diff stats with non-existent repository"""
+        resp = self.app.get(
+            "/repo/nonexistent/compare/abc123..def456/stats",
+            expect_errors=True,
+        )
+        self.assertEqual(404, resp.status_code)
+
+    def test_diff_stats_cross_repo_basic(self):
+        """Test diff stats across two repos via ephemeral alternates."""
+        # Create target repo under expected name
+        target = RepoFactory(self.repo_store)
+        base = target.add_commit("base", "base.txt")
+
+        # Create source repo under a different name and clone from target
+        source_repo_path = os.path.join(self.repo_root, "source")
+        source = RepoFactory(source_repo_path, clone_from=target)
+        source_commit = source.add_commit(
+            "source change", "right.txt", parents=[base]
+        )
+
+        # Cross-repo name format: <target>:<source>
+        path = (
+            f"/repo/{self.repo_path}:source/compare/"
+            f"{base.hex}..{source_commit.hex}/stats"
+        )
+        resp = self.app.get(path)
+        self.assertEqual(200, resp.status_code)
+        self.assertIn("right.txt", resp.json["added"])
+
+    def test_diff_stats_cross_repo_empty_from(self):
+        """Test cross-repo stats when 'from' is empty (diff from empty tree)"""
+        target = RepoFactory(self.repo_store)
+        base = target.add_commit("base", "base.txt")
+
+        source_repo_path = os.path.join(self.repo_root, "source")
+        source = RepoFactory(source_repo_path, clone_from=target)
+        source_commit = source.add_commit(
+            "source change", "right.txt", parents=[base]
+        )
+
+        path = (
+            f"/repo/{self.repo_path}:source/compare/"
+            f"..{source_commit.hex}/stats"
+        )
+        resp = self.app.get(path)
+        self.assertEqual(200, resp.status_code)
+        # Entire tree of the 'to' commit should appear as added
+        self.assertIn("base.txt", resp.json["added"])
+        self.assertIn("right.txt", resp.json["added"])
+
 
 class AsyncRepoCreationAPI(TestCase, ApiRepoStoreMixin):
     def setUp(self):
diff --git a/turnip/api/tests/test_store.py b/turnip/api/tests/test_store.py
index c5242f6..3f819f2 100644
--- a/turnip/api/tests/test_store.py
+++ b/turnip/api/tests/test_store.py
@@ -1815,3 +1815,215 @@ class RequestMergeTestCase(TestCase):
         commit = self.target_repo.get(ref.target)
         # No merge happended
         self.assertEqual(commit.hex, self.initial_commit.hex)
+
+
+class DiffStatsStoreTestCase(TestCase):
+    """Test cases for the get_diff_stats function in the store module."""
+
+    def setUp(self):
+        super().setUp()
+        self.repo_store = self.useFixture(TempDir()).path
+        self.useFixture(EnvironmentVariable("REPO_STORE", self.repo_store))
+        self.repo_path = os.path.join(self.repo_store, uuid.uuid1().hex)
+        self.factory = RepoFactory(self.repo_path)
+
+        # Create base commits
+        c1 = self.factory.add_commit("d", "to_delete.txt")
+        c2 = self.factory.add_commit("a", "file.txt", parents=[c1])
+        self.base = self.factory.add_commit("r", "to_rename.txt", parents=[c2])
+
+    def test_get_diff_stats_non_existing_sha1_from(self):
+        """Test file modifications are correctly identified in diff stats"""
+        c1 = self.factory.add_commit("b", "file.txt", parents=[self.base])
+        self.assertRaises(
+            ValueError,
+            store.get_diff_stats,
+            self.repo_store,
+            self.repo_path,
+            "unkonwn",
+            c1.hex,
+            "..",
+        )
+
+    def test_get_diff_stats_non_existing_sha1_to(self):
+        """Test file modifications are correctly identified in diff stats"""
+        self.assertRaises(
+            ValueError,
+            store.get_diff_stats,
+            self.repo_store,
+            self.repo_path,
+            self.base,
+            "unkonwn",
+            "..",
+        )
+
+    def test_get_diff_stats_basic_modified(self):
+        """Test file modifications are correctly identified in diff stats"""
+        c1 = self.factory.add_commit("b", "file.txt", parents=[self.base])
+        stats_mod = store.get_diff_stats(
+            self.repo_store, self.repo_path, self.base.hex, c1.hex, ".."
+        )
+        self.assertEqual([], stats_mod["added"])
+        self.assertIn("file.txt", stats_mod["modified"])
+        self.assertEqual([], stats_mod["deleted"])
+        self.assertEqual([], stats_mod["renamed"])
+
+    def test_get_diff_stats_basic_added(self):
+        """Test file additions are correctly identified in diff stats"""
+        c1 = self.factory.add_commit("n", "new.txt", parents=[self.base])
+        stats_add = store.get_diff_stats(
+            self.repo_store, self.repo_path, self.base.hex, c1.hex, ".."
+        )
+        self.assertIn("new.txt", stats_add["added"])
+        self.assertEqual([], stats_add["modified"])
+        self.assertEqual([], stats_add["deleted"])
+        self.assertEqual([], stats_add["renamed"])
+
+    def test_get_diff_stats_basic_deleted(self):
+        """Test file deletions are correctly identified in diff stats"""
+        # Delete file.txt in next commit (by removing then committing a change)
+        self.factory.repo.index.remove("file.txt")
+        c1 = self.factory.add_commit("y", "another.txt", parents=[self.base])
+        stats_del = store.get_diff_stats(
+            self.repo_store, self.repo_path, self.base.hex, c1.hex, ".."
+        )
+        self.assertIn("another.txt", stats_del["added"])
+        self.assertEqual([], stats_del["modified"])
+        self.assertIn("file.txt", stats_del["deleted"])
+        self.assertEqual([], stats_del["renamed"])
+
+    def test_get_diff_stats_renamed(self):
+        """Test file renames are correctly identified in diff stats"""
+        self.factory.repo.index.remove("to_rename.txt")
+        c1 = self.factory.add_commit("r", "renamed.txt", parents=[self.base])
+
+        stats = store.get_diff_stats(
+            self.repo_store, self.repo_path, self.base.hex, c1.hex, ".."
+        )
+        self.assertEqual([], stats["added"])
+        self.assertEqual([], stats["deleted"])
+        self.assertEqual([], stats["modified"])
+        self.assertEqual(1, len(stats["renamed"]))
+        self.assertEqual(
+            {"old": "to_rename.txt", "new": "renamed.txt"}, stats["renamed"][0]
+        )
+
+    def test_get_diff_stats_multiple_added_modified_deleted(self):
+        """Test diff stats with multiple changes across several commits"""
+        # Create commits to compare against
+        c1 = self.factory.add_commit("f", "file.txt", parents=[self.base])
+        c2 = self.factory.add_commit("n", "new.txt", parents=[c1])
+        self.factory.repo.index.remove("to_delete.txt")
+        c3 = self.factory.add_commit("a", "another.txt", parents=[c2])
+
+        self.factory.repo.index.remove("to_rename.txt")
+        c4 = self.factory.add_commit("r", "renamed.txt", parents=[c3])
+
+        stats = store.get_diff_stats(
+            self.repo_store, self.repo_path, self.base.hex, c4.hex, ".."
+        )
+        self.assertIn("new.txt", stats["added"])
+        self.assertIn("another.txt", stats["added"])
+        self.assertIn("file.txt", stats["modified"])
+        self.assertIn("to_delete.txt", stats["deleted"])
+        self.assertIn(
+            {"old": "to_rename.txt", "new": "renamed.txt"}, stats["renamed"]
+        )
+
+    def test_get_diff_stats_no_from_sha(self):
+        """Test diff stats when comparing against no source commit"""
+        c1 = self.factory.add_commit("b", "file.txt", parents=[self.base])
+        stats = store.get_diff_stats(
+            self.repo_store, self.repo_path, None, c1.hex, "..."
+        )
+        self.assertIn("file.txt", stats["added"])
+        self.assertEqual([], stats["modified"])
+        self.assertEqual([], stats["deleted"])
+        self.assertEqual([], stats["renamed"])
+
+        stats = store.get_diff_stats(
+            self.repo_store, self.repo_path, "", c1.hex, "..."
+        )
+        self.assertIn("file.txt", stats["added"])
+        self.assertEqual([], stats["modified"])
+        self.assertEqual([], stats["deleted"])
+        self.assertEqual([], stats["renamed"])
+
+    def test_get_diff_stats_triple_dot_uses_merge_base(self):
+        """Test that triple-dot diff notation correctly uses common base"""
+        left = self.factory.add_commit("left", "left.txt", parents=[self.base])
+        self.factory.repo.index.remove("left.txt")
+        right = self.factory.add_commit(
+            "right", "right.txt", parents=[self.base]
+        )
+
+        # Compare left...right should use merge-base (base) vs right
+        stats = store.get_diff_stats(
+            self.repo_store, self.repo_path, left.hex, right.hex, "..."
+        )
+        self.assertIn("right.txt", stats["added"])
+        self.assertNotIn("left.txt", stats["added"])
+
+    def test_get_diff_stats_empty_diff(self):
+        """Test diff stats when comparing identical commits"""
+        stats = store.get_diff_stats(
+            self.repo_store, self.repo_path, self.base.hex, self.base.hex, ".."
+        )
+        self.assertEqual([], stats["added"])
+        self.assertEqual([], stats["modified"])
+        self.assertEqual([], stats["deleted"])
+        self.assertEqual([], stats["renamed"])
+
+    def test_cross_repo_basic_diff_stats(self):
+        """Compare commits across repos using ephemeral alternates."""
+        # Create target repo
+        target_path = os.path.join(self.repo_store, "target")
+        target_factory = RepoFactory(target_path)
+        target_repo = target_factory.build()
+        shared_base = target_factory.add_commit("base", "base.txt")
+        target_repo.create_branch("main", target_repo.get(shared_base))
+        target_repo.set_head("refs/heads/main")
+
+        # Create source repo as a clone of target (shares history up to base)
+        source_path = os.path.join(self.repo_store, "source")
+        source_factory = RepoFactory(source_path, clone_from=target_factory)
+        source_change = source_factory.add_commit(
+            "source change", "right.txt", parents=[shared_base]
+        )
+
+        stats = store.get_diff_stats(
+            self.repo_store,
+            "target:source",
+            shared_base.hex,
+            source_change.hex,
+            "..",
+        )
+        self.assertIn("right.txt", stats["added"])
+        self.assertEqual([], stats["modified"])
+
+    def test_cross_repo_empty_from_diff_stats(self):
+        """Empty 'from' should diff against empty tree across repos."""
+        # Create target repo
+        target_path = os.path.join(self.repo_store, "target")
+        target_factory = RepoFactory(target_path)
+        target_repo = target_factory.build()
+        shared_base = target_factory.add_commit("base", "base.txt")
+        target_repo.create_branch("main", target_repo.get(shared_base))
+        target_repo.set_head("refs/heads/main")
+
+        # Create source repo as a clone of target (shares history up to base)
+        source_path = os.path.join(self.repo_store, "source")
+        source_factory = RepoFactory(source_path, clone_from=target_factory)
+        source_change = source_factory.add_commit(
+            "source change", "right.txt", parents=[shared_base]
+        )
+
+        stats = store.get_diff_stats(
+            self.repo_store,
+            "target:source",
+            None,
+            source_change.hex,
+            "..",
+        )
+        self.assertIn("base.txt", stats["added"])  # from initial commit
+        self.assertIn("right.txt", stats["added"])  # from source commit
diff --git a/turnip/api/views.py b/turnip/api/views.py
index 6931d17..116e0f4 100644
--- a/turnip/api/views.py
+++ b/turnip/api/views.py
@@ -414,6 +414,53 @@ class DiffAPI(BaseAPI):
         return patch
 
 
+@resource(path="/repo/{name}/compare/{commits}/stats")
+class DiffStatsAPI(BaseAPI):
+    """Provides HTTP API for rev-rev 'double' and 'triple dot' diff stats.
+
+    {commits} can be in the form sha1..sha1 or sha1...sha1.
+    Two dots provides a simple diff, equivalent to `git diff A B`.
+    Three dots provides the symmetric or common ancestor diff, equivalent
+    to `git diff $(git-merge-base A B) B`.
+    The first sha1 can be empty, meaning we want the diff against an empty tree
+    {name} can be two : separated repositories, for a cross repository diff.
+    """
+
+    def __init__(self, request, context=None):
+        super().__init__()
+        self.request = request
+
+    @validate_path
+    def get(self, repo_store, repo_name):
+        """Returns diff statas of two commits."""
+        commits = re.split(r"(\.{2,3})", self.request.matchdict["commits"])
+        if not len(commits) == 3:
+            return exc.HTTPBadRequest()
+
+        sha1_from = commits[0]
+        diff_type = commits[1]
+        sha1_to = commits[2]
+
+        # sha1_to shouldn't be empty
+        if sha1_to == "" or sha1_to is None:
+            return exc.HTTPBadRequest(
+                "You need to set the last commit against which to compare"
+            )
+
+        try:
+            diff_stats = store.get_diff_stats(
+                repo_store,
+                repo_name,
+                sha1_from,
+                sha1_to,
+                diff_type,
+            )
+        except (KeyError, ValueError, GitError):
+            # invalid pygit2 sha1's return ValueError: 1: Ambiguous lookup
+            return exc.HTTPNotFound()
+        return diff_stats
+
+
 @resource(path="/repo/{name}/compare-merge/{base}:{head}")
 class DiffMergeAPI(BaseAPI):
     """Provides an HTTP API for merge previews.