← Back to team overview

dulwich-users team mailing list archive

[PATCH 04/28] diff: Prune identical subtrees during tree_changes.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

Change-Id: Ic1c772b478a49c670175fdefc3e72824414b44aa
---
 dulwich/diff.py            |    8 ++++++--
 dulwich/tests/test_diff.py |   19 +++++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/dulwich/diff.py b/dulwich/diff.py
index 5c86fa1..3961d40 100644
--- a/dulwich/diff.py
+++ b/dulwich/diff.py
@@ -89,7 +89,7 @@ def _merge_entries(path, tree1, tree2):
     return result
 
 
-def walk_trees(store, tree1_id, tree2_id):
+def walk_trees(store, tree1_id, tree2_id, prune_identical=False):
     """Recursively walk all the entries of two trees.
 
     Iteration is depth-first pre-order, as in e.g. os.walk.
@@ -97,6 +97,7 @@ def walk_trees(store, tree1_id, tree2_id):
     :param store: An ObjectStore for looking up objects.
     :param tree1_id: The SHA of the first Tree object to iterate, or None.
     :param tree2_id: The SHA of the second Tree object to iterate, or None.
+    :param prune_identical: If True, identical subtrees will not be walked.
     :yield: Pairs of TreeEntry objects for each pair of entries in the trees and
         their subtrees recursively. If an entry exists in one tree but not the
         other, the other entry will have all attributes set to None. If neither
@@ -110,6 +111,8 @@ def walk_trees(store, tree1_id, tree2_id):
         entry1, entry2 = todo.pop()
         is_tree1 = entry1.mode and stat.S_ISDIR(entry1.mode)
         is_tree2 = entry2.mode and stat.S_ISDIR(entry2.mode)
+        if prune_identical and is_tree1 and is_tree2 and entry1 == entry2:
+            continue
 
         tree1 = is_tree1 and store[entry1.sha] or None
         tree2 = is_tree2 and store[entry2.sha] or None
@@ -135,7 +138,8 @@ def tree_changes(store, tree1_id, tree2_id, want_unchanged=False):
     :yield: TreeChange instances for each change between the source and target
         tree.
     """
-    entries = walk_trees(store, tree1_id, tree2_id)
+    entries = walk_trees(store, tree1_id, tree2_id,
+                         prune_identical=(not want_unchanged))
     for entry1, entry2 in entries:
         if entry1 == entry2 and not want_unchanged:
             continue
diff --git a/dulwich/tests/test_diff.py b/dulwich/tests/test_diff.py
index e17ce49..38c15a2 100644
--- a/dulwich/tests/test_diff.py
+++ b/dulwich/tests/test_diff.py
@@ -228,3 +228,22 @@ class TreeChangesTest(TestCase):
            TreeChange(CHANGE_DELETE, ('a.', 0100644, blob.id), _NULL_ENTRY),
            TreeChange(CHANGE_ADD, _NULL_ENTRY, ('a./x', 0100644, blob.id))],
           tree1, tree2)
+
+    def test_tree_changes_prune(self):
+        blob_a1 = make_object(Blob, data='a1')
+        blob_a2 = make_object(Blob, data='a2')
+        blob_x = make_object(Blob, data='x')
+        tree1 = self.commit_tree([('a', blob_a1, 0100644),
+                                  ('b/x', blob_x, 0100644)])
+        tree2 = self.commit_tree([('a', blob_a2, 0100644),
+                                  ('b/x', blob_x, 0100644)])
+        # Remove identical items so lookups will fail unless we prune.
+        subtree = self.store[tree1['b'][1]]
+        for entry in subtree.iteritems():
+            del self.store[entry.sha]
+        del self.store[subtree.id]
+
+        self.assertChangesEqual(
+          [TreeChange(CHANGE_MODIFY, ('a', 0100644, blob_a1.id),
+                      ('a', 0100644, blob_a2.id))],
+          tree1, tree2)
-- 
1.7.3.2.168.gd6b63




References