← Back to team overview

dulwich-users team mailing list archive

[PATCH 03/13] diff_tree: Do easy copy detection without find_copies_harder.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

find_copies_harder should only affect how unchanged entries are handled;
modified files can still be considered the source of copies.

This makes the default rename detection behave the same as
"git diff-tree -M -C" rather than just -M.

Change-Id: I9cf4141b009e568cb2eb53cad062bca97cc4dcd8
---
 NEWS                            |    3 +++
 dulwich/diff_tree.py            |   16 +++++++++-------
 dulwich/tests/test_diff_tree.py |   36 +++++++++++++++++++++---------------
 3 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/NEWS b/NEWS
index 7a6efc6..5d953fb 100644
--- a/NEWS
+++ b/NEWS
@@ -11,6 +11,9 @@
 
   * New tree_changes_for_merge function in diff_tree. (Dave Borowitz)
 
+  * Easy copy detection in RenameDetector even without find_copies_harder.
+    (Dave Borowitz)
+
  BUG FIXES
 
   * Avoid storing all objects in memory when writing pack.
diff --git a/dulwich/diff_tree.py b/dulwich/diff_tree.py
index eb0b152..2e6fa3b 100644
--- a/dulwich/diff_tree.py
+++ b/dulwich/diff_tree.py
@@ -437,10 +437,12 @@ class RenameDetector(object):
             elif self._should_split(change):
                 self._deletes.append(TreeChange.delete(change.old))
                 self._adds.append(TreeChange.add(change.new))
-            elif (self._find_copies_harder and (
-              change.type == CHANGE_MODIFY or change.type == CHANGE_UNCHANGED)):
-                # Treat modified/unchanged as deleted rather than splitting it,
-                # to avoid spurious renames.
+            elif ((self._find_copies_harder and change.type == CHANGE_UNCHANGED)
+                  or change.type == CHANGE_MODIFY):
+                # Treat all modifies as potential deletes for rename detection,
+                # but don't split them (to avoid spurious renames). Setting
+                # find_copies_harder means we treat unchanged the same as
+                # modified.
                 self._deletes.append(change)
             else:
                 self._changes.append(change)
@@ -457,8 +459,7 @@ class RenameDetector(object):
         delete_map = defaultdict(list)
         for delete in self._deletes:
             # Keep track of whether the delete was actually marked as a delete.
-            # If not, it must have been added due to find_copies_harder, and
-            # needs to be marked as a copy.
+            # If not, it needs to be marked as a copy.
             is_delete = delete.type == CHANGE_DELETE
             delete_map[delete.old.sha].append((delete.old, is_delete))
 
@@ -469,7 +470,8 @@ class RenameDetector(object):
             for (old, is_delete), new in itertools.izip(sha_deletes, sha_adds):
                 if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
                     continue
-                delete_paths.add(old.path)
+                if is_delete:
+                    delete_paths.add(old.path)
                 add_paths.add(new.path)
                 new_type = is_delete and CHANGE_RENAME or CHANGE_COPY
                 self._changes.append(TreeChange(new_type, old, new))
diff --git a/dulwich/tests/test_diff_tree.py b/dulwich/tests/test_diff_tree.py
index 02c86c4..0dc6182 100644
--- a/dulwich/tests/test_diff_tree.py
+++ b/dulwich/tests/test_diff_tree.py
@@ -617,6 +617,26 @@ class RenameDetectionTest(DiffTestCase):
            TreeChange(CHANGE_RENAME, ('b', F, blob.id), ('d', F, blob.id))],
           self.detect_renames(tree1, tree2))
 
+    def test_exact_copy_modify(self):
+        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
+        blob2 = make_object(Blob, data='a\nb\nc\ne\n')
+        tree1 = self.commit_tree([('a', blob1)])
+        tree2 = self.commit_tree([('a', blob2), ('b', blob1)])
+        self.assertEqual(
+          [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
+           TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob1.id))],
+          self.detect_renames(tree1, tree2))
+
+    def test_exact_copy_change_mode(self):
+        blob = make_object(Blob, data='a\nb\nc\nd\n')
+        tree1 = self.commit_tree([('a', blob)])
+        tree2 = self.commit_tree([('a', blob, 0100755), ('b', blob)])
+        self.assertEqual(
+          [TreeChange(CHANGE_MODIFY, ('a', F, blob.id),
+                      ('a', 0100755, blob.id)),
+           TreeChange(CHANGE_COPY, ('a', F, blob.id), ('b', F, blob.id))],
+          self.detect_renames(tree1, tree2))
+
     def test_rename_threshold(self):
         blob1 = make_object(Blob, data='a\nb\nc\n')
         blob2 = make_object(Blob, data='a\nb\nd\n')
@@ -766,7 +786,7 @@ class RenameDetectionTest(DiffTestCase):
 
         no_renames = [
           TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
-          TreeChange.add(('b', F, blob2.id))]
+          TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))]
         self.assertEqual(
           no_renames, self.detect_renames(tree1, tree2))
         self.assertEqual(
@@ -797,20 +817,6 @@ class RenameDetectionTest(DiffTestCase):
           [TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))],
           self.detect_renames(tree1, tree2, find_copies_harder=True))
 
-    def test_find_copies_harder_modify(self):
-        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
-        blob2 = make_object(Blob, data='a\nb\nc\ne\n')
-        tree1 = self.commit_tree([('a', blob1)])
-        tree2 = self.commit_tree([('a', blob2), ('b', blob2)])
-        self.assertEqual(
-          [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
-           TreeChange.add(('b', F, blob2.id))],
-          self.detect_renames(tree1, tree2))
-        self.assertEqual(
-          [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
-           TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))],
-          self.detect_renames(tree1, tree2, find_copies_harder=True))
-
     def test_find_copies_harder_with_rewrites(self):
         blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
         blob_a2 = make_object(Blob, data='f\ng\nh\ni\n')
-- 
1.7.3.1



References