dulwich-users team mailing list archive
-
dulwich-users team
-
Mailing list archive
-
Message #00646
[PATCH 03/13] diff_tree: Do easy copy detection without find_copies_harder.
From: Dave Borowitz <dborowitz@xxxxxxxxxx>
find_copies_harder should only affect how unchanged entries are handled;
modified files can still be considered the source of copies.
This makes the default rename detection behave the same as
"git diff-tree -M -C" rather than just "git diff-tree -M".
Change-Id: I9cf4141b009e568cb2eb53cad062bca97cc4dcd8
---
NEWS | 3 +++
dulwich/diff_tree.py | 16 +++++++++-------
dulwich/tests/test_diff_tree.py | 36 +++++++++++++++++++++---------------
3 files changed, 33 insertions(+), 22 deletions(-)
diff --git a/NEWS b/NEWS
index 7a6efc6..5d953fb 100644
--- a/NEWS
+++ b/NEWS
@@ -11,6 +11,9 @@
* New tree_changes_for_merge function in diff_tree. (Dave Borowitz)
+ * Easy copy detection in RenameDetector even without find_copies_harder.
+ (Dave Borowitz)
+
BUG FIXES
* Avoid storing all objects in memory when writing pack.
diff --git a/dulwich/diff_tree.py b/dulwich/diff_tree.py
index eb0b152..2e6fa3b 100644
--- a/dulwich/diff_tree.py
+++ b/dulwich/diff_tree.py
@@ -437,10 +437,12 @@ class RenameDetector(object):
elif self._should_split(change):
self._deletes.append(TreeChange.delete(change.old))
self._adds.append(TreeChange.add(change.new))
- elif (self._find_copies_harder and (
- change.type == CHANGE_MODIFY or change.type == CHANGE_UNCHANGED)):
- # Treat modified/unchanged as deleted rather than splitting it,
- # to avoid spurious renames.
+ elif ((self._find_copies_harder and change.type == CHANGE_UNCHANGED)
+ or change.type == CHANGE_MODIFY):
+ # Treat all modifies as potential deletes for rename detection,
+ # but don't split them (to avoid spurious renames). Setting
+ # find_copies_harder means we treat unchanged the same as
+ # modified.
self._deletes.append(change)
else:
self._changes.append(change)
@@ -457,8 +459,7 @@ class RenameDetector(object):
delete_map = defaultdict(list)
for delete in self._deletes:
# Keep track of whether the delete was actually marked as a delete.
- # If not, it must have been added due to find_copies_harder, and
- # needs to be marked as a copy.
+ # If not, it needs to be marked as a copy.
is_delete = delete.type == CHANGE_DELETE
delete_map[delete.old.sha].append((delete.old, is_delete))
@@ -469,7 +470,8 @@ class RenameDetector(object):
for (old, is_delete), new in itertools.izip(sha_deletes, sha_adds):
if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
continue
- delete_paths.add(old.path)
+ if is_delete:
+ delete_paths.add(old.path)
add_paths.add(new.path)
new_type = is_delete and CHANGE_RENAME or CHANGE_COPY
self._changes.append(TreeChange(new_type, old, new))
diff --git a/dulwich/tests/test_diff_tree.py b/dulwich/tests/test_diff_tree.py
index 02c86c4..0dc6182 100644
--- a/dulwich/tests/test_diff_tree.py
+++ b/dulwich/tests/test_diff_tree.py
@@ -617,6 +617,26 @@ class RenameDetectionTest(DiffTestCase):
TreeChange(CHANGE_RENAME, ('b', F, blob.id), ('d', F, blob.id))],
self.detect_renames(tree1, tree2))
+ def test_exact_copy_modify(self):
+ blob1 = make_object(Blob, data='a\nb\nc\nd\n')
+ blob2 = make_object(Blob, data='a\nb\nc\ne\n')
+ tree1 = self.commit_tree([('a', blob1)])
+ tree2 = self.commit_tree([('a', blob2), ('b', blob1)])
+ self.assertEqual(
+ [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
+ TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob1.id))],
+ self.detect_renames(tree1, tree2))
+
+ def test_exact_copy_change_mode(self):
+ blob = make_object(Blob, data='a\nb\nc\nd\n')
+ tree1 = self.commit_tree([('a', blob)])
+ tree2 = self.commit_tree([('a', blob, 0100755), ('b', blob)])
+ self.assertEqual(
+ [TreeChange(CHANGE_MODIFY, ('a', F, blob.id),
+ ('a', 0100755, blob.id)),
+ TreeChange(CHANGE_COPY, ('a', F, blob.id), ('b', F, blob.id))],
+ self.detect_renames(tree1, tree2))
+
def test_rename_threshold(self):
blob1 = make_object(Blob, data='a\nb\nc\n')
blob2 = make_object(Blob, data='a\nb\nd\n')
@@ -766,7 +786,7 @@ class RenameDetectionTest(DiffTestCase):
no_renames = [
TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
- TreeChange.add(('b', F, blob2.id))]
+ TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))]
self.assertEqual(
no_renames, self.detect_renames(tree1, tree2))
self.assertEqual(
@@ -797,20 +817,6 @@ class RenameDetectionTest(DiffTestCase):
[TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))],
self.detect_renames(tree1, tree2, find_copies_harder=True))
- def test_find_copies_harder_modify(self):
- blob1 = make_object(Blob, data='a\nb\nc\nd\n')
- blob2 = make_object(Blob, data='a\nb\nc\ne\n')
- tree1 = self.commit_tree([('a', blob1)])
- tree2 = self.commit_tree([('a', blob2), ('b', blob2)])
- self.assertEqual(
- [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
- TreeChange.add(('b', F, blob2.id))],
- self.detect_renames(tree1, tree2))
- self.assertEqual(
- [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
- TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))],
- self.detect_renames(tree1, tree2, find_copies_harder=True))
-
def test_find_copies_harder_with_rewrites(self):
blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
blob_a2 = make_object(Blob, data='f\ng\nh\ni\n')
--
1.7.3.1
References