dulwich-users team mailing list archive
-
dulwich-users team
-
Mailing list archive
-
Message #00293
[PATCH 15/28] diff: RenameDetector with exact rename detection.
From: Dave Borowitz <dborowitz@xxxxxxxxxx>
Change-Id: I227e478b27b866515c536f20f2002a529d0853c1
---
dulwich/diff.py | 75 ++++++++++++++++++++++++++++++++++++++++++++
dulwich/tests/test_diff.py | 64 +++++++++++++++++++++++++++++++++++++-
2 files changed, 138 insertions(+), 1 deletions(-)
diff --git a/dulwich/diff.py b/dulwich/diff.py
index a38e8ef..25cae6b 100644
--- a/dulwich/diff.py
+++ b/dulwich/diff.py
@@ -252,3 +252,78 @@ def _tree_change_key(entry):
if path2 is None:
path2 = path1
return (path1, path2)
+
+
+class RenameDetector(object):
+ """Object for handling rename detection between two trees."""
+
+ def __init__(self, store, tree1_id, tree2_id):
+ """Initialize the rename detector.
+
+ :param store: An ObjectStore for looking up objects.
+ :param tree1_id: The SHA of the first Tree.
+ :param tree2_id: The SHA of the second Tree.
+ """
+ self._tree1_id = tree1_id
+ self._tree2_id = tree2_id
+ self._store = store
+
+ self._adds = []
+ self._deletes = []
+ self._changes = []
+
+ def _collect_changes(self):
+ for change in tree_changes(self._store, self._tree1_id, self._tree2_id):
+ if change.type == CHANGE_ADD:
+ self._adds.append(change)
+ elif change.type == CHANGE_DELETE:
+ self._deletes.append(change)
+ else:
+ self._changes.append(change)
+
+ def _prune(self, add_paths, delete_paths):
+ self._adds = [a for a in self._adds if a.new.path not in add_paths]
+ self._deletes = [d for d in self._deletes
+ if d.old.path not in delete_paths]
+
+ def _find_exact_renames(self):
+ add_map = defaultdict(list)
+ for add in self._adds:
+ add_map[add.new.sha].append(add.new)
+ delete_map = defaultdict(list)
+ for delete in self._deletes:
+ delete_map[delete.old.sha].append(delete.old)
+
+ add_paths = set()
+ delete_paths = set()
+ for sha, sha_deletes in delete_map.iteritems():
+ sha_adds = add_map[sha]
+ for old, new in itertools.izip(sha_deletes, sha_adds):
+ if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
+ continue
+ delete_paths.add(old.path)
+ add_paths.add(new.path)
+ self._changes.append(TreeChange(CHANGE_RENAME, old, new))
+
+ num_extra_adds = len(sha_adds) - len(sha_deletes)
+ # TODO(dborowitz): Less arbitrary way of dealing with extra copies.
+ old = sha_deletes[0]
+ if num_extra_adds:
+ for new in sha_adds[-num_extra_adds:]:
+ add_paths.add(new.path)
+ self._changes.append(TreeChange(CHANGE_COPY, old, new))
+ self._prune(add_paths, delete_paths)
+
+ def _sorted_changes(self):
+ result = []
+ result.extend(self._adds)
+ result.extend(self._deletes)
+ result.extend(self._changes)
+ result.sort(key=_tree_change_key)
+ return result
+
+ def changes_with_renames(self):
+ """Iterate TreeChanges between the two trees, with rename detection."""
+ self._collect_changes()
+ self._find_exact_renames()
+ return self._sorted_changes()
diff --git a/dulwich/tests/test_diff.py b/dulwich/tests/test_diff.py
index 8e77d9c..57492d4 100644
--- a/dulwich/tests/test_diff.py
+++ b/dulwich/tests/test_diff.py
@@ -29,6 +29,7 @@ from dulwich.diff import (
_count_blocks,
_similarity_score,
_tree_change_key,
+ RenameDetector,
)
from dulwich.index import (
commit_tree,
@@ -252,7 +253,7 @@ class TreeChangesTest(DiffTestCase):
tree1, tree2)
-class RenameDetectionTest(TestCase):
+class RenameDetectionTest(DiffTestCase):
def test_count_blocks(self):
blob = make_object(Blob, data='a\nb\na\n')
@@ -326,3 +327,64 @@ class RenameDetectionTest(TestCase):
for perm in permutations(expected_entries):
self.assertEqual(expected_entries,
sorted(perm, key=_tree_change_key))
+
+ def detect_renames(self, tree1, tree2, **kwargs):
+ detector = RenameDetector(self.store, tree1.id, tree2.id, **kwargs)
+ return detector.changes_with_renames()
+
+ def test_exact_rename_one_to_one(self):
+ blob1 = make_object(Blob, data='1')
+ blob2 = make_object(Blob, data='2')
+ tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
+ tree2 = self.commit_tree([('c', blob1), ('d', blob2)])
+ self.assertEqual(
+ [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob1.id)),
+ TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('d', F, blob2.id))],
+ self.detect_renames(tree1, tree2))
+
+ def test_exact_rename_split_different_type(self):
+ blob = make_object(Blob, data='/foo')
+ tree1 = self.commit_tree([('a', blob, 0100644)])
+ tree2 = self.commit_tree([('a', blob, 0120000)])
+ self.assertEqual(
+ [TreeChange.add(('a', 0120000, blob.id)),
+ TreeChange.delete(('a', 0100644, blob.id))],
+ self.detect_renames(tree1, tree2))
+
+ def test_exact_rename_and_different_type(self):
+ blob1 = make_object(Blob, data='1')
+ blob2 = make_object(Blob, data='2')
+ tree1 = self.commit_tree([('a', blob1)])
+ tree2 = self.commit_tree([('a', blob2, 0120000), ('b', blob1)])
+ self.assertEqual(
+ [TreeChange.add(('a', 0120000, blob2.id)),
+ TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob1.id))],
+ self.detect_renames(tree1, tree2))
+
+ def test_exact_rename_one_to_many(self):
+ blob = make_object(Blob, data='1')
+ tree1 = self.commit_tree([('a', blob)])
+ tree2 = self.commit_tree([('b', blob), ('c', blob)])
+ self.assertEqual(
+ [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('b', F, blob.id)),
+ TreeChange(CHANGE_COPY, ('a', F, blob.id), ('c', F, blob.id))],
+ self.detect_renames(tree1, tree2))
+
+ def test_exact_rename_many_to_one(self):
+ blob = make_object(Blob, data='1')
+ tree1 = self.commit_tree([('a', blob), ('b', blob)])
+ tree2 = self.commit_tree([('c', blob)])
+ self.assertEqual(
+ [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
+ TreeChange.delete(('b', F, blob.id))],
+ self.detect_renames(tree1, tree2))
+
+ def test_exact_rename_many_to_many(self):
+ blob = make_object(Blob, data='1')
+ tree1 = self.commit_tree([('a', blob), ('b', blob)])
+ tree2 = self.commit_tree([('c', blob), ('d', blob), ('e', blob)])
+ self.assertEqual(
+ [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
+ TreeChange(CHANGE_COPY, ('a', F, blob.id), ('e', F, blob.id)),
+ TreeChange(CHANGE_RENAME, ('b', F, blob.id), ('d', F, blob.id))],
+ self.detect_renames(tree1, tree2))
--
1.7.3.2.168.gd6b63
References