← Back to team overview

dulwich-users team mailing list archive

[PATCH 15/28] diff: RenameDetector with exact rename detection.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

Change-Id: I227e478b27b866515c536f20f2002a529d0853c1
---
 dulwich/diff.py            |   75 ++++++++++++++++++++++++++++++++++++++++++++
 dulwich/tests/test_diff.py |   64 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 138 insertions(+), 1 deletions(-)

diff --git a/dulwich/diff.py b/dulwich/diff.py
index a38e8ef..25cae6b 100644
--- a/dulwich/diff.py
+++ b/dulwich/diff.py
@@ -252,3 +252,78 @@ def _tree_change_key(entry):
     if path2 is None:
         path2 = path1
     return (path1, path2)
+
+
+class RenameDetector(object):
+    """Object for handling rename detection between two trees."""
+
+    def __init__(self, store, tree1_id, tree2_id):
+        """Initialize the rename detector.
+
+        :param store: An ObjectStore for looking up objects.
+        :param tree1_id: The SHA of the first Tree.
+        :param tree2_id: The SHA of the second Tree.
+        """
+        self._tree1_id = tree1_id
+        self._tree2_id = tree2_id
+        self._store = store
+
+        self._adds = []
+        self._deletes = []
+        self._changes = []
+
+    def _collect_changes(self):
+        for change in tree_changes(self._store, self._tree1_id, self._tree2_id):
+            if change.type == CHANGE_ADD:
+                self._adds.append(change)
+            elif change.type == CHANGE_DELETE:
+                self._deletes.append(change)
+            else:
+                self._changes.append(change)
+
+    def _prune(self, add_paths, delete_paths):
+        self._adds = [a for a in self._adds if a.new.path not in add_paths]
+        self._deletes = [d for d in self._deletes
+                         if d.old.path not in delete_paths]
+
+    def _find_exact_renames(self):
+        add_map = defaultdict(list)
+        for add in self._adds:
+            add_map[add.new.sha].append(add.new)
+        delete_map = defaultdict(list)
+        for delete in self._deletes:
+            delete_map[delete.old.sha].append(delete.old)
+
+        add_paths = set()
+        delete_paths = set()
+        for sha, sha_deletes in delete_map.iteritems():
+            sha_adds = add_map[sha]
+            for old, new in itertools.izip(sha_deletes, sha_adds):
+                if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
+                    continue
+                delete_paths.add(old.path)
+                add_paths.add(new.path)
+                self._changes.append(TreeChange(CHANGE_RENAME, old, new))
+
+            num_extra_adds = len(sha_adds) - len(sha_deletes)
+            # TODO(dborowitz): Less arbitrary way of dealing with extra copies.
+            old = sha_deletes[0]
+            if num_extra_adds:
+                for new in sha_adds[-num_extra_adds:]:
+                    add_paths.add(new.path)
+                    self._changes.append(TreeChange(CHANGE_COPY, old, new))
+        self._prune(add_paths, delete_paths)
+
+    def _sorted_changes(self):
+        result = []
+        result.extend(self._adds)
+        result.extend(self._deletes)
+        result.extend(self._changes)
+        result.sort(key=_tree_change_key)
+        return result
+
+    def changes_with_renames(self):
+        """Iterate TreeChanges between the two trees, with rename detection."""
+        self._collect_changes()
+        self._find_exact_renames()
+        return self._sorted_changes()
diff --git a/dulwich/tests/test_diff.py b/dulwich/tests/test_diff.py
index 8e77d9c..57492d4 100644
--- a/dulwich/tests/test_diff.py
+++ b/dulwich/tests/test_diff.py
@@ -29,6 +29,7 @@ from dulwich.diff import (
     _count_blocks,
     _similarity_score,
     _tree_change_key,
+    RenameDetector,
     )
 from dulwich.index import (
     commit_tree,
@@ -252,7 +253,7 @@ class TreeChangesTest(DiffTestCase):
           tree1, tree2)
 
 
-class RenameDetectionTest(TestCase):
+class RenameDetectionTest(DiffTestCase):
 
     def test_count_blocks(self):
         blob = make_object(Blob, data='a\nb\na\n')
@@ -326,3 +327,64 @@ class RenameDetectionTest(TestCase):
         for perm in permutations(expected_entries):
             self.assertEqual(expected_entries,
                              sorted(perm, key=_tree_change_key))
+
+    def detect_renames(self, tree1, tree2, **kwargs):
+        detector = RenameDetector(self.store, tree1.id, tree2.id, **kwargs)
+        return detector.changes_with_renames()
+
+    def test_exact_rename_one_to_one(self):
+        blob1 = make_object(Blob, data='1')
+        blob2 = make_object(Blob, data='2')
+        tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
+        tree2 = self.commit_tree([('c', blob1), ('d', blob2)])
+        self.assertEqual(
+          [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob1.id)),
+           TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('d', F, blob2.id))],
+          self.detect_renames(tree1, tree2))
+
+    def test_exact_rename_split_different_type(self):
+        blob = make_object(Blob, data='/foo')
+        tree1 = self.commit_tree([('a', blob, 0100644)])
+        tree2 = self.commit_tree([('a', blob, 0120000)])
+        self.assertEqual(
+          [TreeChange.add(('a', 0120000, blob.id)),
+           TreeChange.delete(('a', 0100644, blob.id))],
+          self.detect_renames(tree1, tree2))
+
+    def test_exact_rename_and_different_type(self):
+        blob1 = make_object(Blob, data='1')
+        blob2 = make_object(Blob, data='2')
+        tree1 = self.commit_tree([('a', blob1)])
+        tree2 = self.commit_tree([('a', blob2, 0120000), ('b', blob1)])
+        self.assertEqual(
+          [TreeChange.add(('a', 0120000, blob2.id)),
+           TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob1.id))],
+          self.detect_renames(tree1, tree2))
+
+    def test_exact_rename_one_to_many(self):
+        blob = make_object(Blob, data='1')
+        tree1 = self.commit_tree([('a', blob)])
+        tree2 = self.commit_tree([('b', blob), ('c', blob)])
+        self.assertEqual(
+          [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('b', F, blob.id)),
+           TreeChange(CHANGE_COPY, ('a', F, blob.id), ('c', F, blob.id))],
+          self.detect_renames(tree1, tree2))
+
+    def test_exact_rename_many_to_one(self):
+        blob = make_object(Blob, data='1')
+        tree1 = self.commit_tree([('a', blob), ('b', blob)])
+        tree2 = self.commit_tree([('c', blob)])
+        self.assertEqual(
+          [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
+           TreeChange.delete(('b', F, blob.id))],
+          self.detect_renames(tree1, tree2))
+
+    def test_exact_rename_many_to_many(self):
+        blob = make_object(Blob, data='1')
+        tree1 = self.commit_tree([('a', blob), ('b', blob)])
+        tree2 = self.commit_tree([('c', blob), ('d', blob), ('e', blob)])
+        self.assertEqual(
+          [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
+           TreeChange(CHANGE_COPY, ('a', F, blob.id), ('e', F, blob.id)),
+           TreeChange(CHANGE_RENAME, ('b', F, blob.id), ('d', F, blob.id))],
+          self.detect_renames(tree1, tree2))
-- 
1.7.3.2.168.gd6b63




References