← Back to team overview

dulwich-users team mailing list archive

[PATCH 10/34] diff_tree: Add function for getting tree changes for merges.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

Change-Id: Idfc916a233d016df6f733941c19a8a34cdd3dad9
---
 NEWS                            |    2 +
 dulwich/diff_tree.py            |   85 +++++++++++++++++++++++++++++++
 dulwich/tests/test_diff_tree.py |  105 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 192 insertions(+), 0 deletions(-)

diff --git a/NEWS b/NEWS
index 7dba793..e00c9c5 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,8 @@
   * New walk module with a Walker class for customizable commit walking.
     (Dave Borowitz)
 
+  * New tree_changes_for_merge function in diff_tree. (Dave Borowitz)
+
  BUG FIXES
 
   * Avoid storing all objects in memory when writing pack.
diff --git a/dulwich/diff_tree.py b/dulwich/diff_tree.py
index affbe54..444355b 100644
--- a/dulwich/diff_tree.py
+++ b/dulwich/diff_tree.py
@@ -193,6 +193,91 @@ def tree_changes(store, tree1_id, tree2_id, want_unchanged=False):
         yield TreeChange(change_type, entry1, entry2)
 
 
+def _all_eq(seq, key, value):
+    for e in seq:
+        if key(e) != value:
+            return False
+    return True
+
+
+def _all_same(seq, key):
+    return _all_eq(seq[1:], key, key(seq[0]))
+
+
+def _matches_any_parent(store, parent_tree_ids, changes):
+    have = [c for c in changes if c is not None]
+    assert have
+    new = have[0].new
+
+    # Look in changes for parents we already have first.
+    for change in have:
+        if new.sha == change.old.sha:
+            return True
+
+    # A change may be None if that path was unchanged, so we need to actually
+    # look up the SHA for that path in any parent trees.
+    # TODO: We could precompute these old_shas (e.g. by passing want_unchanged
+    # to tree_changes), but the assumption is that the cost of tree lookups due
+    # to conflicts is less than the savings we're getting by pruning identical
+    # subtrees.
+    missing = [p for p, c in zip(parent_tree_ids, changes) if c is None]
+    get = store.__getitem__
+    for parent_tree_id in missing:
+        tree = get(parent_tree_id)
+        try:
+            _, old_sha = tree.lookup_path(get, new.path)
+        except KeyError:
+            continue
+        if new.sha == old_sha:
+            return True
+    return False
+
+
+def tree_changes_for_merge(store, parent_tree_ids, tree_id):
+    """Get the tree changes for a merge tree relative to all its parents.
+
+    :param store: An ObjectStore for looking up objects.
+    :param parent_tree_ids: An iterable of the SHAs of the parent trees.
+    :param tree_id: The SHA of the merge tree.
+
+    :yield: Lists of TreeChange objects, one per conflicted path in the merge.
+
+        Each list contains one element per parent, with the TreeChange for that
+        path relative to that parent. An element may be None if it never existed
+        in one parent and was deleted in two others.
+
+        A path is only included in the output if it is a conflict, i.e. its SHA
+        in the merge tree is not found in any of the parents, or in the case of
+        deletes, if not all of the old SHAs match.
+    """
+    all_parent_changes = [tree_changes(store, t, tree_id)
+                          for t in parent_tree_ids]
+    num_parents = len(parent_tree_ids)
+    changes_by_path = defaultdict(lambda: [None] * num_parents)
+
+    # Organize by path.
+    for i, parent_changes in enumerate(all_parent_changes):
+        for change in parent_changes:
+            if change.type == CHANGE_DELETE:
+                path = change.old.path
+            else:
+                path = change.new.path
+            changes_by_path[path][i] = change
+
+    old_sha = lambda c: c.old.sha
+    change_type = lambda c: c.type
+
+    # Yield only conflicting changes.
+    for _, changes in sorted(changes_by_path.iteritems()):
+        assert len(changes) == num_parents
+        have = [c for c in changes if c is not None]
+        if _all_eq(have, change_type, CHANGE_DELETE):
+            if not _all_same(have, old_sha):
+                yield changes
+        elif not _matches_any_parent(store, parent_tree_ids, changes):
+            yield changes
+
+
 _BLOCK_SIZE = 64
 
 
diff --git a/dulwich/tests/test_diff_tree.py b/dulwich/tests/test_diff_tree.py
index d5fa5c4..aa544ac 100644
--- a/dulwich/tests/test_diff_tree.py
+++ b/dulwich/tests/test_diff_tree.py
@@ -27,6 +27,7 @@ from dulwich.diff_tree import (
     _merge_entries,
     _merge_entries_py,
     tree_changes,
+    tree_changes_for_merge,
     _count_blocks,
     _count_blocks_py,
     _similarity_score,
@@ -288,6 +289,110 @@ class TreeChangesTest(DiffTestCase):
                       ('a', F, blob_a2.id))],
           tree1, tree2)
 
+    def assertChangesForMergeEqual(self, expected, parent_trees, merge_tree,
+                                   **kwargs):
+        parent_tree_ids = [t.id for t in parent_trees]
+        actual = list(tree_changes_for_merge(
+          self.store, parent_tree_ids, merge_tree.id, **kwargs))
+        self.assertEqual(expected, actual)
+
+        parent_tree_ids.reverse()
+        expected = [list(reversed(cs)) for cs in expected]
+        actual = list(tree_changes_for_merge(
+          self.store, parent_tree_ids, merge_tree.id, **kwargs))
+        self.assertEqual(expected, actual)
+
+    def test_tree_changes_for_merge_add_no_conflict(self):
+        blob = make_object(Blob, data='blob')
+        parent1 = self.commit_tree([])
+        parent2 = merge = self.commit_tree([('a', blob)])
+        self.assertChangesForMergeEqual([], [parent1, parent2], merge)
+        self.assertChangesForMergeEqual([], [parent2, parent2], merge)
+
+    def test_tree_changes_for_merge_add_modify_conflict(self):
+        blob1 = make_object(Blob, data='1')
+        blob2 = make_object(Blob, data='2')
+        parent1 = self.commit_tree([])
+        parent2 = self.commit_tree([('a', blob1)])
+        merge = self.commit_tree([('a', blob2)])
+        self.assertChangesForMergeEqual(
+          [[TreeChange.add(('a', F, blob2.id)),
+            TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id))]],
+          [parent1, parent2], merge)
+
+    def test_tree_changes_for_merge_modify_modify_conflict(self):
+        blob1 = make_object(Blob, data='1')
+        blob2 = make_object(Blob, data='2')
+        blob3 = make_object(Blob, data='3')
+        parent1 = self.commit_tree([('a', blob1)])
+        parent2 = self.commit_tree([('a', blob2)])
+        merge = self.commit_tree([('a', blob3)])
+        self.assertChangesForMergeEqual(
+          [[TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
+            TreeChange(CHANGE_MODIFY, ('a', F, blob2.id), ('a', F, blob3.id))]],
+          [parent1, parent2], merge)
+
+    def test_tree_changes_for_merge_modify_no_conflict(self):
+        blob1 = make_object(Blob, data='1')
+        blob2 = make_object(Blob, data='2')
+        parent1 = self.commit_tree([('a', blob1)])
+        parent2 = merge = self.commit_tree([('a', blob2)])
+        self.assertChangesForMergeEqual([], [parent1, parent2], merge)
+
+    def test_tree_changes_for_merge_delete_delete_conflict(self):
+        blob1 = make_object(Blob, data='1')
+        blob2 = make_object(Blob, data='2')
+        parent1 = self.commit_tree([('a', blob1)])
+        parent2 = self.commit_tree([('a', blob2)])
+        merge = self.commit_tree([])
+        self.assertChangesForMergeEqual(
+          [[TreeChange.delete(('a', F, blob1.id)),
+            TreeChange.delete(('a', F, blob2.id))]],
+          [parent1, parent2], merge)
+
+    def test_tree_changes_for_merge_delete_no_conflict(self):
+        blob = make_object(Blob, data='blob')
+        has = self.commit_tree([('a', blob)])
+        doesnt_have = self.commit_tree([])
+        self.assertChangesForMergeEqual([], [has, has], doesnt_have)
+        self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have)
+
+    def test_tree_changes_for_merge_octopus_no_conflict(self):
+        r = range(5)
+        blobs = [make_object(Blob, data=str(i)) for i in r]
+        parents = [self.commit_tree([('a', blobs[i])]) for i in r]
+        for i in r:
+            # Take the SHA from each of the parents.
+            self.assertChangesForMergeEqual([], parents, parents[i])
+
+    def test_tree_changes_for_merge_octopus_modify_conflict(self):
+        # Because the octopus merge strategy is limited, I doubt it's possible
+        # to create this with the git command line. But the output is well-
+        # defined, so test it anyway.
+        r = range(5)
+        parent_blobs = [make_object(Blob, data=str(i)) for i in r]
+        merge_blob = make_object(Blob, data='merge')
+        parents = [self.commit_tree([('a', parent_blobs[i])]) for i in r]
+        merge = self.commit_tree([('a', merge_blob)])
+        expected = [[TreeChange(CHANGE_MODIFY, ('a', F, parent_blobs[i].id),
+                                ('a', F, merge_blob.id)) for i in r]]
+        self.assertChangesForMergeEqual(expected, parents, merge)
+
+    def test_tree_changes_for_merge_octopus_delete(self):
+        blob1 = make_object(Blob, data='1')
+        blob2 = make_object(Blob, data='3')
+        parent1 = self.commit_tree([('a', blob1)])
+        parent2 = self.commit_tree([('a', blob2)])
+        parent3 = merge = self.commit_tree([])
+        self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge)
+        self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge)
+        self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge)
+        self.assertChangesForMergeEqual(
+          [[TreeChange.delete(('a', F, blob1.id)),
+            TreeChange.delete(('a', F, blob2.id)),
+            None]],
+          [parent1, parent2, parent3], merge)
+
 
 class RenameDetectionTest(DiffTestCase):
 
-- 
1.7.3.1



References