dulwich-users team mailing list archive
-
dulwich-users team
-
Mailing list archive
-
Message #00607
[PATCH 10/34] diff_tree: Add function for getting tree changes for merges.
From: Dave Borowitz <dborowitz@xxxxxxxxxx>
Change-Id: Idfc916a233d016df6f733941c19a8a34cdd3dad9
---
NEWS | 2 +
dulwich/diff_tree.py | 85 +++++++++++++++++++++++++++++++
dulwich/tests/test_diff_tree.py | 105 +++++++++++++++++++++++++++++++++++++++
3 files changed, 192 insertions(+), 0 deletions(-)
diff --git a/NEWS b/NEWS
index 7dba793..e00c9c5 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,8 @@
* New walk module with a Walker class for customizable commit walking.
(Dave Borowitz)
+ * New tree_changes_for_merge function in diff_tree. (Dave Borowitz)
+
BUG FIXES
* Avoid storing all objects in memory when writing pack.
diff --git a/dulwich/diff_tree.py b/dulwich/diff_tree.py
index affbe54..444355b 100644
--- a/dulwich/diff_tree.py
+++ b/dulwich/diff_tree.py
@@ -193,6 +193,91 @@ def tree_changes(store, tree1_id, tree2_id, want_unchanged=False):
yield TreeChange(change_type, entry1, entry2)
+def _all_eq(seq, key, value):
+ for e in seq:
+ if key(e) != value:
+ return False
+ return True
+
+
+def _all_same(seq, key):
+ return _all_eq(seq[1:], key, key(seq[0]))
+
+
+def _matches_any_parent(store, parent_tree_ids, changes):
+ have = [c for c in changes if c is not None]
+ assert have
+ new = have[0].new
+
+ # Look in changes for parents we already have first.
+ for change in have:
+ if new.sha == change.old.sha:
+ return True
+
+ # A change may be None if that path was unchanged, so we need to actually
+ # look up the SHA for that path in any parent trees.
+ # TODO: We could precompute these old_shas (e.g. by passing want_unchanged
+ # to tree_changes), but the assumption is that the cost of tree lookups due
+ # to conflicts is less than the savings we're getting by pruning identical
+ # subtrees.
+ missing = [p for p, c in zip(parent_tree_ids, changes) if c is None]
+ get = store.__getitem__
+ for parent_tree_id in missing:
+ tree = get(parent_tree_id)
+ try:
+ _, old_sha = tree.lookup_path(get, new.path)
+ except KeyError:
+ continue
+ if new.sha == old_sha:
+ return True
+ return False
+
+
+def tree_changes_for_merge(store, parent_tree_ids, tree_id):
+ """Get the tree changes for a merge tree relative to all its parents.
+
+ :param store: An ObjectStore for looking up objects.
+ :param parent_tree_ids: An iterable of the SHAs of the parent trees.
+ :param tree_id: The SHA of the merge tree.
+
+ :yield: Lists of TreeChange objects, one per conflicted path in the merge.
+
+ Each list contains one element per parent, with the TreeChange for that
+ path relative to that parent. An element may be None if it never existed
+ in one parent and was deleted in two others.
+
+ A path is only included in the output if it is a conflict, i.e. its SHA
+ in the merge tree is not found in any of the parents, or in the case of
+ deletes, if not all of the old SHAs match.
+ """
+ all_parent_changes = [tree_changes(store, t, tree_id)
+ for t in parent_tree_ids]
+ num_parents = len(parent_tree_ids)
+ changes_by_path = defaultdict(lambda: [None] * num_parents)
+
+ # Organize by path.
+ for i, parent_changes in enumerate(all_parent_changes):
+ for change in parent_changes:
+ if change.type == CHANGE_DELETE:
+ path = change.old.path
+ else:
+ path = change.new.path
+ changes_by_path[path][i] = change
+
+ old_sha = lambda c: c.old.sha
+ change_type = lambda c: c.type
+
+ # Yield only conflicting changes.
+ for _, changes in sorted(changes_by_path.iteritems()):
+ assert len(changes) == num_parents
+ have = [c for c in changes if c is not None]
+ if _all_eq(have, change_type, CHANGE_DELETE):
+ if not _all_same(have, old_sha):
+ yield changes
+ elif not _matches_any_parent(store, parent_tree_ids, changes):
+ yield changes
+
+
_BLOCK_SIZE = 64
diff --git a/dulwich/tests/test_diff_tree.py b/dulwich/tests/test_diff_tree.py
index d5fa5c4..aa544ac 100644
--- a/dulwich/tests/test_diff_tree.py
+++ b/dulwich/tests/test_diff_tree.py
@@ -27,6 +27,7 @@ from dulwich.diff_tree import (
_merge_entries,
_merge_entries_py,
tree_changes,
+ tree_changes_for_merge,
_count_blocks,
_count_blocks_py,
_similarity_score,
@@ -288,6 +289,110 @@ class TreeChangesTest(DiffTestCase):
('a', F, blob_a2.id))],
tree1, tree2)
+ def assertChangesForMergeEqual(self, expected, parent_trees, merge_tree,
+ **kwargs):
+ parent_tree_ids = [t.id for t in parent_trees]
+ actual = list(tree_changes_for_merge(
+ self.store, parent_tree_ids, merge_tree.id, **kwargs))
+ self.assertEqual(expected, actual)
+
+ parent_tree_ids.reverse()
+ expected = [list(reversed(cs)) for cs in expected]
+ actual = list(tree_changes_for_merge(
+ self.store, parent_tree_ids, merge_tree.id, **kwargs))
+ self.assertEqual(expected, actual)
+
+ def test_tree_changes_for_merge_add_no_conflict(self):
+ blob = make_object(Blob, data='blob')
+ parent1 = self.commit_tree([])
+ parent2 = merge = self.commit_tree([('a', blob)])
+ self.assertChangesForMergeEqual([], [parent1, parent2], merge)
+ self.assertChangesForMergeEqual([], [parent2, parent2], merge)
+
+ def test_tree_changes_for_merge_add_modify_conflict(self):
+ blob1 = make_object(Blob, data='1')
+ blob2 = make_object(Blob, data='2')
+ parent1 = self.commit_tree([])
+ parent2 = self.commit_tree([('a', blob1)])
+ merge = self.commit_tree([('a', blob2)])
+ self.assertChangesForMergeEqual(
+ [[TreeChange.add(('a', F, blob2.id)),
+ TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id))]],
+ [parent1, parent2], merge)
+
+ def test_tree_changes_for_merge_modify_modify_conflict(self):
+ blob1 = make_object(Blob, data='1')
+ blob2 = make_object(Blob, data='2')
+ blob3 = make_object(Blob, data='3')
+ parent1 = self.commit_tree([('a', blob1)])
+ parent2 = self.commit_tree([('a', blob2)])
+ merge = self.commit_tree([('a', blob3)])
+ self.assertChangesForMergeEqual(
+ [[TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
+ TreeChange(CHANGE_MODIFY, ('a', F, blob2.id), ('a', F, blob3.id))]],
+ [parent1, parent2], merge)
+
+ def test_tree_changes_for_merge_modify_no_conflict(self):
+ blob1 = make_object(Blob, data='1')
+ blob2 = make_object(Blob, data='2')
+ parent1 = self.commit_tree([('a', blob1)])
+ parent2 = merge = self.commit_tree([('a', blob2)])
+ self.assertChangesForMergeEqual([], [parent1, parent2], merge)
+
+ def test_tree_changes_for_merge_delete_delete_conflict(self):
+ blob1 = make_object(Blob, data='1')
+ blob2 = make_object(Blob, data='2')
+ parent1 = self.commit_tree([('a', blob1)])
+ parent2 = self.commit_tree([('a', blob2)])
+ merge = self.commit_tree([])
+ self.assertChangesForMergeEqual(
+ [[TreeChange.delete(('a', F, blob1.id)),
+ TreeChange.delete(('a', F, blob2.id))]],
+ [parent1, parent2], merge)
+
+ def test_tree_changes_for_merge_delete_no_conflict(self):
+ blob = make_object(Blob, data='blob')
+ has = self.commit_tree([('a', blob)])
+ doesnt_have = self.commit_tree([])
+ self.assertChangesForMergeEqual([], [has, has], doesnt_have)
+ self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have)
+
+ def test_tree_changes_for_merge_octopus_no_conflict(self):
+ r = range(5)
+ blobs = [make_object(Blob, data=str(i)) for i in r]
+ parents = [self.commit_tree([('a', blobs[i])]) for i in r]
+ for i in r:
+ # Take the SHA from each of the parents.
+ self.assertChangesForMergeEqual([], parents, parents[i])
+
+ def test_tree_changes_for_merge_octopus_modify_conflict(self):
+ # Because the octopus merge strategy is limited, I doubt it's possible
+ # to create this with the git command line. But the output is well-
+ # defined, so test it anyway.
+ r = range(5)
+ parent_blobs = [make_object(Blob, data=str(i)) for i in r]
+ merge_blob = make_object(Blob, data='merge')
+ parents = [self.commit_tree([('a', parent_blobs[i])]) for i in r]
+ merge = self.commit_tree([('a', merge_blob)])
+ expected = [[TreeChange(CHANGE_MODIFY, ('a', F, parent_blobs[i].id),
+ ('a', F, merge_blob.id)) for i in r]]
+ self.assertChangesForMergeEqual(expected, parents, merge)
+
+ def test_tree_changes_for_merge_octopus_delete(self):
+ blob1 = make_object(Blob, data='1')
+ blob2 = make_object(Blob, data='3')
+ parent1 = self.commit_tree([('a', blob1)])
+ parent2 = self.commit_tree([('a', blob2)])
+ parent3 = merge = self.commit_tree([])
+ self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge)
+ self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge)
+ self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge)
+ self.assertChangesForMergeEqual(
+ [[TreeChange.delete(('a', F, blob1.id)),
+ TreeChange.delete(('a', F, blob2.id)),
+ None]],
+ [parent1, parent2, parent3], merge)
+
class RenameDetectionTest(DiffTestCase):
--
1.7.3.1
References