dulwich-users team mailing list archive
-
dulwich-users team
-
Mailing list archive
-
Message #00280
[PATCH 02/28] Add tree-diffing functionality in a 'diff' module.
From: Dave Borowitz <dborowitz@xxxxxxxxxx>
This is a rewrite of some of the functionality in
BaseObjectStore.iter_tree_contents. It mainly differs in being more
structured and providing the same depth-first pre-order traversal as in
BaseObjectStore.iter_tree_contents.
Change-Id: I6c1504ebf508c485259072b9033c644123c12752
---
dulwich/diff.py | 164 +++++++++++++++++++++++++++++++
dulwich/misc.py | 41 ++++++++
dulwich/tests/test_diff.py | 230 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 435 insertions(+), 0 deletions(-)
create mode 100644 dulwich/diff.py
create mode 100644 dulwich/tests/test_diff.py
diff --git a/dulwich/diff.py b/dulwich/diff.py
new file mode 100644
index 0000000..5c86fa1
--- /dev/null
+++ b/dulwich/diff.py
@@ -0,0 +1,164 @@
+# diff.py -- Utilities for diffing files and trees.
+# Copyright (C) 2010 Google, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# or (at your option) a later version of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+"""Utilities for diffing files and trees."""
+
+import stat
+
+from dulwich.misc import (
+ TreeChangeTuple,
+ )
+from dulwich.objects import (
+ TreeEntry,
+ )
+
+# TreeChange type constants.
+CHANGE_ADD = 'add'
+CHANGE_MODIFY = 'modify'
+CHANGE_DELETE = 'delete'
+CHANGE_RENAME = 'rename'
+CHANGE_COPY = 'copy'
+CHANGE_UNCHANGED = 'unchanged'
+
+_NULL_ENTRY = TreeEntry(None, None, None)
+
+
+class TreeChange(TreeChangeTuple):
+ """Class encapsulating a single change between two trees."""
+
+
+def _tree_entries(path, tree):
+ result = []
+ if not tree:
+ return result
+ for entry in tree.iteritems(name_order=True):
+ result.append(entry.in_path(path))
+ return result
+
+
+def _merge_entries(path, tree1, tree2):
+ """Merge the entries of two trees.
+
+ :param path: A path to prepend to all tree entry names.
+ :param tree1: The first Tree object to iterate, or None.
+ :param tree2: The second Tree object to iterate, or None.
+ :return: A list of pairs of TreeEntry objects for each pair of entries in
+ the trees. If an entry exists in one tree but not the other, the other
+ entry will have all attributes set to None. If neither entry's path is
+ None, they are guaranteed to match.
+ """
+ entries1 = _tree_entries(path, tree1)
+ entries2 = _tree_entries(path, tree2)
+ i1 = i2 = 0
+ len1 = len(entries1)
+ len2 = len(entries2)
+
+ result = []
+ while i1 < len1 and i2 < len2:
+ entry1 = entries1[i1]
+ entry2 = entries2[i2]
+ if entry1.path < entry2.path:
+ result.append((entry1, _NULL_ENTRY))
+ i1 += 1
+ elif entry1.path > entry2.path:
+ result.append((_NULL_ENTRY, entry2))
+ i2 += 1
+ else:
+ result.append((entry1, entry2))
+ i1 += 1
+ i2 += 1
+ for i in xrange(i1, len1):
+ result.append((entries1[i], _NULL_ENTRY))
+ for i in xrange(i2, len2):
+ result.append((_NULL_ENTRY, entries2[i]))
+ return result
+
+
+def walk_trees(store, tree1_id, tree2_id):
+ """Recursively walk all the entries of two trees.
+
+ Iteration is depth-first pre-order, as in e.g. os.walk.
+
+ :param store: An ObjectStore for looking up objects.
+ :param tree1_id: The SHA of the first Tree object to iterate, or None.
+ :param tree2_id: The SHA of the second Tree object to iterate, or None.
+ :yield: Pairs of TreeEntry objects for each pair of entries in the trees and
+ their subtrees recursively. If an entry exists in one tree but not the
+ other, the other entry will have all attributes set to None. If neither
+ entry's path is None, they are guaranteed to match.
+ """
+ # This could be fairly easily generalized to >2 trees if we find a use case.
+ mode1 = tree1_id and stat.S_IFDIR or None
+ mode2 = tree2_id and stat.S_IFDIR or None
+ todo = [(TreeEntry('', mode1, tree1_id), TreeEntry('', mode2, tree2_id))]
+ while todo:
+ entry1, entry2 = todo.pop()
+ is_tree1 = entry1.mode and stat.S_ISDIR(entry1.mode)
+ is_tree2 = entry2.mode and stat.S_ISDIR(entry2.mode)
+
+ tree1 = is_tree1 and store[entry1.sha] or None
+ tree2 = is_tree2 and store[entry2.sha] or None
+ path = entry1.path or entry2.path
+ todo.extend(reversed(_merge_entries(path, tree1, tree2)))
+ yield entry1, entry2
+
+
+def _skip_tree(entry):
+ if entry.mode is None or stat.S_ISDIR(entry.mode):
+ return _NULL_ENTRY
+ return entry
+
+
+def tree_changes(store, tree1_id, tree2_id, want_unchanged=False):
+ """Find the differences between the contents of two trees.
+
+ :param store: An ObjectStore for looking up objects.
+ :param tree1_id: The SHA of the source tree.
+ :param tree2_id: The SHA of the target tree.
+ :param want_unchanged: If True, include TreeChanges for unmodified entries
+ as well.
+ :yield: TreeChange instances for each change between the source and target
+ tree.
+ """
+ entries = walk_trees(store, tree1_id, tree2_id)
+ for entry1, entry2 in entries:
+ if entry1 == entry2 and not want_unchanged:
+ continue
+
+ # Treat entries for trees as missing.
+ entry1 = _skip_tree(entry1)
+ entry2 = _skip_tree(entry2)
+
+ if entry1 != _NULL_ENTRY and entry2 != _NULL_ENTRY:
+ if stat.S_IFMT(entry1.mode) != stat.S_IFMT(entry2.mode):
+ # File type changed: report as delete/add.
+ yield TreeChange(CHANGE_DELETE, entry1, _NULL_ENTRY)
+ entry1 = _NULL_ENTRY
+ change_type = CHANGE_ADD
+ elif entry1 == entry2:
+ change_type = CHANGE_UNCHANGED
+ else:
+ change_type = CHANGE_MODIFY
+ elif entry1 != _NULL_ENTRY:
+ change_type = CHANGE_DELETE
+ elif entry2 != _NULL_ENTRY:
+ change_type = CHANGE_ADD
+ else:
+ # Both were None because at least one was a tree.
+ continue
+ yield TreeChange(change_type, entry1, entry2)
diff --git a/dulwich/misc.py b/dulwich/misc.py
index 7c74751..c7f00c0 100644
--- a/dulwich/misc.py
+++ b/dulwich/misc.py
@@ -105,6 +105,7 @@ try:
from collections import namedtuple
TreeEntryTuple = namedtuple('TreeEntryTuple', ['path', 'mode', 'sha'])
+ TreeChangeTuple = namedtuple('TreeChangeTuple', ['type', 'old', 'new'])
except ImportError:
# Provide manual implementations of namedtuples for Python <2.5.
# If the class definitions change, be sure to keep these in sync by running
@@ -153,3 +154,43 @@ except ImportError:
path = _property(_itemgetter(0))
mode = _property(_itemgetter(1))
sha = _property(_itemgetter(2))
+
+
+ class TreeChangeTuple(tuple):
+ 'TreeChangeTuple(type, old, new)'
+
+ __slots__ = ()
+
+ _fields = ('type', 'old', 'new')
+
+ def __new__(_cls, type, old, new):
+ return _tuple.__new__(_cls, (type, old, new))
+
+ @classmethod
+ def _make(cls, iterable, new=tuple.__new__, len=len):
+ 'Make a new TreeChangeTuple object from a sequence or iterable'
+ result = new(cls, iterable)
+ if len(result) != 3:
+ raise TypeError('Expected 3 arguments, got %d' % len(result))
+ return result
+
+ def __repr__(self):
+ return 'TreeChangeTuple(type=%r, old=%r, new=%r)' % self
+
+ def _asdict(t):
+ 'Return a new dict which maps field names to their values'
+ return {'type': t[0], 'old': t[1], 'new': t[2]}
+
+ def _replace(_self, **kwds):
+ 'Return a new TreeChangeTuple object replacing specified fields with new values'
+ result = _self._make(map(kwds.pop, ('type', 'old', 'new'), _self))
+ if kwds:
+ raise ValueError('Got unexpected field names: %r' % kwds.keys())
+ return result
+
+ def __getnewargs__(self):
+ return tuple(self)
+
+ type = _property(_itemgetter(0))
+ old = _property(_itemgetter(1))
+ new = _property(_itemgetter(2))
diff --git a/dulwich/tests/test_diff.py b/dulwich/tests/test_diff.py
new file mode 100644
index 0000000..e17ce49
--- /dev/null
+++ b/dulwich/tests/test_diff.py
@@ -0,0 +1,230 @@
+# diff.py -- Utilities for diffing files and trees.
+# Copyright (C) 2010 Google, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# or (at your option) a later version of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+"""Tests for diff utilities."""
+
+from dulwich.diff import (
+ CHANGE_ADD,
+ CHANGE_MODIFY,
+ CHANGE_DELETE,
+ CHANGE_UNCHANGED,
+ _NULL_ENTRY,
+ TreeChange,
+ _merge_entries,
+ tree_changes,
+ )
+from dulwich.index import (
+ commit_tree,
+ )
+from dulwich.object_store import (
+ MemoryObjectStore,
+ )
+from dulwich.objects import (
+ Blob,
+ )
+from dulwich.tests import (
+ TestCase,
+ )
+from dulwich.tests.utils import (
+ make_object,
+ )
+
+
+class TreeChangesTest(TestCase):
+
+ def setUp(self):
+ self.store = MemoryObjectStore()
+ self.empty_tree = self.commit_tree([])
+
+ def commit_tree(self, blobs):
+ commit_blobs = []
+ for path, blob, mode in blobs:
+ self.store.add_object(blob)
+ commit_blobs.append((path, blob.id, mode))
+ return self.store[commit_tree(self.store, commit_blobs)]
+
+ def test_merge_entries(self):
+ blob_a1 = make_object(Blob, data='a1')
+ blob_a2 = make_object(Blob, data='a2')
+ blob_b1 = make_object(Blob, data='b1')
+ blob_c2 = make_object(Blob, data='c2')
+ tree1 = self.commit_tree([('a', blob_a1, 0100644),
+ ('b', blob_b1, 0100755)])
+ tree2 = self.commit_tree([('a', blob_a2, 0100644),
+ ('c', blob_c2, 0100755)])
+
+ self.assertEqual([], _merge_entries('', self.empty_tree,
+ self.empty_tree))
+ self.assertEqual([
+ ((None, None, None), ('a', 0100644, blob_a1.id)),
+ ((None, None, None), ('b', 0100755, blob_b1.id)),
+ ], _merge_entries('', self.empty_tree, tree1))
+ self.assertEqual([
+ ((None, None, None), ('x/a', 0100644, blob_a1.id)),
+ ((None, None, None), ('x/b', 0100755, blob_b1.id)),
+ ], _merge_entries('x', self.empty_tree, tree1))
+
+ self.assertEqual([
+ (('a', 0100644, blob_a2.id), (None, None, None)),
+ (('c', 0100755, blob_c2.id), (None, None, None)),
+ ], _merge_entries('', tree2, self.empty_tree))
+
+ self.assertEqual([
+ (('a', 0100644, blob_a1.id), ('a', 0100644, blob_a2.id)),
+ (('b', 0100755, blob_b1.id), (None, None, None)),
+ ((None, None, None), ('c', 0100755, blob_c2.id)),
+ ], _merge_entries('', tree1, tree2))
+
+ self.assertEqual([
+ (('a', 0100644, blob_a2.id), ('a', 0100644, blob_a1.id)),
+ ((None, None, None), ('b', 0100755, blob_b1.id)),
+ (('c', 0100755, blob_c2.id), (None, None, None)),
+ ], _merge_entries('', tree2, tree1))
+
+ def assertChangesEqual(self, expected, tree1, tree2, **kwargs):
+ actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs))
+ self.assertEqual(expected, actual)
+
+ # For brevity, the following tests use tuples instead of TreeEntry objects.
+
+ def test_tree_changes_empty(self):
+ self.assertChangesEqual([], self.empty_tree, self.empty_tree)
+
+ def test_tree_changes_no_changes(self):
+ blob = make_object(Blob, data='blob')
+ tree = self.commit_tree([('a', blob, 0100644),
+ ('b/c', blob, 0100644)])
+ self.assertChangesEqual([], self.empty_tree, self.empty_tree)
+ self.assertChangesEqual([], tree, tree)
+ self.assertChangesEqual(
+ [TreeChange(CHANGE_UNCHANGED, ('a', 0100644, blob.id),
+ ('a', 0100644, blob.id)),
+ TreeChange(CHANGE_UNCHANGED, ('b/c', 0100644, blob.id),
+ ('b/c', 0100644, blob.id))],
+ tree, tree, want_unchanged=True)
+
+ def test_tree_changes_add_delete(self):
+ blob_a = make_object(Blob, data='a')
+ blob_b = make_object(Blob, data='b')
+ tree = self.commit_tree([('a', blob_a, 0100644),
+ ('x/b', blob_b, 0100755)])
+ self.assertChangesEqual(
+ [TreeChange(CHANGE_ADD, _NULL_ENTRY, ('a', 0100644, blob_a.id)),
+ TreeChange(CHANGE_ADD, _NULL_ENTRY, ('x/b', 0100755, blob_b.id))],
+ self.empty_tree, tree)
+ self.assertChangesEqual(
+ [TreeChange(CHANGE_DELETE, ('a', 0100644, blob_a.id), _NULL_ENTRY),
+ TreeChange(CHANGE_DELETE, ('x/b', 0100755, blob_b.id), _NULL_ENTRY)],
+ tree, self.empty_tree)
+
+ def test_tree_changes_modify_contents(self):
+ blob_a1 = make_object(Blob, data='a1')
+ blob_a2 = make_object(Blob, data='a2')
+ tree1 = self.commit_tree([('a', blob_a1, 0100644)])
+ tree2 = self.commit_tree([('a', blob_a2, 0100644)])
+ self.assertChangesEqual(
+ [TreeChange(CHANGE_MODIFY, ('a', 0100644, blob_a1.id),
+ ('a', 0100644, blob_a2.id))], tree1, tree2)
+
+ def test_tree_changes_modify_mode(self):
+ blob_a = make_object(Blob, data='a')
+ tree1 = self.commit_tree([('a', blob_a, 0100644)])
+ tree2 = self.commit_tree([('a', blob_a, 0100755)])
+ self.assertChangesEqual(
+ [TreeChange(CHANGE_MODIFY, ('a', 0100644, blob_a.id),
+ ('a', 0100755, blob_a.id))], tree1, tree2)
+
+ def test_tree_changes_change_type(self):
+ blob_a1 = make_object(Blob, data='a')
+ blob_a2 = make_object(Blob, data='/foo/bar')
+ tree1 = self.commit_tree([('a', blob_a1, 0100644)])
+ tree2 = self.commit_tree([('a', blob_a2, 0120000)])
+ self.assertChangesEqual(
+ [TreeChange(CHANGE_DELETE, ('a', 0100644, blob_a1.id), _NULL_ENTRY),
+ TreeChange(CHANGE_ADD, _NULL_ENTRY, ('a', 0120000, blob_a2.id))],
+ tree1, tree2)
+
+ def test_tree_changes_to_tree(self):
+ blob_a = make_object(Blob, data='a')
+ blob_x = make_object(Blob, data='x')
+ tree1 = self.commit_tree([('a', blob_a, 0100644)])
+ tree2 = self.commit_tree([('a/x', blob_x, 0100644)])
+ self.assertChangesEqual(
+ [TreeChange(CHANGE_DELETE, ('a', 0100644, blob_a.id), _NULL_ENTRY),
+ TreeChange(CHANGE_ADD, _NULL_ENTRY, ('a/x', 0100644, blob_x.id))],
+ tree1, tree2)
+
+ def test_tree_changes_complex(self):
+ blob_a_1 = make_object(Blob, data='a1_1')
+ blob_bx1_1 = make_object(Blob, data='bx1_1')
+ blob_bx2_1 = make_object(Blob, data='bx2_1')
+ blob_by1_1 = make_object(Blob, data='by1_1')
+ blob_by2_1 = make_object(Blob, data='by2_1')
+ tree1 = self.commit_tree([
+ ('a', blob_a_1, 0100644),
+ ('b/x/1', blob_bx1_1, 0100644),
+ ('b/x/2', blob_bx2_1, 0100644),
+ ('b/y/1', blob_by1_1, 0100644),
+ ('b/y/2', blob_by2_1, 0100644),
+ ])
+
+ blob_a_2 = make_object(Blob, data='a1_2')
+ blob_bx1_2 = blob_bx1_1
+ blob_by_2 = make_object(Blob, data='by_2')
+ blob_c_2 = make_object(Blob, data='c_2')
+ tree2 = self.commit_tree([
+ ('a', blob_a_2, 0100644),
+ ('b/x/1', blob_bx1_2, 0100644),
+ ('b/y', blob_by_2, 0100644),
+ ('c', blob_c_2, 0100644),
+ ])
+
+ self.assertChangesEqual(
+ [TreeChange(CHANGE_MODIFY, ('a', 0100644, blob_a_1.id),
+ ('a', 0100644, blob_a_2.id)),
+ TreeChange(CHANGE_DELETE, ('b/x/2', 0100644, blob_bx2_1.id),
+ _NULL_ENTRY),
+ TreeChange(CHANGE_ADD, _NULL_ENTRY, ('b/y', 0100644, blob_by_2.id)),
+ TreeChange(CHANGE_DELETE, ('b/y/1', 0100644, blob_by1_1.id),
+ _NULL_ENTRY),
+ TreeChange(CHANGE_DELETE, ('b/y/2', 0100644, blob_by2_1.id),
+ _NULL_ENTRY),
+ TreeChange(CHANGE_ADD, _NULL_ENTRY, ('c', 0100644, blob_c_2.id))],
+ tree1, tree2)
+
+ def test_tree_changes_name_order(self):
+ blob = make_object(Blob, data='a')
+ tree1 = self.commit_tree([
+ ('a', blob, 0100644),
+ ('a.', blob, 0100644),
+ ('a..', blob, 0100644),
+ ])
+ # Tree order is the reverse of this, so if we used tree order, 'a..'
+ # would not be merged.
+ tree2 = self.commit_tree([
+ ('a/x', blob, 0100644),
+ ('a./x', blob, 0100644),
+ ('a..', blob, 0100644),
+ ])
+
+ self.assertChangesEqual(
+ [TreeChange(CHANGE_DELETE, ('a', 0100644, blob.id), _NULL_ENTRY),
+ TreeChange(CHANGE_ADD, _NULL_ENTRY, ('a/x', 0100644, blob.id)),
+ TreeChange(CHANGE_DELETE, ('a.', 0100644, blob.id), _NULL_ENTRY),
+ TreeChange(CHANGE_ADD, _NULL_ENTRY, ('a./x', 0100644, blob.id))],
+ tree1, tree2)
--
1.7.3.2.168.gd6b63
Follow ups
References