← Back to team overview

dulwich-users team mailing list archive

[PATCH 21/28] diff: C implementation of _merge_entries.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

Change-Id: Idd816c7fbec69e7a81ab9fe9a83e897bc6bdf506
---
 dulwich/_diff.c            |  237 ++++++++++++++++++++++++++++++++++++++++++++
 dulwich/diff.py            |    3 +-
 dulwich/tests/test_diff.py |   35 +++++--
 3 files changed, 266 insertions(+), 9 deletions(-)

diff --git a/dulwich/_diff.c b/dulwich/_diff.c
index 5da7d67..4838a91 100644
--- a/dulwich/_diff.c
+++ b/dulwich/_diff.c
@@ -20,6 +20,225 @@
 #include <Python.h>
 #include <sys/stat.h>
 
+#if (PY_VERSION_HEX < 0x02050000)
+typedef int Py_ssize_t;
+#endif
+
+#if (PY_VERSION_HEX < 0x02060000)
+#define Py_SIZE(x) Py_Size(x)
+#endif
+
+static PyObject *tree_entry_cls, *null_entry;
+
+/**
+ * Free an array of PyObject pointers, decrementing any references.
+ */
+static void free_objects(PyObject **objs, Py_ssize_t n) {
+	Py_ssize_t i;
+	for (i = 0; i < n; i++)
+		Py_XDECREF(objs[i]);
+	PyMem_Free(objs);
+}
+
+/**
+ * Get the entries of a tree, prepending the given path.
+ *
+ * :param path: The path to prepend, without trailing slashes.
+ * :param path_len: The length of path.
+ * :param tree: The Tree object to iterate.
+ * :param n: Set to the length of result.
+ * :return: A (C) array of PyObject pointers to TreeEntry objects for each path
+ *     in tree.
+ */
+static PyObject **tree_entries(char *path, Py_ssize_t path_len, PyObject *tree,
+		Py_ssize_t *n) {
+	PyObject *iteritems, *items, **result = NULL;
+	PyObject *old_entry, *name, *sha;
+	Py_ssize_t i = 0, name_len, new_path_len;
+	char *new_path;
+
+	if (tree == Py_None) {
+		*n = 0;
+		result = PyMem_New(PyObject*, 0);
+		if (!result) {
+			PyErr_SetNone(PyExc_MemoryError);
+			return NULL;
+		}
+		return result;
+	}
+
+	iteritems = PyObject_GetAttrString(tree, "iteritems");
+	if (!iteritems)
+		return NULL;
+	items = PyObject_CallFunctionObjArgs(iteritems, Py_True, NULL);
+	Py_DECREF(iteritems);
+	if (!items) {
+		return NULL;
+	}
+	/* The C implementation of iteritems returns a list, so depend on that. */
+	if (!PyList_Check(items)) {
+		PyErr_SetString(PyExc_TypeError,
+			"Tree.iteritems() did not return a list");
+		return NULL;
+	}
+
+	*n = PyList_Size(items);
+	result = PyMem_New(PyObject*, *n);
+	if (!result) {
+		PyErr_SetNone(PyExc_MemoryError);
+		goto error;
+	}
+	for (i = 0; i < *n; i++) {
+		old_entry = PyList_GetItem(items, i);
+		if (!old_entry)
+			goto error;
+		sha = PyTuple_GetItem(old_entry, 2);
+		if (!sha)
+			goto error;
+		name = PyTuple_GET_ITEM(old_entry, 0);
+		name_len = PyString_Size(name);
+		if (PyErr_Occurred())
+			goto error;
+
+		new_path_len = name_len;
+		if (path_len)
+			new_path_len += path_len + 1;
+		new_path = PyMem_Malloc(new_path_len);
+		if (!new_path) {
+			PyErr_SetNone(PyExc_MemoryError);
+			goto error;
+		}
+		if (path_len) {
+			memcpy(new_path, path, path_len);
+			new_path[path_len] = '/';
+			memcpy(new_path + path_len + 1, PyString_AS_STRING(name), name_len);
+		} else {
+			memcpy(new_path, PyString_AS_STRING(name), name_len);
+		}
+
+		result[i] = PyObject_CallFunction(tree_entry_cls, "s#OO", new_path,
+			new_path_len, PyTuple_GET_ITEM(old_entry, 1), sha);
+		PyMem_Free(new_path);
+		if (!result[i]) {
+			goto error;
+		}
+	}
+	Py_DECREF(items);
+	return result;
+
+error:
+	free_objects(result, i);
+	Py_DECREF(items);
+	return NULL;
+}
+
+/**
+ * Use strcmp to compare the paths of two TreeEntry objects.
+ */
+static int entry_path_cmp(PyObject *entry1, PyObject *entry2) {
+	PyObject *path1 = NULL, *path2 = NULL;
+	int result;
+
+	path1 = PyObject_GetAttrString(entry1, "path");
+	if (!path1)
+		goto done;
+	if (!PyString_Check(path1)) {
+		PyErr_SetString(PyExc_TypeError, "path is not a string");
+		goto done;
+	}
+
+	path2 = PyObject_GetAttrString(entry2, "path");
+	if (!path2)
+		goto done;
+	if (!PyString_Check(path2)) {
+		PyErr_SetString(PyExc_TypeError, "path is not a string");
+		goto done;
+	}
+
+	result = strcmp(PyString_AS_STRING(path1), PyString_AS_STRING(path2));
+
+done:
+	Py_XDECREF(path1);
+	Py_XDECREF(path2);
+	return result;
+}
+
+static PyObject *py_merge_entries(PyObject *self, PyObject *args)
+{
+	PyObject *path, *tree1, *tree2, **entries1 = NULL, **entries2 = NULL;
+	PyObject *e1, *e2, *pair, *result = NULL;
+	Py_ssize_t path_len, n1 = 0, n2 = 0, i1 = 0, i2 = 0;
+	char *path_str;
+	int cmp;
+
+	if (!PyArg_ParseTuple(args, "OOO", &path, &tree1, &tree2))
+		return NULL;
+
+	path_str = PyString_AsString(path);
+	if (!path_str) {
+		PyErr_SetString(PyExc_TypeError, "path is not a string");
+		return NULL;
+	}
+	path_len = PyString_GET_SIZE(path);
+
+	entries1 = tree_entries(path_str, path_len, tree1, &n1);
+	if (!entries1)
+		goto error;
+	entries2 = tree_entries(path_str, path_len, tree2, &n2);
+	if (!entries2)
+		goto error;
+
+	result = PyList_New(n1 + n2);
+	if (!result)
+		goto error;
+	/* PyList_New sets the len of the list, not its allocated size, so we
+	 * need to trim it to the size we actually use. */
+	Py_SIZE(result) = 0;
+
+	while (i1 < n1 && i2 < n2) {
+		cmp = entry_path_cmp(entries1[i1], entries2[i2]);
+		if (PyErr_Occurred())
+			goto error;
+		if (!cmp) {
+			e1 = entries1[i1++];
+			e2 = entries2[i2++];
+		} else if (cmp < 0) {
+			e1 = entries1[i1++];
+			e2 = null_entry;
+		} else {
+			e1 = null_entry;
+			e2 = entries2[i2++];
+		}
+		pair = PyTuple_Pack(2, e1, e2);
+		if (!pair)
+			goto error;
+		PyList_SET_ITEM(result, Py_SIZE(result)++, pair);
+	}
+
+	while (i1 < n1) {
+		pair = PyTuple_Pack(2, entries1[i1++], null_entry);
+		if (!pair)
+			goto error;
+		PyList_SET_ITEM(result, Py_SIZE(result)++, pair);
+	}
+	while (i2 < n2) {
+		pair = PyTuple_Pack(2, null_entry, entries2[i2++]);
+		if (!pair)
+			goto error;
+		PyList_SET_ITEM(result, Py_SIZE(result)++, pair);
+	}
+	goto done;
+
+error:
+	Py_XDECREF(result);
+	result = NULL;
+
+done:
+	free_objects(entries1, n1);
+	free_objects(entries2, n2);
+	return result;
+}
+
 static PyObject *py_is_tree(PyObject *self, PyObject *args)
 {
 	PyObject *entry, *mode, *result;
@@ -49,6 +268,7 @@ static PyObject *py_is_tree(PyObject *self, PyObject *args)
 
 static PyMethodDef py_diff_methods[] = {
 	{ "_is_tree", (PyCFunction)py_is_tree, METH_VARARGS, NULL },
+	{ "_merge_entries", (PyCFunction)py_merge_entries, METH_VARARGS, NULL },
 	{ NULL, NULL, 0, NULL }
 };
 
@@ -59,4 +279,21 @@ init_diff(void)
 	m = Py_InitModule("_diff", py_diff_methods);
 	if (!m)
 		return;
+
+	objects_mod = PyImport_ImportModule("dulwich.objects");
+	if (!objects_mod)
+		return;
+
+	tree_entry_cls = PyObject_GetAttrString(objects_mod, "TreeEntry");
+	Py_DECREF(objects_mod);
+	if (!tree_entry_cls)
+		return;
+
+	diff_mod = PyImport_ImportModule("dulwich.diff");
+	if (!diff_mod)
+		return;
+	null_entry = PyObject_GetAttrString(diff_mod, "_NULL_ENTRY");
+	Py_DECREF(diff_mod);
+	if (!null_entry)
+		return;
 }
diff --git a/dulwich/diff.py b/dulwich/diff.py
index ee32989..88274fd 100644
--- a/dulwich/diff.py
+++ b/dulwich/diff.py
@@ -396,8 +396,9 @@ class RenameDetector(object):
 
 # Hold on to the pure-python implementations for testing.
 _is_tree_py = _is_tree
+_merge_entries_py = _merge_entries
 try:
     # Try to import C versions
-    from dulwich._diff import _is_tree
+    from dulwich._diff import _is_tree, _merge_entries
 except ImportError:
     pass
diff --git a/dulwich/tests/test_diff.py b/dulwich/tests/test_diff.py
index 9457a79..4c02659 100644
--- a/dulwich/tests/test_diff.py
+++ b/dulwich/tests/test_diff.py
@@ -25,6 +25,7 @@ from dulwich.diff import (
     CHANGE_UNCHANGED,
     TreeChange,
     _merge_entries,
+    _merge_entries_py,
     tree_changes,
     _count_blocks,
     _similarity_score,
@@ -46,6 +47,7 @@ from dulwich.objects import (
     ShaFile,
     Blob,
     TreeEntry,
+    Tree,
     )
 from dulwich.tests import (
     TestCase,
@@ -84,7 +86,12 @@ class DiffTestCase(TestCase):
 
 class TreeChangesTest(DiffTestCase):
 
-    def test_merge_entries(self):
+    def assertMergeFails(self, merge_entries, name, mode, sha):
+        t = Tree()
+        t[name] = (mode, sha)
+        self.assertRaises(TypeError, merge_entries, '', t, t)
+
+    def _do_test_merge_entries(self, merge_entries):
         blob_a1 = make_object(Blob, data='a1')
         blob_a2 = make_object(Blob, data='a2')
         blob_b1 = make_object(Blob, data='b1')
@@ -94,33 +101,45 @@ class TreeChangesTest(DiffTestCase):
         tree2 = self.commit_tree([('a', blob_a2, 0100644),
                                   ('c', blob_c2, 0100755)])
 
-        self.assertEqual([], _merge_entries('', self.empty_tree,
-                                            self.empty_tree))
+        self.assertEqual([], merge_entries('', self.empty_tree,
+                                           self.empty_tree))
         self.assertEqual([
           ((None, None, None), ('a', 0100644, blob_a1.id)),
           ((None, None, None), ('b', 0100755, blob_b1.id)),
-          ], _merge_entries('', self.empty_tree, tree1))
+          ], merge_entries('', self.empty_tree, tree1))
         self.assertEqual([
           ((None, None, None), ('x/a', 0100644, blob_a1.id)),
           ((None, None, None), ('x/b', 0100755, blob_b1.id)),
-          ], _merge_entries('x', self.empty_tree, tree1))
+          ], merge_entries('x', self.empty_tree, tree1))
 
         self.assertEqual([
           (('a', 0100644, blob_a2.id), (None, None, None)),
           (('c', 0100755, blob_c2.id), (None, None, None)),
-          ], _merge_entries('', tree2, self.empty_tree))
+          ], merge_entries('', tree2, self.empty_tree))
 
         self.assertEqual([
           (('a', 0100644, blob_a1.id), ('a', 0100644, blob_a2.id)),
           (('b', 0100755, blob_b1.id), (None, None, None)),
           ((None, None, None), ('c', 0100755, blob_c2.id)),
-          ], _merge_entries('', tree1, tree2))
+          ], merge_entries('', tree1, tree2))
 
         self.assertEqual([
           (('a', 0100644, blob_a2.id), ('a', 0100644, blob_a1.id)),
           ((None, None, None), ('b', 0100755, blob_b1.id)),
           (('c', 0100755, blob_c2.id), (None, None, None)),
-          ], _merge_entries('', tree2, tree1))
+          ], merge_entries('', tree2, tree1))
+
+        self.assertMergeFails(merge_entries, 0xdeadbeef, 0100644, '1' * 40)
+        self.assertMergeFails(merge_entries, 'a', 'deadbeef', '1' * 40)
+        self.assertMergeFails(merge_entries, 'a', 0100644, 0xdeadbeef)
+
+    def test_merge_entries(self):
+        self._do_test_merge_entries(_merge_entries_py)
+
+    def test_merge_entries_extension(self):
+        if _merge_entries is _merge_entries_py:
+            raise TestSkipped('merge_entries extension not found')
+        self._do_test_merge_entries(_merge_entries)
 
     def _do_test_is_tree(self, is_tree):
         self.assertFalse(is_tree(TreeEntry(None, None, None)))
-- 
1.7.3.2.168.gd6b63




References