dulwich-users team mailing list archive
-
dulwich-users team
-
Mailing list archive
-
Message #00343
[PATCH 17/24] diff_tree: C implementation of _merge_entries.
From: Dave Borowitz <dborowitz@xxxxxxxxxx>
Change-Id: Idd816c7fbec69e7a81ab9fe9a83e897bc6bdf506
---
dulwich/_diff_tree.c | 237 +++++++++++++++++++++++++++++++++++++++
dulwich/diff_tree.py | 3 +-
dulwich/tests/test_diff_tree.py | 35 +++++--
3 files changed, 266 insertions(+), 9 deletions(-)
diff --git a/dulwich/_diff_tree.c b/dulwich/_diff_tree.c
index 9b87db4..aaefb46 100644
--- a/dulwich/_diff_tree.c
+++ b/dulwich/_diff_tree.c
@@ -20,6 +20,225 @@
#include <Python.h>
#include <sys/stat.h>
+#if (PY_VERSION_HEX < 0x02050000)
+typedef int Py_ssize_t;
+#endif
+
+#if (PY_VERSION_HEX < 0x02060000)
+#define Py_SIZE(x) Py_Size(x)
+#endif
+
+static PyObject *tree_entry_cls, *null_entry;
+
+/**
+ * Free an array of PyObject pointers, decrementing any references.
+ */
+static void free_objects(PyObject **objs, Py_ssize_t n) {
+ Py_ssize_t i;
+ for (i = 0; i < n; i++)
+ Py_XDECREF(objs[i]);
+ PyMem_Free(objs);
+}
+
+/**
+ * Get the entries of a tree, prepending the given path.
+ *
+ * :param path: The path to prepend, without trailing slashes.
+ * :param path_len: The length of path.
+ * :param tree: The Tree object to iterate.
+ * :param n: Set to the length of result.
+ * :return: A (C) array of PyObject pointers to TreeEntry objects for each path
+ * in tree.
+ */
+static PyObject **tree_entries(char *path, Py_ssize_t path_len, PyObject *tree,
+ Py_ssize_t *n) {
+ PyObject *iteritems, *items, **result = NULL;
+ PyObject *old_entry, *name, *sha;
+ Py_ssize_t i = 0, name_len, new_path_len;
+ char *new_path;
+
+ if (tree == Py_None) {
+ *n = 0;
+ result = PyMem_New(PyObject*, 0);
+ if (!result) {
+ PyErr_SetNone(PyExc_MemoryError);
+ return NULL;
+ }
+ return result;
+ }
+
+ iteritems = PyObject_GetAttrString(tree, "iteritems");
+ if (!iteritems)
+ return NULL;
+ items = PyObject_CallFunctionObjArgs(iteritems, Py_True, NULL);
+ Py_DECREF(iteritems);
+ if (!items) {
+ return NULL;
+ }
+ /* The C implementation of iteritems returns a list, so depend on that. */
+ if (!PyList_Check(items)) {
+ PyErr_SetString(PyExc_TypeError,
+ "Tree.iteritems() did not return a list");
+ return NULL;
+ }
+
+ *n = PyList_Size(items);
+ result = PyMem_New(PyObject*, *n);
+ if (!result) {
+ PyErr_SetNone(PyExc_MemoryError);
+ goto error;
+ }
+ for (i = 0; i < *n; i++) {
+ old_entry = PyList_GetItem(items, i);
+ if (!old_entry)
+ goto error;
+ sha = PyTuple_GetItem(old_entry, 2);
+ if (!sha)
+ goto error;
+ name = PyTuple_GET_ITEM(old_entry, 0);
+ name_len = PyString_Size(name);
+ if (PyErr_Occurred())
+ goto error;
+
+ new_path_len = name_len;
+ if (path_len)
+ new_path_len += path_len + 1;
+ new_path = PyMem_Malloc(new_path_len);
+ if (!new_path) {
+ PyErr_SetNone(PyExc_MemoryError);
+ goto error;
+ }
+ if (path_len) {
+ memcpy(new_path, path, path_len);
+ new_path[path_len] = '/';
+ memcpy(new_path + path_len + 1, PyString_AS_STRING(name), name_len);
+ } else {
+ memcpy(new_path, PyString_AS_STRING(name), name_len);
+ }
+
+ result[i] = PyObject_CallFunction(tree_entry_cls, "s#OO", new_path,
+ new_path_len, PyTuple_GET_ITEM(old_entry, 1), sha);
+ PyMem_Free(new_path);
+ if (!result[i]) {
+ goto error;
+ }
+ }
+ Py_DECREF(items);
+ return result;
+
+error:
+ free_objects(result, i);
+ Py_DECREF(items);
+ return NULL;
+}
+
+/**
+ * Use strcmp to compare the paths of two TreeEntry objects.
+ */
+static int entry_path_cmp(PyObject *entry1, PyObject *entry2) {
+ PyObject *path1 = NULL, *path2 = NULL;
+ int result;
+
+ path1 = PyObject_GetAttrString(entry1, "path");
+ if (!path1)
+ goto done;
+ if (!PyString_Check(path1)) {
+ PyErr_SetString(PyExc_TypeError, "path is not a string");
+ goto done;
+ }
+
+ path2 = PyObject_GetAttrString(entry2, "path");
+ if (!path2)
+ goto done;
+ if (!PyString_Check(path2)) {
+ PyErr_SetString(PyExc_TypeError, "path is not a string");
+ goto done;
+ }
+
+ result = strcmp(PyString_AS_STRING(path1), PyString_AS_STRING(path2));
+
+done:
+ Py_XDECREF(path1);
+ Py_XDECREF(path2);
+ return result;
+}
+
+static PyObject *py_merge_entries(PyObject *self, PyObject *args)
+{
+ PyObject *path, *tree1, *tree2, **entries1 = NULL, **entries2 = NULL;
+ PyObject *e1, *e2, *pair, *result = NULL;
+ Py_ssize_t path_len, n1 = 0, n2 = 0, i1 = 0, i2 = 0;
+ char *path_str;
+ int cmp;
+
+ if (!PyArg_ParseTuple(args, "OOO", &path, &tree1, &tree2))
+ return NULL;
+
+ path_str = PyString_AsString(path);
+ if (!path_str) {
+ PyErr_SetString(PyExc_TypeError, "path is not a string");
+ return NULL;
+ }
+ path_len = PyString_GET_SIZE(path);
+
+ entries1 = tree_entries(path_str, path_len, tree1, &n1);
+ if (!entries1)
+ goto error;
+ entries2 = tree_entries(path_str, path_len, tree2, &n2);
+ if (!entries2)
+ goto error;
+
+ result = PyList_New(n1 + n2);
+ if (!result)
+ goto error;
+ /* PyList_New sets the len of the list, not its allocated size, so we
+ * need to trim it to the size we actually use. */
+ Py_SIZE(result) = 0;
+
+ while (i1 < n1 && i2 < n2) {
+ cmp = entry_path_cmp(entries1[i1], entries2[i2]);
+ if (PyErr_Occurred())
+ goto error;
+ if (!cmp) {
+ e1 = entries1[i1++];
+ e2 = entries2[i2++];
+ } else if (cmp < 0) {
+ e1 = entries1[i1++];
+ e2 = null_entry;
+ } else {
+ e1 = null_entry;
+ e2 = entries2[i2++];
+ }
+ pair = PyTuple_Pack(2, e1, e2);
+ if (!pair)
+ goto error;
+ PyList_SET_ITEM(result, Py_SIZE(result)++, pair);
+ }
+
+ while (i1 < n1) {
+ pair = PyTuple_Pack(2, entries1[i1++], null_entry);
+ if (!pair)
+ goto error;
+ PyList_SET_ITEM(result, Py_SIZE(result)++, pair);
+ }
+ while (i2 < n2) {
+ pair = PyTuple_Pack(2, null_entry, entries2[i2++]);
+ if (!pair)
+ goto error;
+ PyList_SET_ITEM(result, Py_SIZE(result)++, pair);
+ }
+ goto done;
+
+error:
+ Py_XDECREF(result);
+ result = NULL;
+
+done:
+ free_objects(entries1, n1);
+ free_objects(entries2, n2);
+ return result;
+}
+
static PyObject *py_is_tree(PyObject *self, PyObject *args)
{
PyObject *entry, *mode, *result;
@@ -49,6 +268,7 @@ static PyObject *py_is_tree(PyObject *self, PyObject *args)
static PyMethodDef py_diff_tree_methods[] = {
{ "_is_tree", (PyCFunction)py_is_tree, METH_VARARGS, NULL },
+ { "_merge_entries", (PyCFunction)py_merge_entries, METH_VARARGS, NULL },
{ NULL, NULL, 0, NULL }
};
@@ -59,4 +279,21 @@ init_diff_tree(void)
m = Py_InitModule("_diff_tree", py_diff_tree_methods);
if (!m)
return;
+
+ objects_mod = PyImport_ImportModule("dulwich.objects");
+ if (!objects_mod)
+ return;
+
+ tree_entry_cls = PyObject_GetAttrString(objects_mod, "TreeEntry");
+ Py_DECREF(objects_mod);
+ if (!tree_entry_cls)
+ return;
+
+ diff_mod = PyImport_ImportModule("dulwich.diff");
+ if (!diff_mod)
+ return;
+ null_entry = PyObject_GetAttrString(diff_mod, "_NULL_ENTRY");
+ Py_DECREF(diff_mod);
+ if (!null_entry)
+ return;
}
diff --git a/dulwich/diff_tree.py b/dulwich/diff_tree.py
index 40c5bc0..4381b64 100644
--- a/dulwich/diff_tree.py
+++ b/dulwich/diff_tree.py
@@ -396,8 +396,9 @@ class RenameDetector(object):
# Hold on to the pure-python implementations for testing.
_is_tree_py = _is_tree
+_merge_entries_py = _merge_entries
try:
# Try to import C versions
- from dulwich._diff_tree import _is_tree
+ from dulwich._diff_tree import _is_tree, _merge_entries
except ImportError:
pass
diff --git a/dulwich/tests/test_diff_tree.py b/dulwich/tests/test_diff_tree.py
index 1230f2b..1a9bd5c 100644
--- a/dulwich/tests/test_diff_tree.py
+++ b/dulwich/tests/test_diff_tree.py
@@ -25,6 +25,7 @@ from dulwich.diff_tree import (
CHANGE_UNCHANGED,
TreeChange,
_merge_entries,
+ _merge_entries_py,
tree_changes,
_count_blocks,
_similarity_score,
@@ -46,6 +47,7 @@ from dulwich.objects import (
ShaFile,
Blob,
TreeEntry,
+ Tree,
)
from dulwich.tests import (
TestCase,
@@ -84,7 +86,12 @@ class DiffTestCase(TestCase):
class TreeChangesTest(DiffTestCase):
- def test_merge_entries(self):
+ def assertMergeFails(self, merge_entries, name, mode, sha):
+ t = Tree()
+ t[name] = (mode, sha)
+ self.assertRaises(TypeError, merge_entries, '', t, t)
+
+ def _do_test_merge_entries(self, merge_entries):
blob_a1 = make_object(Blob, data='a1')
blob_a2 = make_object(Blob, data='a2')
blob_b1 = make_object(Blob, data='b1')
@@ -94,33 +101,45 @@ class TreeChangesTest(DiffTestCase):
tree2 = self.commit_tree([('a', blob_a2, 0100644),
('c', blob_c2, 0100755)])
- self.assertEqual([], _merge_entries('', self.empty_tree,
- self.empty_tree))
+ self.assertEqual([], merge_entries('', self.empty_tree,
+ self.empty_tree))
self.assertEqual([
((None, None, None), ('a', 0100644, blob_a1.id)),
((None, None, None), ('b', 0100755, blob_b1.id)),
- ], _merge_entries('', self.empty_tree, tree1))
+ ], merge_entries('', self.empty_tree, tree1))
self.assertEqual([
((None, None, None), ('x/a', 0100644, blob_a1.id)),
((None, None, None), ('x/b', 0100755, blob_b1.id)),
- ], _merge_entries('x', self.empty_tree, tree1))
+ ], merge_entries('x', self.empty_tree, tree1))
self.assertEqual([
(('a', 0100644, blob_a2.id), (None, None, None)),
(('c', 0100755, blob_c2.id), (None, None, None)),
- ], _merge_entries('', tree2, self.empty_tree))
+ ], merge_entries('', tree2, self.empty_tree))
self.assertEqual([
(('a', 0100644, blob_a1.id), ('a', 0100644, blob_a2.id)),
(('b', 0100755, blob_b1.id), (None, None, None)),
((None, None, None), ('c', 0100755, blob_c2.id)),
- ], _merge_entries('', tree1, tree2))
+ ], merge_entries('', tree1, tree2))
self.assertEqual([
(('a', 0100644, blob_a2.id), ('a', 0100644, blob_a1.id)),
((None, None, None), ('b', 0100755, blob_b1.id)),
(('c', 0100755, blob_c2.id), (None, None, None)),
- ], _merge_entries('', tree2, tree1))
+ ], merge_entries('', tree2, tree1))
+
+ self.assertMergeFails(merge_entries, 0xdeadbeef, 0100644, '1' * 40)
+ self.assertMergeFails(merge_entries, 'a', 'deadbeef', '1' * 40)
+ self.assertMergeFails(merge_entries, 'a', 0100644, 0xdeadbeef)
+
+ def test_merge_entries(self):
+ self._do_test_merge_entries(_merge_entries_py)
+
+ def test_merge_entries_extension(self):
+ if _merge_entries is _merge_entries_py:
+ raise TestSkipped('merge_entries extension not found')
+ self._do_test_merge_entries(_merge_entries)
def _do_test_is_tree(self, is_tree):
self.assertFalse(is_tree(TreeEntry(None, None, None)))
--
1.7.3.2.168.gd6b63
References