← Back to team overview

dulwich-users team mailing list archive

[PATCH 4 of 4] objects.parse_tree: more gracefully handle invalid modes in trees

 

# HG changeset patch
# User Augie Fackler <durin42@xxxxxxxxx>
# Date 1289783885 -21600
# Node ID b469476c99d82eff40684e23ccd957815fd05282
# Parent  130bbd3c82edb682baf86c27e0fac5b9c8acb624
objects.parse_tree: more gracefully handle invalid modes in trees

Change-Id: I89c3a0095b508ad09131608e8323ecafb60f463b

diff --git a/dulwich/_objects.c b/dulwich/_objects.c
--- a/dulwich/_objects.c
+++ b/dulwich/_objects.c
@@ -36,6 +36,7 @@
 #define bytehex(x) (((x)<0xa)?('0'+(x)):('a'-0xa+(x)))
 
 static PyObject *tree_entry_cls;
+static PyObject *object_format_exception_cls;
 
 static PyObject *sha_to_pyhex(const unsigned char *sha)
 {
@@ -49,15 +50,20 @@
 	return PyString_FromStringAndSize(hexsha, 40);
 }
 
-static PyObject *py_parse_tree(PyObject *self, PyObject *args)
+static PyObject *py_parse_tree(PyObject *self, PyObject *args, PyObject *kw)
 {
 	char *text, *start, *end;
-	int len, namelen;
-	PyObject *ret, *item, *name;
+	int len, namelen, strict;
+	PyObject *ret, *item, *name, *py_strict = NULL;
+	static char *kwlist[] = {"text", "strict", NULL};
 
-	if (!PyArg_ParseTuple(args, "s#", &text, &len))
+	if (!PyArg_ParseTupleAndKeywords(args, kw, "s#|O", kwlist,
+	                                 &text, &len, &py_strict))
 		return NULL;
 
+
+	strict = py_strict ?  PyObject_IsTrue(py_strict) : 0;
+
 	/* TODO: currently this returns a list; if memory usage is a concern,
 	 * consider rewriting as a custom iterator object */
 	ret = PyList_New(0);
@@ -71,6 +77,13 @@
 
 	while (text < end) {
 		long mode;
+		if (strict && text[0] == '0') {
+			PyErr_SetString(object_format_exception_cls,
+			                "Illegal leading zero on mode");
+			Py_DECREF(ret);
+			return NULL;
+		}
+
 		mode = strtol(text, &text, 8);
 
 		if (*text != ' ') {
@@ -222,7 +235,8 @@
 }
 
 static PyMethodDef py_objects_methods[] = {
-	{ "parse_tree", (PyCFunction)py_parse_tree, METH_VARARGS, NULL },
+	{ "parse_tree", (PyCFunction)py_parse_tree, METH_VARARGS | METH_KEYWORDS,
+	  NULL },
 	{ "sorted_tree_items", (PyCFunction)py_sorted_tree_items, METH_O, NULL },
 	{ NULL, NULL, 0, NULL }
 };
@@ -230,12 +244,23 @@
 PyMODINIT_FUNC
 init_objects(void)
 {
-	PyObject *m, *objects_mod;
+	PyObject *m, *objects_mod, *errors_mod;
 
 	m = Py_InitModule3("_objects", py_objects_methods, NULL);
 	if (m == NULL)
 		return;
 
+
+	errors_mod = PyImport_ImportModule("dulwich.errors");
+	if (errors_mod == NULL)
+		return;
+
+	object_format_exception_cls = PyObject_GetAttrString(
+		errors_mod, "ObjectFormatException");
+	Py_DECREF(errors_mod);
+	if (object_format_exception_cls == NULL)
+		return;
+
 	/* This is a circular import but should be safe since this module is
 	 * imported at at the very bottom of objects.py. */
 	objects_mod = PyImport_ImportModule("dulwich.objects");
diff --git a/dulwich/objects.py b/dulwich/objects.py
--- a/dulwich/objects.py
+++ b/dulwich/objects.py
@@ -694,18 +694,20 @@
         return TreeEntry(posixpath.join(path, self.path), self.mode, self.sha)
 
 
-def parse_tree(text):
+def parse_tree(text, strict=False):
     """Parse a tree text.
 
     :param text: Serialized text to parse
     :return: iterator of tuples of (name, mode, sha)
+    :raise ObjectFormatException: if the object was malformed in some way
     """
     count = 0
     l = len(text)
     while count < l:
         mode_end = text.index(' ', count)
         mode_text = text[count:mode_end]
-        assert mode_text[0] != '0'
+        if strict and mode_text.startswith('0'):
+            raise ObjectFormatException("Invalid mode '%s'" % mode_text)
         try:
             mode = int(mode_text, 8)
         except ValueError:
@@ -869,7 +871,8 @@
                          stat.S_IFLNK, stat.S_IFDIR, S_IFGITLINK,
                          # TODO: optionally exclude as in git fsck --strict
                          stat.S_IFREG | 0664)
-        for name, mode, sha in parse_tree("".join(self._chunked_text)):
+        for name, mode, sha in parse_tree("".join(self._chunked_text),
+                                          True):
             check_hexsha(sha, 'invalid sha %s' % sha)
             if '/' in name or name in ('', '.', '..'):
                 raise ObjectFormatException('invalid name %s' % name)
@@ -968,7 +971,7 @@
         self._parents = []
         self._extra = []
         self._author = None
-        for field, value in parse_commit("".join(self._chunked_text)):
+        for field, value in parse_commit(''.join(self._chunked_text)):
             if field == _TREE_HEADER:
                 self._tree = value
             elif field == _PARENT_HEADER:
diff --git a/dulwich/tests/test_objects.py b/dulwich/tests/test_objects.py
--- a/dulwich/tests/test_objects.py
+++ b/dulwich/tests/test_objects.py
@@ -23,6 +23,7 @@
 
 
 from cStringIO import StringIO
+import binascii
 import datetime
 import os
 import stat
@@ -465,6 +466,16 @@
         o = Tree.from_path(hex_to_filename(dir, tree_sha))
         self.assertEquals([('a', 0100644, a_sha), ('b', 0100644, b_sha)],
                           list(parse_tree(o.as_raw_string())))
+        # test a broken tree that has a leading 0 on the file mode
+        broken_tree = '0100644 foo\0' + hex_to_sha(a_sha)
+
+        def eval_parse_tree(*args, **kwargs):
+          return list(parse_tree(*args, **kwargs))
+
+        self.assertEquals([('foo', 0100644, a_sha)],
+                          eval_parse_tree(broken_tree))
+        self.assertRaises(ObjectFormatException,
+                          eval_parse_tree, broken_tree, strict=True)
 
     def test_parse_tree(self):
         self._do_test_parse_tree(_parse_tree_py)
@@ -520,6 +531,8 @@
         # TODO more whitelisted modes
         self.assertCheckFails(t, '123456 a\0%s' % sha)
         self.assertCheckFails(t, '123abc a\0%s' % sha)
+        # should fail check, but parses ok
+        self.assertCheckFails(t, '0100644 foo\0' + sha)
 
         # shas
         self.assertCheckFails(t, '100644 a\0%s' % ('x' * 5))



References