dulwich-users team mailing list archive
-
dulwich-users team
-
Mailing list archive
-
Message #00276
[PATCH 4 of 4] objects.parse_tree: more gracefully handle invalid modes in trees
# HG changeset patch
# User Augie Fackler <durin42@xxxxxxxxx>
# Date 1289783885 -21600
# Node ID b469476c99d82eff40684e23ccd957815fd05282
# Parent 130bbd3c82edb682baf86c27e0fac5b9c8acb624
objects.parse_tree: more gracefully handle invalid modes in trees
Change-Id: I89c3a0095b508ad09131608e8323ecafb60f463b
diff --git a/dulwich/_objects.c b/dulwich/_objects.c
--- a/dulwich/_objects.c
+++ b/dulwich/_objects.c
@@ -36,6 +36,7 @@
#define bytehex(x) (((x)<0xa)?('0'+(x)):('a'-0xa+(x)))
static PyObject *tree_entry_cls;
+static PyObject *object_format_exception_cls;
static PyObject *sha_to_pyhex(const unsigned char *sha)
{
@@ -49,15 +50,20 @@
return PyString_FromStringAndSize(hexsha, 40);
}
-static PyObject *py_parse_tree(PyObject *self, PyObject *args)
+static PyObject *py_parse_tree(PyObject *self, PyObject *args, PyObject *kw)
{
char *text, *start, *end;
- int len, namelen;
- PyObject *ret, *item, *name;
+ int len, namelen, strict;
+ PyObject *ret, *item, *name, *py_strict = NULL;
+ static char *kwlist[] = {"text", "strict", NULL};
- if (!PyArg_ParseTuple(args, "s#", &text, &len))
+ if (!PyArg_ParseTupleAndKeywords(args, kw, "s#|O", kwlist,
+ &text, &len, &py_strict))
return NULL;
+
+ strict = py_strict ? PyObject_IsTrue(py_strict) : 0;
+
/* TODO: currently this returns a list; if memory usage is a concern,
* consider rewriting as a custom iterator object */
ret = PyList_New(0);
@@ -71,6 +77,13 @@
while (text < end) {
long mode;
+ if (strict && text[0] == '0') {
+ PyErr_SetString(object_format_exception_cls,
+ "Illegal leading zero on mode");
+ Py_DECREF(ret);
+ return NULL;
+ }
+
mode = strtol(text, &text, 8);
if (*text != ' ') {
@@ -222,7 +235,8 @@
}
static PyMethodDef py_objects_methods[] = {
- { "parse_tree", (PyCFunction)py_parse_tree, METH_VARARGS, NULL },
+ { "parse_tree", (PyCFunction)py_parse_tree, METH_VARARGS | METH_KEYWORDS,
+ NULL },
{ "sorted_tree_items", (PyCFunction)py_sorted_tree_items, METH_O, NULL },
{ NULL, NULL, 0, NULL }
};
@@ -230,12 +244,23 @@
PyMODINIT_FUNC
init_objects(void)
{
- PyObject *m, *objects_mod;
+ PyObject *m, *objects_mod, *errors_mod;
m = Py_InitModule3("_objects", py_objects_methods, NULL);
if (m == NULL)
return;
+
+ errors_mod = PyImport_ImportModule("dulwich.errors");
+ if (errors_mod == NULL)
+ return;
+
+ object_format_exception_cls = PyObject_GetAttrString(
+ errors_mod, "ObjectFormatException");
+ Py_DECREF(errors_mod);
+ if (object_format_exception_cls == NULL)
+ return;
+
/* This is a circular import but should be safe since this module is
* imported at the very bottom of objects.py. */
objects_mod = PyImport_ImportModule("dulwich.objects");
diff --git a/dulwich/objects.py b/dulwich/objects.py
--- a/dulwich/objects.py
+++ b/dulwich/objects.py
@@ -694,18 +694,20 @@
return TreeEntry(posixpath.join(path, self.path), self.mode, self.sha)
-def parse_tree(text):
+def parse_tree(text, strict=False):
"""Parse a tree text.
:param text: Serialized text to parse
:return: iterator of tuples of (name, mode, sha)
+ :raise ObjectFormatException: if the object was malformed in some way
"""
count = 0
l = len(text)
while count < l:
mode_end = text.index(' ', count)
mode_text = text[count:mode_end]
- assert mode_text[0] != '0'
+ if strict and mode_text.startswith('0'):
+ raise ObjectFormatException("Invalid mode '%s'" % mode_text)
try:
mode = int(mode_text, 8)
except ValueError:
@@ -869,7 +871,8 @@
stat.S_IFLNK, stat.S_IFDIR, S_IFGITLINK,
# TODO: optionally exclude as in git fsck --strict
stat.S_IFREG | 0664)
- for name, mode, sha in parse_tree("".join(self._chunked_text)):
+ for name, mode, sha in parse_tree("".join(self._chunked_text),
+ True):
check_hexsha(sha, 'invalid sha %s' % sha)
if '/' in name or name in ('', '.', '..'):
raise ObjectFormatException('invalid name %s' % name)
@@ -968,7 +971,7 @@
self._parents = []
self._extra = []
self._author = None
- for field, value in parse_commit("".join(self._chunked_text)):
+ for field, value in parse_commit(''.join(self._chunked_text)):
if field == _TREE_HEADER:
self._tree = value
elif field == _PARENT_HEADER:
diff --git a/dulwich/tests/test_objects.py b/dulwich/tests/test_objects.py
--- a/dulwich/tests/test_objects.py
+++ b/dulwich/tests/test_objects.py
@@ -23,6 +23,7 @@
from cStringIO import StringIO
+import binascii
import datetime
import os
import stat
@@ -465,6 +466,16 @@
o = Tree.from_path(hex_to_filename(dir, tree_sha))
self.assertEquals([('a', 0100644, a_sha), ('b', 0100644, b_sha)],
list(parse_tree(o.as_raw_string())))
+ # test a broken tree that has a leading 0 on the file mode
+ broken_tree = '0100644 foo\0' + hex_to_sha(a_sha)
+
+ def eval_parse_tree(*args, **kwargs):
+ return list(parse_tree(*args, **kwargs))
+
+ self.assertEquals([('foo', 0100644, a_sha)],
+ eval_parse_tree(broken_tree))
+ self.assertRaises(ObjectFormatException,
+ eval_parse_tree, broken_tree, strict=True)
def test_parse_tree(self):
self._do_test_parse_tree(_parse_tree_py)
@@ -520,6 +531,8 @@
# TODO more whitelisted modes
self.assertCheckFails(t, '123456 a\0%s' % sha)
self.assertCheckFails(t, '123abc a\0%s' % sha)
+ # should fail check, but parses ok
+ self.assertCheckFails(t, '0100644 foo\0' + sha)
# shas
self.assertCheckFails(t, '100644 a\0%s' % ('x' * 5))
References