dulwich-users team mailing list archive
-
dulwich-users team
-
Mailing list archive
-
Message #00145
[PATCH 4/9] object_store: Make iter_tree_contents depth-first.
From: Dave Borowitz <dborowitz@xxxxxxxxxx>
This mimics the behavior of os.walk as well as common tools like find
and ls -R.
Memory usage may also be better, as directory trees tend to have a
higher branching factor than depth. (Note that the overhead of Python
stack frames may make this not true in many cases, and I haven't run
any benchmarks.)
Change-Id: I585813c89722295ee4cfce92ca4e7b4fb40d2cb8
---
NEWS | 5 +++++
dulwich/object_store.py | 27 +++++++++++++++------------
dulwich/tests/test_object_store.py | 21 +++++++++++++++++++++
3 files changed, 41 insertions(+), 12 deletions(-)
diff --git a/NEWS b/NEWS
index ec32c5f..470d23a 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,11 @@
* Web server supports streaming progress/pack output. (Dave Borowitz)
+ API CHANGES
+
+ * ObjectStore.iter_tree_contents now walks contents in depth-first, sorted
+ order. (Dave Borowitz)
+
0.6.1 2010-07-22
diff --git a/dulwich/object_store.py b/dulwich/object_store.py
index 32f9b78..1a83439 100644
--- a/dulwich/object_store.py
+++ b/dulwich/object_store.py
@@ -175,21 +175,24 @@ class BaseObjectStore(object):
else:
todo.add((None, newhexsha, childpath))
- def iter_tree_contents(self, tree):
- """Yield (path, mode, hexsha) tuples for all non-Tree objects in a tree.
+ def iter_tree_contents(self, tree_id):
+ """Iterate the contents of a tree and all subtrees.
- :param tree: SHA1 of the root of the tree
+ Iteration is depth-first, as in e.g. os.walk.
+
+ :param tree_id: SHA1 of the tree.
+ :yield: Tuples of (path, mode, hexhsa) for objects in a tree.
"""
- todo = set([(tree, "")])
+ todo = [('', stat.S_IFDIR, tree_id)]
while todo:
- (tid, tpath) = todo.pop()
- tree = self[tid]
- for name, mode, hexsha in tree.iteritems():
- path = posixpath.join(tpath, name)
- if stat.S_ISDIR(mode):
- todo.add((hexsha, path))
- else:
- yield path, mode, hexsha
+ path, mode, hexsha = todo.pop()
+ if stat.S_ISDIR(mode):
+ entries = reversed(self[hexsha].iteritems())
+ for name, entry_mode, entry_hexsha in entries:
+ entry_path = posixpath.join(path, name)
+ todo.append((entry_path, entry_mode, entry_hexsha))
+ else:
+ yield path, mode, hexsha
def find_missing_objects(self, haves, wants, progress=None,
get_tagged=None):
diff --git a/dulwich/tests/test_object_store.py b/dulwich/tests/test_object_store.py
index 15241ba..e53a612 100644
--- a/dulwich/tests/test_object_store.py
+++ b/dulwich/tests/test_object_store.py
@@ -23,6 +23,9 @@ import os
import shutil
import tempfile
+from dulwich.index import (
+ commit_tree,
+ )
from dulwich.objects import (
Blob,
)
@@ -75,6 +78,24 @@ class ObjectStoreTests(object):
r = self.store[testobject.id]
self.assertEquals(r, testobject)
+ def test_iter_tree_contents(self):
+ blob_a = make_object(Blob, data='a')
+ blob_b = make_object(Blob, data='b')
+ blob_c = make_object(Blob, data='c')
+ for blob in [blob_a, blob_b, blob_c]:
+ self.store.add_object(blob)
+
+ blobs = [
+ ('a', blob_a.id, 0100644),
+ ('ad/b', blob_b.id, 0100644),
+ ('ad/bd/c', blob_c.id, 0100755),
+ ('ad/c', blob_c.id, 0100644),
+ ('c', blob_c.id, 0100644),
+ ]
+ tree_id = commit_tree(self.store, blobs)
+ self.assertEquals([(p, m, h) for (p, h, m) in blobs],
+ list(self.store.iter_tree_contents(tree_id)))
+
class MemoryObjectStoreTests(ObjectStoreTests, TestCase):
--
1.7.1
Follow ups
References