← Back to team overview

dulwich-users team mailing list archive

[PATCH 4/9] object_store: Make iter_tree_contents depth-first.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

This mimics the behavior of os.walk as well as common tools like find
and ls -R.

Memory usage may also be better, as directory trees tend to have a
higher branching factor than depth. (Note that the overhead of Python
stack frames may make this not true in many cases, and I haven't run
any benchmarks.)

Change-Id: I585813c89722295ee4cfce92ca4e7b4fb40d2cb8
---
 NEWS                               |    5 +++++
 dulwich/object_store.py            |   27 +++++++++++++++------------
 dulwich/tests/test_object_store.py |   21 +++++++++++++++++++++
 3 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/NEWS b/NEWS
index ec32c5f..470d23a 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,11 @@
 
   * Web server supports streaming progress/pack output. (Dave Borowitz)
 
+ API CHANGES
+
+  * ObjectStore.iter_tree_contents now walks contents in depth-first, sorted
+    order. (Dave Borowitz)
+
 
 0.6.1	2010-07-22
 
diff --git a/dulwich/object_store.py b/dulwich/object_store.py
index 32f9b78..1a83439 100644
--- a/dulwich/object_store.py
+++ b/dulwich/object_store.py
@@ -175,21 +175,24 @@ class BaseObjectStore(object):
                     else:
                         todo.add((None, newhexsha, childpath))
 
-    def iter_tree_contents(self, tree):
-        """Yield (path, mode, hexsha) tuples for all non-Tree objects in a tree.
+    def iter_tree_contents(self, tree_id):
+        """Iterate the contents of a tree and all subtrees.
 
-        :param tree: SHA1 of the root of the tree
+        Iteration is depth-first, as in e.g. os.walk.
+
+        :param tree_id: SHA1 of the tree.
+        :yield: Tuples of (path, mode, hexhsa) for objects in a tree.
         """
-        todo = set([(tree, "")])
+        todo = [('', stat.S_IFDIR, tree_id)]
         while todo:
-            (tid, tpath) = todo.pop()
-            tree = self[tid]
-            for name, mode, hexsha in tree.iteritems():
-                path = posixpath.join(tpath, name)
-                if stat.S_ISDIR(mode):
-                    todo.add((hexsha, path))
-                else:
-                    yield path, mode, hexsha
+            path, mode, hexsha = todo.pop()
+            if stat.S_ISDIR(mode):
+                entries = reversed(self[hexsha].iteritems())
+                for name, entry_mode, entry_hexsha in entries:
+                    entry_path = posixpath.join(path, name)
+                    todo.append((entry_path, entry_mode, entry_hexsha))
+            else:
+                yield path, mode, hexsha
 
     def find_missing_objects(self, haves, wants, progress=None,
                              get_tagged=None):
diff --git a/dulwich/tests/test_object_store.py b/dulwich/tests/test_object_store.py
index 15241ba..e53a612 100644
--- a/dulwich/tests/test_object_store.py
+++ b/dulwich/tests/test_object_store.py
@@ -23,6 +23,9 @@ import os
 import shutil
 import tempfile
 
+from dulwich.index import (
+    commit_tree,
+    )
 from dulwich.objects import (
     Blob,
     )
@@ -75,6 +78,24 @@ class ObjectStoreTests(object):
         r = self.store[testobject.id]
         self.assertEquals(r, testobject)
 
+    def test_iter_tree_contents(self):
+        blob_a = make_object(Blob, data='a')
+        blob_b = make_object(Blob, data='b')
+        blob_c = make_object(Blob, data='c')
+        for blob in [blob_a, blob_b, blob_c]:
+            self.store.add_object(blob)
+
+        blobs = [
+          ('a', blob_a.id, 0100644),
+          ('ad/b', blob_b.id, 0100644),
+          ('ad/bd/c', blob_c.id, 0100755),
+          ('ad/c', blob_c.id, 0100644),
+          ('c', blob_c.id, 0100644),
+          ]
+        tree_id = commit_tree(self.store, blobs)
+        self.assertEquals([(p, m, h) for (p, h, m) in blobs],
+                          list(self.store.iter_tree_contents(tree_id)))
+
 
 class MemoryObjectStoreTests(ObjectStoreTests, TestCase):
 
-- 
1.7.1




Follow ups

References