← Back to team overview

dulwich-users team mailing list archive

[PATCH 09/33] pack: Add a PackIndexer to index packs more quickly.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

Change-Id: I6e94e778beb221590dbf10c1a7aba302f700ef03
---
 NEWS            |    4 ++--
 dulwich/pack.py |   23 +++++++++++++++++------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/NEWS b/NEWS
index 404a49e..080d74b 100644
--- a/NEWS
+++ b/NEWS
@@ -2,8 +2,8 @@
 
  FEATURES
 
-  * New DeltaChainIterator abstract class for quickly inflating/iterating all
-    objects in a pack. (Dave Borowitz)
+  * New DeltaChainIterator abstract class for quickly iterating all objects in
+    a pack, with an implementation for pack indexing. (Dave Borowitz)
 
  BUG FIXES
 
diff --git a/dulwich/pack.py b/dulwich/pack.py
index 60eb306..e487786 100644
--- a/dulwich/pack.py
+++ b/dulwich/pack.py
@@ -900,12 +900,11 @@ class PackData(object):
             object count.
         :return: iterator of tuples with (sha, offset, crc32)
         """
-        for offset, type, obj, crc32 in self.iterobjects(progress=progress):
-            assert isinstance(offset, int)
-            assert isinstance(type, int)
-            assert isinstance(obj, list) or isinstance(obj, tuple)
-            type, obj = self.resolve_object(offset, type, obj)
-            yield obj_sha(type, obj), offset, crc32
+        num_objects = self._num_objects
+        for i, result in enumerate(PackIndexer.for_pack_data(self)):
+            if progress is not None:
+                progress(i, num_objects)
+            yield result
 
     def sorted_entries(self, progress=None):
         """Return entries in this pack, sorted by SHA.
@@ -1170,6 +1169,18 @@ class DeltaChainIterator(object):
             for result in self._follow_chain(new_offset, base_type_num, chunks):
                 yield result
 
+    def __iter__(self):
+        return self._walk_all_chains()
+
+
+class PackIndexer(DeltaChainIterator):
+    """Delta chain iterator that yields index entries."""
+
+    _compute_crc32 = True
+
+    def _result(self, offset, unused_type_num, unused_chunks, sha, crc32):
+        return sha, offset, crc32
+
 
 class SHA1Reader(object):
     """Wrapper around a file-like object that remembers the SHA1 of its data."""
-- 
1.7.3.1



References