← Back to team overview

dulwich-users team mailing list archive

[PATCH 32/33] pack: Remove comp_len from UnpackedObject.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

This was only ever used for offset calculation, which can be done
instead by actually backing up over the unused data (more closely
matching the comment).

Also, despite being called comp_len, it was also including the length of
the (not compressed) header, which could be confusing. For now, callers
that really need it can compute it from comp_chunks. (We could add
back comp_len and/or header_len in the future if it turns out to be
useful.)

Change-Id: I902d2f1817c1ad18ac45a086aaee429eea41bd36
---
 dulwich/pack.py            |   28 +++++++++-------------------
 dulwich/tests/test_pack.py |   11 ++---------
 2 files changed, 11 insertions(+), 28 deletions(-)

diff --git a/dulwich/pack.py b/dulwich/pack.py
index dd41596..db5c1cb 100644
--- a/dulwich/pack.py
+++ b/dulwich/pack.py
@@ -127,7 +127,6 @@ class UnpackedObject(object):
       'pack_type_num',  # Type of this object in the pack (may be a delta).
       'delta_base',     # Delta base offset or SHA.
       'comp_chunks',    # Compressed object chunks.
-      'comp_len',       # Compressed length of this object.
       'decomp_chunks',  # Decompressed object chunks.
       'decomp_len',     # Decompressed length of this object.
       'crc32',          # CRC32.
@@ -140,7 +139,6 @@ class UnpackedObject(object):
         self.pack_type_num = pack_type_num
         self.delta_base = delta_base
         self.comp_chunks = None
-        self.comp_len = None
         self.decomp_chunks = []
         self.decomp_len = decomp_len
         self.crc32 = crc32
@@ -203,7 +201,6 @@ def read_zlib_chunks(read_some, unpacked, include_comp=False,
         using this starting CRC32.
         After this function, will have the following attrs set:
             comp_chunks    (if include_comp is True)
-            comp_len
             decomp_chunks
             decomp_len
             crc32
@@ -219,7 +216,6 @@ def read_zlib_chunks(read_some, unpacked, include_comp=False,
     comp_chunks = []
     decomp_chunks = unpacked.decomp_chunks
     decomp_len = 0
-    comp_len = 0
     crc32 = unpacked.crc32
 
     while True:
@@ -227,14 +223,12 @@ def read_zlib_chunks(read_some, unpacked, include_comp=False,
         if not add:
             raise zlib.error('EOF before end of zlib stream')
         comp_chunks.append(add)
-        comp_len += len(add)
         decomp = decomp_obj.decompress(add)
         decomp_len += len(decomp)
         decomp_chunks.append(decomp)
         unused = decomp_obj.unused_data
         if unused:
             left = len(unused)
-            comp_len -= left
             if crc32 is not None:
                 crc32 = binascii.crc32(add[:-left], crc32)
             if include_comp:
@@ -248,7 +242,6 @@ def read_zlib_chunks(read_some, unpacked, include_comp=False,
     if decomp_len != unpacked.decomp_len:
         raise zlib.error('decompressed data does not match expected size')
 
-    unpacked.comp_len = comp_len
     unpacked.crc32 = crc32
     if include_comp:
         unpacked.comp_chunks = comp_chunks
@@ -672,7 +665,6 @@ def unpack_object(read_all, read_some=None, compute_crc32=False,
             pack_type_num
             delta_base     (for delta types)
             comp_chunks    (if include_comp is True)
-            comp_len
             decomp_chunks
             decomp_len
             crc32          (if compute_crc32 is True)
@@ -712,7 +704,6 @@ def unpack_object(read_all, read_some=None, compute_crc32=False,
     unpacked = UnpackedObject(type_num, delta_base, size, crc32)
     unused = read_zlib_chunks(read_some, unpacked, buffer_size=zlib_bufsize,
                               include_comp=include_comp)
-    unpacked.comp_len += raw_base
     return unpacked, unused
 
 
@@ -817,7 +808,6 @@ class PackStreamReader(object):
             obj_type_num
             obj_chunks (for non-delta types)
             delta_base (for delta types)
-            comp_len
             decomp_chunks
             decomp_len
             crc32 (if compute_crc32 is True)
@@ -1053,27 +1043,28 @@ class PackData(object):
         return type, chunks
 
     def iterobjects(self, progress=None, compute_crc32=True):
-        offset = self._header_size
+        self._file.seek(self._header_size)
         for i in xrange(1, self._num_objects + 1):
-            self._file.seek(offset)  # Back up over unused data.
-            unpacked, _ = unpack_object(
+            offset = self._file.tell()
+            unpacked, unused = unpack_object(
               self._file.read, compute_crc32=compute_crc32)
             if progress is not None:
                 progress(i, self._num_objects)
             yield (offset, unpacked.pack_type_num, unpacked._obj(),
                    unpacked.crc32)
-            offset += unpacked.comp_len
+            self._file.seek(-len(unused), SEEK_CUR)  # Back up over unused data.
 
     def _iter_unpacked(self):
         # TODO(dborowitz): Merge this with iterobjects, if we can change its
         # return type.
-        offset = self._header_size
+        self._file.seek(self._header_size)
         for _ in xrange(self._num_objects):
-            self._file.seek(offset)  # Back up over unused data.
-            unpacked, _ = unpack_object(self._file.read, compute_crc32=False)
+            offset = self._file.tell()
+            unpacked, unused = unpack_object(
+              self._file.read, compute_crc32=False)
             unpacked.offset = offset
             yield unpacked
-            offset += unpacked.comp_len
+            self._file.seek(-len(unused), SEEK_CUR)  # Back up over unused data.
 
     def iterentries(self, progress=None):
         """Yield entries summarizing the contents of this pack.
@@ -1186,7 +1177,6 @@ class DeltaChainIterator(object):
         pack_type_num
         delta_base     (for delta types)
         comp_chunks    (if _include_comp is True)
-        comp_len
         decomp_chunks
         decomp_len
         crc32          (if _compute_crc32 is True)
diff --git a/dulwich/tests/test_pack.py b/dulwich/tests/test_pack.py
index bf09cb6..eb56711 100644
--- a/dulwich/tests/test_pack.py
+++ b/dulwich/tests/test_pack.py
@@ -441,7 +441,6 @@ class WritePackTests(TestCase):
         self.assertEqual(Blob.type_num, unpacked.pack_type_num)
         self.assertEqual(Blob.type_num, unpacked.obj_type_num)
         self.assertEqual(['blob'], unpacked.decomp_chunks)
-        self.assertEqual(comp_len, unpacked.comp_len)
         self.assertEqual(crc32, unpacked.crc32)
         self.assertEqual('x', unused)
 
@@ -603,7 +602,6 @@ class ReadZlibTests(TestCase):
         read = StringIO(comp + self.extra).read
         unused = read_zlib_chunks(read, unpacked)
         self.assertEqual('', ''.join(unpacked.decomp_chunks))
-        self.assertEqual(len(comp), unpacked.comp_len)
         self.assertNotEquals('', unused)
         self.assertEquals(self.extra, unused + read())
 
@@ -616,7 +614,6 @@ class ReadZlibTests(TestCase):
         unused = read_zlib_chunks(self.read, self.unpacked,
                                   buffer_size=buffer_size, **kwargs)
         self.assertEquals(self.decomp, ''.join(self.unpacked.decomp_chunks))
-        self.assertEquals(len(self.comp), self.unpacked.comp_len)
         self.assertEquals(zlib.crc32(self.comp), self.unpacked.crc32)
         self.assertNotEquals('', unused)
         self.assertEquals(self.extra, unused + self.read())
@@ -691,8 +688,6 @@ class TestPackStreamReader(TestCase):
         self.assertEqual(Blob.type_num, unpacked_blob.obj_type_num)
         self.assertEqual(None, unpacked_blob.delta_base)
         self.assertEqual('blob', ''.join(unpacked_blob.decomp_chunks))
-        self.assertEqual(unpacked_delta.offset - unpacked_blob.offset,
-                         unpacked_blob.comp_len)
         self.assertEqual(entries[0][4], unpacked_blob.crc32)
 
         self.assertEqual(entries[1][0], unpacked_delta.offset)
@@ -700,10 +695,8 @@ class TestPackStreamReader(TestCase):
         self.assertEqual(None, unpacked_delta.obj_type_num)
         self.assertEqual(unpacked_delta.offset - unpacked_blob.offset,
                          unpacked_delta.delta_base)
-        self.assertEqual(create_delta('blob', 'blob1'),
-                         ''.join(unpacked_delta.decomp_chunks))
-        self.assertEqual(len(f.getvalue()) - 20 - unpacked_delta.offset,
-                         unpacked_delta.comp_len)
+        delta = create_delta('blob', 'blob1')
+        self.assertEqual(delta, ''.join(unpacked_delta.decomp_chunks))
         self.assertEqual(entries[1][4], unpacked_delta.crc32)
 
     def test_read_objects_buffered(self):
-- 
1.7.3.1



References