← Back to team overview

dulwich-users team mailing list archive

[PATCH 27/33] pack: Nuke ThinPackData.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

This class originally existed to provide an alternative implementation
of iterentries that knew how to get external refs. The rewrite of the
pack inflation code is a better replacement for this implementation.

Additionally, making ThinPackData a first-class object made it seem like
it's ok to have thin pack files sitting around on disk. In reality, the
git way is to only handle thin packs in the receive-pack code path,
complete them immediately, and never open an existing file as a thin
pack. Not having the ThinPack code makes it clearer that they should
only rarely be created.

Change-Id: I5a843b346213d331da51f2e3ddd031d2fd0a1c66
---
 NEWS                       |    3 +-
 dulwich/object_store.py    |    1 -
 dulwich/pack.py            |   81 +------------------------------------------
 dulwich/tests/test_pack.py |   29 +++------------
 4 files changed, 10 insertions(+), 104 deletions(-)

diff --git a/NEWS b/NEWS
index b445391..0c78b1c 100644
--- a/NEWS
+++ b/NEWS
@@ -57,7 +57,8 @@
   * Extract a compute_file_sha function. (Dave Borowitz)
 
   * Remove move_in_thin_pack as a separate method; add_thin_pack now completes
-    the thin pack and moves it in in one step. (Dave Borowitz)
+    the thin pack and moves it in in one step. Remove ThinPackData as well.
+    (Dave Borowitz)
 
   * Custom buffer size in read_zlib_chunks. (Dave Borowitz)
 
diff --git a/dulwich/object_store.py b/dulwich/object_store.py
index 7a9b8b3..2ed160f 100644
--- a/dulwich/object_store.py
+++ b/dulwich/object_store.py
@@ -56,7 +56,6 @@ from dulwich.objects import (
 from dulwich.pack import (
     Pack,
     PackData,
-    ThinPackData,
     obj_sha,
     iter_sha1,
     load_pack_index,
diff --git a/dulwich/pack.py b/dulwich/pack.py
index 55c4ed5..a2ab0d9 100644
--- a/dulwich/pack.py
+++ b/dulwich/pack.py
@@ -1061,81 +1061,6 @@ class PackData(object):
         return unpack_object(self._file.read)[:2]
 
 
-class ThinPackData(PackData):
-    """PackData for thin packs, which require an ObjectStore for resolving."""
-
-    def __init__(self, resolve_ext_ref, *args, **kwargs):
-        super(ThinPackData, self).__init__(*args, **kwargs)
-        self.resolve_ext_ref = resolve_ext_ref
-
-    @classmethod
-    def from_file(cls, resolve_ext_ref, file, size):
-        return cls(resolve_ext_ref, str(file), file=file, size=size)
-
-    def get_ref(self, sha):
-        """Resolve a reference looking in both this pack and the store."""
-        try:
-            # As part of completing a pack we create a Pack object with a
-            # ThinPackData and a full PackIndex, so check in the index first if
-            # possible.
-            # TODO(dborowitz): reevaluate this when the pack completion code is
-            # rewritten.
-            return super(ThinPackData, self).get_ref(sha)
-        except KeyError:
-            type, obj = self.resolve_ext_ref(sha)
-            return None, type, obj
-
-    def iterentries(self, progress=None):
-        """Yield entries summarizing the contents of this pack.
-
-        :param progress: Progress function, called with current and
-            total object count.
-
-        This will yield tuples with (sha, offset, crc32)
-        """
-        found = {}
-        postponed = defaultdict(list)
-
-        class Postpone(Exception):
-            """Raised to postpone delta resolving."""
-
-            def __init__(self, sha):
-                self.sha = sha
-
-        def get_ref_text(sha):
-            assert len(sha) == 20
-            if sha in found:
-                offset = found[sha]
-                type, obj = self.get_object_at(offset)
-                return offset, type, obj
-            try:
-                return self.get_ref(sha)
-            except KeyError:
-                raise Postpone(sha)
-
-        extra = []
-        todo = chain(self.iterobjects(progress=progress), extra)
-        for (offset, type, obj, crc32) in todo:
-            assert isinstance(offset, int)
-            if obj is None:
-                # Inflate postponed delta
-                obj, type = self.get_object_at(offset)
-            assert isinstance(type, int)
-            assert isinstance(obj, list) or isinstance(obj, tuple)
-            try:
-                type, obj = self.resolve_object(offset, type, obj, get_ref_text)
-            except Postpone, e:
-                # Save memory by not storing the inflated obj in postponed
-                postponed[e.sha].append((offset, type, None, crc32))
-            else:
-                sha = obj_sha(type, obj)
-                found[sha] = offset
-                yield sha, offset, crc32
-                extra.extend(postponed.pop(sha, []))
-        if postponed:
-            raise KeyError([sha_to_hex(h) for h in postponed.keys()])
-
-
 class DeltaChainIterator(object):
     """Abstract iterator over pack data based on delta chains.
 
@@ -1158,8 +1083,8 @@ class DeltaChainIterator(object):
         self._ext_refs = []
 
     @classmethod
-    def for_pack_data(cls, pack_data):
-        walker = cls(None)
+    def for_pack_data(cls, pack_data, resolve_ext_ref=None):
+        walker = cls(None, resolve_ext_ref=resolve_ext_ref)
         walker.set_pack_data(pack_data)
         for offset, type_num, obj, _ in pack_data.iterobjects():
             walker.record(offset, type_num, obj)
@@ -1178,8 +1103,6 @@ class DeltaChainIterator(object):
 
     def set_pack_data(self, pack_data):
         self._file = pack_data._file
-        if isinstance(pack_data, ThinPackData):
-            self._resolve_ext_ref = pack_data.resolve_ext_ref
 
     def _walk_all_chains(self):
         for offset, type_num in self._full_ofs:
diff --git a/dulwich/tests/test_pack.py b/dulwich/tests/test_pack.py
index 0f39c1a..6d8d32e 100644
--- a/dulwich/tests/test_pack.py
+++ b/dulwich/tests/test_pack.py
@@ -53,7 +53,6 @@ from dulwich.pack import (
     MemoryPackIndex,
     Pack,
     PackData,
-    ThinPackData,
     apply_delta,
     create_delta,
     deltify_pack_objects,
@@ -193,21 +192,6 @@ class TestPackData(PackTests):
         path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha)
         PackData.from_file(open(path), os.path.getsize(path))
 
-    # TODO: more ThinPackData tests.
-    def test_thin_from_file(self):
-        test_sha = '1' * 40
-
-        def resolve(sha):
-            self.assertEqual(test_sha, sha)
-            return 3, 'data'
-
-        path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha)
-        data = ThinPackData.from_file(resolve, open(path),
-                                      os.path.getsize(path))
-        idx = self.get_pack_index(pack1_sha)
-        Pack.from_objects(data, idx)
-        self.assertEqual((None, 3, 'data'), data.get_ref(test_sha))
-
     def test_pack_len(self):
         p = self.get_pack_data(pack1_sha)
         self.assertEquals(3, len(p))
@@ -716,8 +700,8 @@ class TestPackIterator(DeltaChainIterator):
 
     _compute_crc32 = True
 
-    def __init__(self, pack_data):
-        super(TestPackIterator, self).__init__(pack_data)
+    def __init__(self, *args, **kwargs):
+        super(TestPackIterator, self).__init__(*args, **kwargs)
         self._unpacked = set()
 
     def _result(self, offset, type_num, chunks, sha, crc32):
@@ -758,11 +742,10 @@ class DeltaChainIteratorTests(TestCase):
     def make_pack_iter(self, f, thin=None):
         if thin is None:
             thin = bool(list(self.store))
-        if thin:
-            data = ThinPackData(self.get_raw_no_repeat, 'test.pack', file=f)
-        else:
-            data = PackData('test.pack', file=f)
-        return TestPackIterator.for_pack_data(data)
+        resolve_ext_ref = thin and self.get_raw_no_repeat or None
+        data = PackData('test.pack', file=f)
+        return TestPackIterator.for_pack_data(
+          data, resolve_ext_ref=resolve_ext_ref)
 
     def assertEntriesMatch(self, expected_indexes, entries, pack_iter):
         expected = [entries[i] for i in expected_indexes]
-- 
1.7.3.1



References