← Back to team overview

dulwich-users team mailing list archive

[PATCH 27/33] pack: Nuke ThinPackData.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

This class originally existed to provide an alternative implementation
of iterentries that knew how to get external refs. The rewrite of the
pack inflation code is a better replacement for this implementation.

Additionally, making ThinPackData a first-class object made it seem like
it's ok to have thin pack files sitting around on disk. In reality, the
git way is to only handle thin packs in the receive-pack code path,
complete them immediately, and never open an existing file as a thin
pack. Not having the ThinPack code makes it clearer that they should
only rarely be created.

Change-Id: I5a843b346213d331da51f2e3ddd031d2fd0a1c66
---
 dulwich/object_store.py    |    1 -
 dulwich/pack.py            |   81 +------------------------------------------
 dulwich/tests/test_pack.py |   29 +++------------
 3 files changed, 8 insertions(+), 103 deletions(-)

diff --git a/dulwich/object_store.py b/dulwich/object_store.py
index 7a9b8b3..2ed160f 100644
--- a/dulwich/object_store.py
+++ b/dulwich/object_store.py
@@ -56,7 +56,6 @@ from dulwich.objects import (
 from dulwich.pack import (
     Pack,
     PackData,
-    ThinPackData,
     obj_sha,
     iter_sha1,
     load_pack_index,
diff --git a/dulwich/pack.py b/dulwich/pack.py
index c3ba114..a11ff12 100644
--- a/dulwich/pack.py
+++ b/dulwich/pack.py
@@ -1061,81 +1061,6 @@ class PackData(object):
         return unpack_object(self._file.read)[:2]
 
 
-class ThinPackData(PackData):
-    """PackData for thin packs, which require an ObjectStore for resolving."""
-
-    def __init__(self, resolve_ext_ref, *args, **kwargs):
-        super(ThinPackData, self).__init__(*args, **kwargs)
-        self.resolve_ext_ref = resolve_ext_ref
-
-    @classmethod
-    def from_file(cls, resolve_ext_ref, file, size):
-        return cls(resolve_ext_ref, str(file), file=file, size=size)
-
-    def get_ref(self, sha):
-        """Resolve a reference looking in both this pack and the store."""
-        try:
-            # As part of completing a pack we create a Pack object with a
-            # ThinPackData and a full PackIndex, so check in the index first if
-            # possible.
-            # TODO(dborowitz): reevaluate this when the pack completion code is
-            # rewritten.
-            return super(ThinPackData, self).get_ref(sha)
-        except KeyError:
-            type, obj = self.resolve_ext_ref(sha)
-            return None, type, obj
-
-    def iterentries(self, progress=None):
-        """Yield entries summarizing the contents of this pack.
-
-        :param progress: Progress function, called with current and
-            total object count.
-
-        This will yield tuples with (sha, offset, crc32)
-        """
-        found = {}
-        postponed = defaultdict(list)
-
-        class Postpone(Exception):
-            """Raised to postpone delta resolving."""
-
-            def __init__(self, sha):
-                self.sha = sha
-
-        def get_ref_text(sha):
-            assert len(sha) == 20
-            if sha in found:
-                offset = found[sha]
-                type, obj = self.get_object_at(offset)
-                return offset, type, obj
-            try:
-                return self.get_ref(sha)
-            except KeyError:
-                raise Postpone(sha)
-
-        extra = []
-        todo = chain(self.iterobjects(progress=progress), extra)
-        for (offset, type, obj, crc32) in todo:
-            assert isinstance(offset, int)
-            if obj is None:
-                # Inflate postponed delta
-                obj, type = self.get_object_at(offset)
-            assert isinstance(type, int)
-            assert isinstance(obj, list) or isinstance(obj, tuple)
-            try:
-                type, obj = self.resolve_object(offset, type, obj, get_ref_text)
-            except Postpone, e:
-                # Save memory by not storing the inflated obj in postponed
-                postponed[e.sha].append((offset, type, None, crc32))
-            else:
-                sha = obj_sha(type, obj)
-                found[sha] = offset
-                yield sha, offset, crc32
-                extra.extend(postponed.pop(sha, []))
-        if postponed:
-            raise KeyError([sha_to_hex(h) for h in postponed.keys()])
-
-
 class DeltaChainIterator(object):
     """Abstract iterator over pack data based on delta chains.
 
@@ -1158,8 +1083,8 @@ class DeltaChainIterator(object):
         self._ext_refs = []
 
     @classmethod
-    def for_pack_data(cls, pack_data):
-        walker = cls(None)
+    def for_pack_data(cls, pack_data, resolve_ext_ref=None):
+        walker = cls(None, resolve_ext_ref=resolve_ext_ref)
         walker.set_pack_data(pack_data)
         for offset, type_num, obj, _ in pack_data.iterobjects():
             walker.record(offset, type_num, obj)
@@ -1178,8 +1103,6 @@ class DeltaChainIterator(object):
 
     def set_pack_data(self, pack_data):
         self._file = pack_data._file
-        if isinstance(pack_data, ThinPackData):
-            self._resolve_ext_ref = pack_data.resolve_ext_ref
 
     def _walk_all_chains(self):
         for offset, type_num in self._full_ofs:
diff --git a/dulwich/tests/test_pack.py b/dulwich/tests/test_pack.py
index cb33168..73d1fee 100644
--- a/dulwich/tests/test_pack.py
+++ b/dulwich/tests/test_pack.py
@@ -53,7 +53,6 @@ from dulwich.pack import (
     MemoryPackIndex,
     Pack,
     PackData,
-    ThinPackData,
     apply_delta,
     create_delta,
     deltify_pack_objects,
@@ -193,21 +192,6 @@ class TestPackData(PackTests):
         path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha)
         PackData.from_file(open(path), os.path.getsize(path))
 
-    # TODO: more ThinPackData tests.
-    def test_thin_from_file(self):
-        test_sha = '1' * 40
-
-        def resolve(sha):
-            self.assertEqual(test_sha, sha)
-            return 3, 'data'
-
-        path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha)
-        data = ThinPackData.from_file(resolve, open(path),
-                                      os.path.getsize(path))
-        idx = self.get_pack_index(pack1_sha)
-        Pack.from_objects(data, idx)
-        self.assertEqual((None, 3, 'data'), data.get_ref(test_sha))
-
     def test_pack_len(self):
         p = self.get_pack_data(pack1_sha)
         self.assertEquals(3, len(p))
@@ -716,8 +700,8 @@ class TestPackIterator(DeltaChainIterator):
 
     _compute_crc32 = True
 
-    def __init__(self, pack_data):
-        super(TestPackIterator, self).__init__(pack_data)
+    def __init__(self, *args, **kwargs):
+        super(TestPackIterator, self).__init__(*args, **kwargs)
         self._unpacked = set()
 
     def _result(self, offset, type_num, chunks, sha, crc32):
@@ -758,11 +742,10 @@ class DeltaChainIteratorTests(TestCase):
     def make_pack_iter(self, f, thin=None):
         if thin is None:
             thin = bool(list(self.store))
-        if thin:
-            data = ThinPackData(self.get_raw_no_repeat, 'test.pack', file=f)
-        else:
-            data = PackData('test.pack', file=f)
-        return TestPackIterator.for_pack_data(data)
+        resolve_ext_ref = thin and self.get_raw_no_repeat or None
+        data = PackData('test.pack', file=f)
+        return TestPackIterator.for_pack_data(
+          data, resolve_ext_ref=resolve_ext_ref)
 
     def assertEntriesMatch(self, expected_indexes, entries, pack_iter):
         expected = [entries[i] for i in expected_indexes]
-- 
1.7.3.1



References