← Back to team overview

dulwich-users team mailing list archive

[PATCH 23/33] Rewrite add_thin_pack to use the fast PackIndexer.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

Change-Id: I1cf586cbf02e207334fafd3d08c7c61231463826
---
 dulwich/object_store.py            |  112 ++++++++++++++++++++++++------------
 dulwich/server.py                  |    9 +--
 dulwich/tests/test_object_store.py |   24 ++++++-
 3 files changed, 97 insertions(+), 48 deletions(-)

diff --git a/dulwich/object_store.py b/dulwich/object_store.py
index 9309e50..7a9b8b3 100644
--- a/dulwich/object_store.py
+++ b/dulwich/object_store.py
@@ -20,6 +20,7 @@
 """Git object store interfaces and implementation."""
 
 
+from cStringIO import StringIO
 import errno
 import itertools
 import os
@@ -27,6 +28,11 @@ import stat
 import tempfile
 import urllib2
 
+from dulwich._compat import (
+    make_sha,
+    SEEK_END,
+    SEEK_CUR,
+    )
 from dulwich.diff_tree import (
     tree_changes,
     walk_trees,
@@ -51,11 +57,17 @@ from dulwich.pack import (
     Pack,
     PackData,
     ThinPackData,
+    obj_sha,
     iter_sha1,
     load_pack_index,
     write_pack,
+    write_pack_header,
     write_pack_index_v2,
+    write_pack_object,
     write_pack_objects,
+    compute_file_sha,
+    PackIndexer,
+    PackStreamCopier,
     )
 
 INFODIR = 'info'
@@ -387,38 +399,82 @@ class DiskObjectStore(PackBasedObjectStore):
     def _remove_loose_object(self, sha):
         os.remove(self._get_shafile_path(sha))
 
-    def move_in_thin_pack(self, path):
+    def _complete_thin_pack(self, f, path, copier, indexer):
         """Move a specific file containing a pack into the pack directory.
 
         :note: The file should be on the same file system as the
             packs directory.
 
+        :param f: Open file object for the pack.
         :param path: Path to the pack file.
+        :param copier: A PackStreamCopier to use for writing pack data.
+        :param indexer: A PackIndexer for indexing the pack.
         """
-        data = ThinPackData(self.get_raw, path)
-
-        # Write index for the thin pack (do we really need this?)
-        temppath = os.path.join(self.pack_dir,
-            sha_to_hex(urllib2.randombytes(20))+".tempidx")
-        data.create_index_v2(temppath)
-        p = Pack.from_objects(data, load_pack_index(temppath))
-
+        entries = list(indexer)
+
+        # Update the header with the new number of objects.
+        f.seek(0)
+        write_pack_header(f, len(entries) + len(indexer.ext_refs()))
+
+        # Rescan the rest of the pack, computing the SHA with the new header.
+        new_sha = compute_file_sha(f, end_ofs=-20)
+
+        # Complete the pack.
+        for ext_sha in indexer.ext_refs():
+            type_num, data = self.get_raw(ext_sha)
+            offset = f.tell()
+            crc32 = write_pack_object(f, type_num, data, sha=new_sha)
+            entries.append((ext_sha, offset, crc32))
+        pack_sha = new_sha.digest()
+        f.write(pack_sha)
+        f.close()
+
+        # Move the pack in.
+        entries.sort()
+        pack_base_name = os.path.join(
+          self.pack_dir, 'pack-' + iter_sha1(e[0] for e in entries))
+        os.rename(path, pack_base_name + '.pack')
+
+        # Write the index.
+        index_file = GitFile(pack_base_name + '.idx', 'wb')
         try:
-            # Write a full pack version
-            temppath = os.path.join(self.pack_dir,
-                sha_to_hex(urllib2.randombytes(20))+".temppack")
-            write_pack(temppath, p.pack_tuples())
+            write_pack_index_v2(index_file, entries, pack_sha)
+            index_file.close()
         finally:
-            p.close()
+            index_file.abort()
 
-        pack_sha = load_pack_index(temppath+".idx").objects_sha1()
-        newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
-        os.rename(temppath+".pack", newbasename+".pack")
-        os.rename(temppath+".idx", newbasename+".idx")
-        final_pack = Pack(newbasename)
+        # Add the pack to the store and return it.
+        final_pack = Pack(pack_base_name)
+        final_pack.check_length_and_checksum()
         self._add_known_pack(final_pack)
         return final_pack
 
+    def add_thin_pack(self, read_all, read_some):
+        """Add a new thin pack to this object store.
+
+        Thin packs are packs that contain deltas with parents that exist outside
+        the pack. They should never be placed in the object store directly, and
+        always indexed and completed as they are copied.
+
+        :param read_all: Read function that blocks until the number of requested
+            bytes are read.
+        :param read_some: Read function that returns at least one byte, but may
+            not return the number of bytes requested.
+        :return: A Pack object pointing at the now-completed thin pack in the
+            objects/pack directory.
+        """
+        fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
+        f = os.fdopen(fd, 'w+b')
+
+        try:
+            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
+            copier = PackStreamCopier(read_all, read_some, f,
+                                      delta_iter=indexer)
+            copier.verify()
+            return self._complete_thin_pack(f, path, copier, indexer)
+        finally:
+            f.close()
+
     def move_in_pack(self, path):
         """Move a specific file containing a pack into the pack directory.
 
@@ -442,24 +498,6 @@ class DiskObjectStore(PackBasedObjectStore):
         self._add_known_pack(final_pack)
         return final_pack
 
-    def add_thin_pack(self):
-        """Add a new thin pack to this object store.
-
-        Thin packs are packs that contain deltas with parents that exist
-        in a different pack.
-        """
-        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
-        f = os.fdopen(fd, 'wb')
-        def commit():
-            os.fsync(fd)
-            f.close()
-            if os.path.getsize(path) > 0:
-                return self.move_in_thin_pack(path)
-            else:
-                os.remove(path)
-                return None
-        return f, commit
-
     def add_pack(self):
         """Add a new pack to this object store.
 
diff --git a/dulwich/server.py b/dulwich/server.py
index 668a817..ec31651 100644
--- a/dulwich/server.py
+++ b/dulwich/server.py
@@ -44,7 +44,6 @@ from dulwich.objects import (
     hex_to_sha,
     )
 from dulwich.pack import (
-    PackStreamCopier,
     write_pack_objects,
     )
 from dulwich.protocol import (
@@ -613,18 +612,14 @@ class ReceivePackHandler(Handler):
         return ("report-status", "delete-refs", "side-band-64k")
 
     def _apply_pack(self, refs):
-        f, commit = self.repo.object_store.add_thin_pack()
         all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,
                           AssertionError, socket.error, zlib.error,
                           ObjectFormatException)
         status = []
         # TODO: more informative error messages than just the exception string
         try:
-            PackStreamCopier(self.proto.read, self.proto.recv, f).verify()
-            p = commit()
-            if not p:
-                raise IOError('Failed to write pack')
-            p.check()
+            p = self.repo.object_store.add_thin_pack(self.proto.read,
+                                                     self.proto.recv)
             status.append(('unpack', 'ok'))
         except all_exceptions, e:
             status.append(('unpack', str(e).replace('\n', '')))
diff --git a/dulwich/tests/test_object_store.py b/dulwich/tests/test_object_store.py
index 314ef3f..7f440bd 100644
--- a/dulwich/tests/test_object_store.py
+++ b/dulwich/tests/test_object_store.py
@@ -19,6 +19,7 @@
 """Tests for the object store interface."""
 
 
+from cStringIO import StringIO
 import os
 import shutil
 import tempfile
@@ -30,6 +31,7 @@ from dulwich.errors import (
     NotTreeError,
     )
 from dulwich.objects import (
+    sha_to_hex,
     object_class,
     Blob,
     Tag,
@@ -42,6 +44,7 @@ from dulwich.object_store import (
     tree_lookup_path,
     )
 from dulwich.pack import (
+    REF_DELTA,
     write_pack_objects,
     )
 from dulwich.tests import (
@@ -49,6 +52,7 @@ from dulwich.tests import (
     )
 from dulwich.tests.utils import (
     make_object,
+    build_pack,
     )
 
 
@@ -236,10 +240,22 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
 
     def test_add_thin_pack(self):
         o = DiskObjectStore(self.store_dir)
-        f, commit = o.add_thin_pack()
-        b = make_object(Blob, data="more yummy data")
-        write_pack_objects(f, [(b, None)])
-        commit()
+        blob = make_object(Blob, data='yummy data')
+        o.add_object(blob)
+
+        f = StringIO()
+        entries = build_pack(f, [
+          (REF_DELTA, (blob.id, 'more yummy data')),
+          ], store=o)
+        pack = o.add_thin_pack(f.read, None)
+
+        packed_blob_sha = sha_to_hex(entries[0][3])
+        pack.check_length_and_checksum()
+        self.assertEqual(sorted([blob.id, packed_blob_sha]), list(pack))
+        self.assertTrue(o.contains_packed(packed_blob_sha))
+        self.assertTrue(o.contains_packed(blob.id))
+        self.assertEqual((Blob.type_num, 'more yummy data'),
+                         o.get_raw(packed_blob_sha))
 
 
 class TreeLookupPathTests(TestCase):
-- 
1.7.3.1



References