dulwich-users team mailing list archive
-
dulwich-users team
-
Mailing list archive
-
Message #00546
[PATCH 23/33] Rewrite add_thin_pack to use the fast PackIndexer.
From: Dave Borowitz <dborowitz@xxxxxxxxxx>
Change-Id: I1cf586cbf02e207334fafd3d08c7c61231463826
---
dulwich/object_store.py | 112 ++++++++++++++++++++++++------------
dulwich/server.py | 9 +--
dulwich/tests/test_object_store.py | 24 ++++++-
3 files changed, 97 insertions(+), 48 deletions(-)
diff --git a/dulwich/object_store.py b/dulwich/object_store.py
index 9309e50..7a9b8b3 100644
--- a/dulwich/object_store.py
+++ b/dulwich/object_store.py
@@ -20,6 +20,7 @@
"""Git object store interfaces and implementation."""
+from cStringIO import StringIO
import errno
import itertools
import os
@@ -27,6 +28,11 @@ import stat
import tempfile
import urllib2
+from dulwich._compat import (
+ make_sha,
+ SEEK_END,
+ SEEK_CUR,
+ )
from dulwich.diff_tree import (
tree_changes,
walk_trees,
@@ -51,11 +57,17 @@ from dulwich.pack import (
Pack,
PackData,
ThinPackData,
+ obj_sha,
iter_sha1,
load_pack_index,
write_pack,
+ write_pack_header,
write_pack_index_v2,
+ write_pack_object,
write_pack_objects,
+ compute_file_sha,
+ PackIndexer,
+ PackStreamCopier,
)
INFODIR = 'info'
@@ -387,38 +399,82 @@ class DiskObjectStore(PackBasedObjectStore):
def _remove_loose_object(self, sha):
os.remove(self._get_shafile_path(sha))
- def move_in_thin_pack(self, path):
+ def _complete_thin_pack(self, f, path, copier, indexer):
"""Move a specific file containing a pack into the pack directory.
:note: The file should be on the same file system as the
packs directory.
+ :param f: Open file object for the pack.
:param path: Path to the pack file.
+ :param copier: A PackStreamCopier to use for writing pack data.
+ :param indexer: A PackIndexer for indexing the pack.
"""
- data = ThinPackData(self.get_raw, path)
-
- # Write index for the thin pack (do we really need this?)
- temppath = os.path.join(self.pack_dir,
- sha_to_hex(urllib2.randombytes(20))+".tempidx")
- data.create_index_v2(temppath)
- p = Pack.from_objects(data, load_pack_index(temppath))
-
+ entries = list(indexer)
+
+ # Update the header with the new number of objects.
+ f.seek(0)
+ write_pack_header(f, len(entries) + len(indexer.ext_refs()))
+
+ # Rescan the rest of the pack, computing the SHA with the new header.
+ new_sha = compute_file_sha(f, end_ofs=-20)
+
+ # Complete the pack.
+ for ext_sha in indexer.ext_refs():
+ type_num, data = self.get_raw(ext_sha)
+ offset = f.tell()
+ crc32 = write_pack_object(f, type_num, data, sha=new_sha)
+ entries.append((ext_sha, offset, crc32))
+ pack_sha = new_sha.digest()
+ f.write(pack_sha)
+ f.close()
+
+ # Move the pack in.
+ entries.sort()
+ pack_base_name = os.path.join(
+ self.pack_dir, 'pack-' + iter_sha1(e[0] for e in entries))
+ os.rename(path, pack_base_name + '.pack')
+
+ # Write the index.
+ index_file = GitFile(pack_base_name + '.idx', 'wb')
try:
- # Write a full pack version
- temppath = os.path.join(self.pack_dir,
- sha_to_hex(urllib2.randombytes(20))+".temppack")
- write_pack(temppath, p.pack_tuples())
+ write_pack_index_v2(index_file, entries, pack_sha)
+ index_file.close()
finally:
- p.close()
+ index_file.abort()
- pack_sha = load_pack_index(temppath+".idx").objects_sha1()
- newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
- os.rename(temppath+".pack", newbasename+".pack")
- os.rename(temppath+".idx", newbasename+".idx")
- final_pack = Pack(newbasename)
+ # Add the pack to the store and return it.
+ final_pack = Pack(pack_base_name)
+ final_pack.check_length_and_checksum()
self._add_known_pack(final_pack)
return final_pack
+ def add_thin_pack(self, read_all, read_some):
+ """Add a new thin pack to this object store.
+
+ Thin packs are packs that contain deltas with parents that exist outside
+ the pack. They should never be placed in the object store directly, and
+ always indexed and completed as they are copied.
+
+ :param read_all: Read function that blocks until the number of requested
+ bytes are read.
+ :param read_some: Read function that returns at least one byte, but may
+ not return the number of bytes requested.
+ :return: A Pack object pointing at the now-completed thin pack in the
+ objects/pack directory.
+ """
+ fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
+ f = os.fdopen(fd, 'w+b')
+
+ try:
+ indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
+ copier = PackStreamCopier(read_all, read_some, f,
+ delta_iter=indexer)
+ copier.verify()
+ return self._complete_thin_pack(f, path, copier, indexer)
+ finally:
+ f.close()
+
def move_in_pack(self, path):
"""Move a specific file containing a pack into the pack directory.
@@ -442,24 +498,6 @@ class DiskObjectStore(PackBasedObjectStore):
self._add_known_pack(final_pack)
return final_pack
- def add_thin_pack(self):
- """Add a new thin pack to this object store.
-
- Thin packs are packs that contain deltas with parents that exist
- in a different pack.
- """
- fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
- f = os.fdopen(fd, 'wb')
- def commit():
- os.fsync(fd)
- f.close()
- if os.path.getsize(path) > 0:
- return self.move_in_thin_pack(path)
- else:
- os.remove(path)
- return None
- return f, commit
-
def add_pack(self):
"""Add a new pack to this object store.
diff --git a/dulwich/server.py b/dulwich/server.py
index 668a817..ec31651 100644
--- a/dulwich/server.py
+++ b/dulwich/server.py
@@ -44,7 +44,6 @@ from dulwich.objects import (
hex_to_sha,
)
from dulwich.pack import (
- PackStreamCopier,
write_pack_objects,
)
from dulwich.protocol import (
@@ -613,18 +612,14 @@ class ReceivePackHandler(Handler):
return ("report-status", "delete-refs", "side-band-64k")
def _apply_pack(self, refs):
- f, commit = self.repo.object_store.add_thin_pack()
all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,
AssertionError, socket.error, zlib.error,
ObjectFormatException)
status = []
# TODO: more informative error messages than just the exception string
try:
- PackStreamCopier(self.proto.read, self.proto.recv, f).verify()
- p = commit()
- if not p:
- raise IOError('Failed to write pack')
- p.check()
+ p = self.repo.object_store.add_thin_pack(self.proto.read,
+ self.proto.recv)
status.append(('unpack', 'ok'))
except all_exceptions, e:
status.append(('unpack', str(e).replace('\n', '')))
diff --git a/dulwich/tests/test_object_store.py b/dulwich/tests/test_object_store.py
index 314ef3f..7f440bd 100644
--- a/dulwich/tests/test_object_store.py
+++ b/dulwich/tests/test_object_store.py
@@ -19,6 +19,7 @@
"""Tests for the object store interface."""
+from cStringIO import StringIO
import os
import shutil
import tempfile
@@ -30,6 +31,7 @@ from dulwich.errors import (
NotTreeError,
)
from dulwich.objects import (
+ sha_to_hex,
object_class,
Blob,
Tag,
@@ -42,6 +44,7 @@ from dulwich.object_store import (
tree_lookup_path,
)
from dulwich.pack import (
+ REF_DELTA,
write_pack_objects,
)
from dulwich.tests import (
@@ -49,6 +52,7 @@ from dulwich.tests import (
)
from dulwich.tests.utils import (
make_object,
+ build_pack,
)
@@ -236,10 +240,22 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
def test_add_thin_pack(self):
o = DiskObjectStore(self.store_dir)
- f, commit = o.add_thin_pack()
- b = make_object(Blob, data="more yummy data")
- write_pack_objects(f, [(b, None)])
- commit()
+ blob = make_object(Blob, data='yummy data')
+ o.add_object(blob)
+
+ f = StringIO()
+ entries = build_pack(f, [
+ (REF_DELTA, (blob.id, 'more yummy data')),
+ ], store=o)
+ pack = o.add_thin_pack(f.read, None)
+
+ packed_blob_sha = sha_to_hex(entries[0][3])
+ pack.check_length_and_checksum()
+ self.assertEqual(sorted([blob.id, packed_blob_sha]), list(pack))
+ self.assertTrue(o.contains_packed(packed_blob_sha))
+ self.assertTrue(o.contains_packed(blob.id))
+ self.assertEqual((Blob.type_num, 'more yummy data'),
+ o.get_raw(packed_blob_sha))
class TreeLookupPathTests(TestCase):
--
1.7.3.1
References