← Back to team overview

dulwich-users team mailing list archive

[PATCH 22/33] pack: Extract a function to compute the SHA of a file.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

Change-Id: I0ddb4f4b381c90476a45b71bd3890a4d949f9453
---
 NEWS                       |    2 ++
 dulwich/pack.py            |   30 ++++++++++++++++++++++--------
 dulwich/tests/test_pack.py |   15 +++++++++++++++
 3 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/NEWS b/NEWS
index 17fb2f7..d6b30c1 100644
--- a/NEWS
+++ b/NEWS
@@ -50,6 +50,8 @@
 
   * Extract a check_length_and_checksum function. (Dave Borowitz)
 
+  * Extract a compute_file_sha function. (Dave Borowitz)
+
  TEST CHANGES
 
   * If setuptools is installed, "python setup.py test" will now run the testsuite.
diff --git a/dulwich/pack.py b/dulwich/pack.py
index a0f10d4..54a62c2 100644
--- a/dulwich/pack.py
+++ b/dulwich/pack.py
@@ -795,6 +795,27 @@ def obj_sha(type, chunks):
     return sha.digest()
 
 
+def compute_file_sha(f, start_ofs=0, end_ofs=0, buffer_size=1<<16):
+    """Hash a portion of a file into a new SHA.
+
+    :param f: A file-like object to read from that supports seek().
+    :param start_ofs: The offset in the file to start reading at.
+    :param end_ofs: The offset to stop reading at, relative to the end of
+        the file: 0 reads through EOF, -20 stops 20 bytes before it.
+    :param buffer_size: The maximum number of bytes to read() at a time.
+    :return: A new SHA object updated with data read from the file.
+    """
+    sha = make_sha()
+    f.seek(0, SEEK_END)  # f.tell() now reports the total file length
+    todo = f.tell() + end_ofs - start_ofs  # bytes left to hash; may be <= 0
+    f.seek(start_ofs)
+    while todo > 0:  # not "while todo": a negative count would never hit 0
+        data = f.read(min(todo, buffer_size))
+        sha.update(data)
+        todo -= len(data)
+    return sha
+
+
 class PackData(object):
     """The data contained in a packfile.
 
@@ -873,14 +894,7 @@ class PackData(object):
 
         :return: 20-byte binary SHA1 digest
         """
-        s = make_sha()
-        self._file.seek(0)
-        todo = self._get_size() - 20
-        while todo > 0:
-            x = self._file.read(min(todo, 1<<16))
-            s.update(x)
-            todo -= len(x)
-        return s.digest()
+        return compute_file_sha(self._file, end_ofs=-20).digest()
 
     def get_ref(self, sha):
         """Get the object for a ref SHA, only looking in this pack."""
diff --git a/dulwich/tests/test_pack.py b/dulwich/tests/test_pack.py
index 7ac04f6..860822f 100644
--- a/dulwich/tests/test_pack.py
+++ b/dulwich/tests/test_pack.py
@@ -65,6 +65,7 @@ from dulwich.pack import (
     write_pack_object,
     write_pack,
     unpack_object,
+    compute_file_sha,
     DeltaChainIterator,
     )
 from dulwich.tests import (
@@ -258,6 +259,20 @@ class TestPackData(PackTests):
         idx2 = self.get_pack_index(pack1_sha)
         self.assertEquals(idx1, idx2)
 
+    def test_compute_file_sha(self):
+        """Hash slices of a buffer and compare against make_sha."""
+        f = StringIO('abcd1234wxyz')
+        cases = [
+          ('abcd1234wxyz', {}),
+          ('abcd1234wxyz', {'buffer_size': 5}),
+          ('abcd1234', {'end_ofs': -4}),
+          ('1234wxyz', {'start_ofs': 4}),
+          ('1234', {'start_ofs': 4, 'end_ofs': -4}),
+          ]
+        for expected, kwargs in cases:
+            self.assertEqual(make_sha(expected).hexdigest(),
+                             compute_file_sha(f, **kwargs).hexdigest())
+
 
 class TestPack(PackTests):
 
-- 
1.7.3.1



References