← Back to team overview

dulwich team mailing list archive

[PATCH 2/3] Make ShaFiles created from files trust their filenames.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

This adds a dummy FixedSha class that implements the read-only part
of hashlib's hash interface but does not actually compute a SHA-1
digest. This allows us to assign ids to file objects based on their
filename rather than requiring a read of the whole file; SHA-1s will
soon be checked during check().

Added a filename_to_hex helper function to objects.py; refactored the
opposite hex_to_filename functionality into this file as well for
parallelism and testing. As a side effect, reorganized some files in
tests/data to have the normal 2/38 filename structure.

Change-Id: Ic459628aec32a92e29ea49cfd6cbe685053971ef
---
 dulwich/object_store.py                            |    5 +--
 dulwich/objects.py                                 |   36 ++++++++++++++++++++
 .../6f/670c0fb53f9463760b7295fbb814e965fb20c8      |  Bin 0 -> 16 bytes
 .../blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8 |  Bin 16 -> 0 bytes
 .../95/4a536f7819d40e6f637f849ee187dd10066349      |  Bin 0 -> 22 bytes
 .../blobs/954a536f7819d40e6f637f849ee187dd10066349 |  Bin 22 -> 0 bytes
 .../e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391      |  Bin 0 -> 15 bytes
 .../blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 |  Bin 15 -> 0 bytes
 .../0d/89f20333fbb1d2f3a94da77f4981373d8f4310      |    2 +
 .../0d89f20333fbb1d2f3a94da77f4981373d8f4310       |    2 -
 .../5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc      |  Bin 0 -> 194 bytes
 .../5dac377bdded4c9aeb8dff595f0faeebcc8498cc       |  Bin 194 -> 0 bytes
 .../60/dacdc733de308bb77bb76ce0fb0f9b44c9769e      |    2 +
 .../60dacdc733de308bb77bb76ce0fb0f9b44c9769e       |    2 -
 .../tags/71/033db03a03c6a36721efcf1968dd8f8e0cf023 |    4 ++
 .../tags/71033db03a03c6a36721efcf1968dd8f8e0cf023  |    4 --
 .../70/c190eb48fa8bbb50ddc692a17b44cb781af7f6      |  Bin 0 -> 71 bytes
 .../trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6 |  Bin 71 -> 0 bytes
 dulwich/tests/test_objects.py                      |   15 ++++----
 19 files changed, 54 insertions(+), 18 deletions(-)
 create mode 100644 dulwich/tests/data/blobs/6f/670c0fb53f9463760b7295fbb814e965fb20c8
 delete mode 100644 dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8
 create mode 100644 dulwich/tests/data/blobs/95/4a536f7819d40e6f637f849ee187dd10066349
 delete mode 100644 dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349
 create mode 100644 dulwich/tests/data/blobs/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391
 delete mode 100644 dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
 create mode 100755 dulwich/tests/data/commits/0d/89f20333fbb1d2f3a94da77f4981373d8f4310
 delete mode 100755 dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310
 create mode 100644 dulwich/tests/data/commits/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc
 delete mode 100644 dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc
 create mode 100644 dulwich/tests/data/commits/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e
 delete mode 100644 dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e
 create mode 100644 dulwich/tests/data/tags/71/033db03a03c6a36721efcf1968dd8f8e0cf023
 delete mode 100644 dulwich/tests/data/tags/71033db03a03c6a36721efcf1968dd8f8e0cf023
 create mode 100644 dulwich/tests/data/trees/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6
 delete mode 100644 dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6

diff --git a/dulwich/object_store.py b/dulwich/object_store.py
index 6eaa179..588afd0 100644
--- a/dulwich/object_store.py
+++ b/dulwich/object_store.py
@@ -39,6 +39,7 @@ from dulwich.objects import (
     Tree,
     hex_to_sha,
     sha_to_hex,
+    hex_to_filename,
     S_ISGITLINK,
     )
 from dulwich.pack import (
@@ -362,10 +363,8 @@ class DiskObjectStore(PackBasedObjectStore):
             raise
 
     def _get_shafile_path(self, sha):
-        dir = sha[:2]
-        file = sha[2:]
         # Check from object dir
-        return os.path.join(self.path, dir, file)
+        return hex_to_filename(self.path, sha)
 
     def _iter_loose_objects(self):
         for base in os.listdir(self.path):
diff --git a/dulwich/objects.py b/dulwich/objects.py
index 350eda3..27bbf0a 100644
--- a/dulwich/objects.py
+++ b/dulwich/objects.py
@@ -84,6 +84,27 @@ def hex_to_sha(hex):
     return binascii.unhexlify(hex)
 
 
+def hex_to_filename(path, hex):
+    """Takes a hex sha and returns its filename relative to the given path."""
+    dir = hex[:2]
+    file = hex[2:]
+    # Check from object dir
+    return os.path.join(path, dir, file)
+
+
+def filename_to_hex(filename):
+    """Takes an object filename and returns its corresponding hex sha."""
+    # grab the last (up to) two path components
+    names = filename.rsplit(os.path.sep, 2)[-2:]
+    errmsg = "Invalid object filename: %s" % filename
+    assert len(names) == 2, errmsg
+    base, rest = names
+    assert len(base) == 2 and len(rest) == 38, errmsg
+    hex = base + rest
+    hex_to_sha(hex)
+    return hex
+
+
 def serializable_property(name, docstring=None):
     def set(obj, value):
         obj._ensure_parsed()
@@ -122,6 +143,20 @@ def check_identity(identity, error_msg):
         raise ObjectFormatException(error_msg)
 
 
+class FixedSha(object):
+    """SHA object that behaves like hashlib's but is given a fixed value."""
+
+    def __init__(self, hexsha):
+        self._hexsha = hexsha
+        self._sha = hex_to_sha(hexsha)
+
+    def digest(self):
+        return self._sha
+
+    def hexdigest(self):
+        return self._hexsha
+
+
 class ShaFile(object):
     """A git SHA file."""
 
@@ -282,6 +317,7 @@ class ShaFile(object):
         try:
             try:
                 obj = cls._parse_file_header(f)
+                obj._sha = FixedSha(filename_to_hex(filename))
                 obj._needs_parsing = True
                 obj._needs_serialization = True
                 return obj
diff --git a/dulwich/tests/data/blobs/6f/670c0fb53f9463760b7295fbb814e965fb20c8 b/dulwich/tests/data/blobs/6f/670c0fb53f9463760b7295fbb814e965fb20c8
new file mode 100644
index 0000000000000000000000000000000000000000..8f8ed37f1e6b8f0af781c26daa8f31ae9bd2167d
GIT binary patch
literal 16
XcmXr7n4|5fs~2eSgo%MGhe-wiD3b&h

literal 0
HcmV?d00001

diff --git a/dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8 b/dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8
deleted file mode 100644
index 8f8ed37f1e6b8f0af781c26daa8f31ae9bd2167d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 16
XcmXr7n4|5fs~2eSgo%MGhe-wiD3b&h

diff --git a/dulwich/tests/data/blobs/95/4a536f7819d40e6f637f849ee187dd10066349 b/dulwich/tests/data/blobs/95/4a536f7819d40e6f637f849ee187dd10066349
new file mode 100644
index 0000000000000000000000000000000000000000..7bef12912d59b8fab01801f66978456947e6ce59
GIT binary patch
literal 22
dcmb=J<9+I+e~>}ChNrGxpusaH2E`XFA^>DH2Y~<p

literal 0
HcmV?d00001

diff --git a/dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349 b/dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349
deleted file mode 100644
index 7bef12912d59b8fab01801f66978456947e6ce59..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 22
dcmb=J<9+I+e~>}ChNrGxpusaH2E`XFA^>DH2Y~<p

diff --git a/dulwich/tests/data/blobs/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 b/dulwich/tests/data/blobs/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391
new file mode 100644
index 0000000000000000000000000000000000000000..8c901c5b89f920a740af8b23b771ef4019cdb665
GIT binary patch
literal 15
Wcmb=J<9+I+e~>`}0|Vy<#t#58#|6#+

literal 0
HcmV?d00001

diff --git a/dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 b/dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
deleted file mode 100644
index 8c901c5b89f920a740af8b23b771ef4019cdb665..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 15
Wcmb=J<9+I+e~>`}0|Vy<#t#58#|6#+

diff --git a/dulwich/tests/data/commits/0d/89f20333fbb1d2f3a94da77f4981373d8f4310 b/dulwich/tests/data/commits/0d/89f20333fbb1d2f3a94da77f4981373d8f4310
new file mode 100755
index 0000000..b4c3a1d
--- /dev/null
+++ b/dulwich/tests/data/commits/0d/89f20333fbb1d2f3a94da77f4981373d8f4310
@@ -0,0 +1,2 @@
+xœ¥K
+Â@]Ï)zù53×®×==-&’–àíñ¾e=¨âi:×úƒÎ"ÐZ—=¢çH)¢°§©rˆèŠ”úœ¬¡—>§®4ÈwY´¼áÔ¯M•ÒÑxé¿|Ýñq=ƒs)&Ì6Dhì6Ã{Y凹m/üLæXg?«
\ No newline at end of file
diff --git a/dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310 b/dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310
deleted file mode 100755
index b4c3a1d..0000000
--- a/dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310
+++ /dev/null
@@ -1,2 +0,0 @@
-xœ¥K
-Â@]Ï)zù53×®×==-&’–àíñ¾e=¨âi:×úƒÎ"ÐZ—=¢çH)¢°§©rˆèŠ”úœ¬¡—>§®4ÈwY´¼áÔ¯M•ÒÑxé¿|Ýñq=ƒs)&Ì6Dhì6Ã{Y凹m/üLæXg?«
\ No newline at end of file
diff --git a/dulwich/tests/data/commits/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc b/dulwich/tests/data/commits/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc
new file mode 100644
index 0000000000000000000000000000000000000000..69c6dff1ab6b88df371b5f539df78e705d1f05a5
GIT binary patch
literal 194
zcmV;z06qVBoTZP;N(3<!L|x}AvL9&gP11S5Ag%@1uJ0>41KlG%k-@*W9q<b*C@2m^
zLCIxVPQ!;<9>!+owSr*H1hJ<OsUpXweYe!oHRM)e7OL5O(q`-M#5pmm$c&5yG~isT
z%`$6jXnO!@S-bxAlk{#43@9OK>@zli(zHnJ5HqWVCT!W!*Z8<>{)HC1`Zv1{{`SZ7
w?V!ddTA$Cq{C3CFYU9ZxmQ+eC*!%%@Ik`U@_MZ*+YTLW{X?kS$1Hb25!(j?!{r~^~

literal 0
HcmV?d00001

diff --git a/dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc b/dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc
deleted file mode 100644
index 69c6dff1ab6b88df371b5f539df78e705d1f05a5..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 194
zcmV;z06qVBoTZP;N(3<!L|x}AvL9&gP11S5Ag%@1uJ0>41KlG%k-@*W9q<b*C@2m^
zLCIxVPQ!;<9>!+owSr*H1hJ<OsUpXweYe!oHRM)e7OL5O(q`-M#5pmm$c&5yG~isT
z%`$6jXnO!@S-bxAlk{#43@9OK>@zli(zHnJ5HqWVCT!W!*Z8<>{)HC1`Zv1{{`SZ7
w?V!ddTA$Cq{C3CFYU9ZxmQ+eC*!%%@Ik`U@_MZ*+YTLW{X?kS$1Hb25!(j?!{r~^~

diff --git a/dulwich/tests/data/commits/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e b/dulwich/tests/data/commits/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e
new file mode 100644
index 0000000..9e1d726
--- /dev/null
+++ b/dulwich/tests/data/commits/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e
@@ -0,0 +1,2 @@
+xœ¥ŽË
+Â0E]ç+f/ÈäÑN"®]®g’	¶Ð*5"þ½¯Oð.Ï…ÃÉ—i¸€«¶¨a¶	UB¬E¤ÃRrŸ[’²P´\©öæÊ‹Î
°ÄTz靖-®zN¡0Q
)ZO¾Ä¼EÃ÷v¾,pàIopÒ[“'lÇǺ¨<ïÇ|ñfÖ¶k)P—œGXã{&K›þã0Ç÷?“y´MQ
\ No newline at end of file
diff --git a/dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e b/dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e
deleted file mode 100644
index 9e1d726..0000000
--- a/dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e
+++ /dev/null
@@ -1,2 +0,0 @@
-xœ¥ŽË
-Â0E]ç+f/ÈäÑN"®]®g’	¶Ð*5"þ½¯Oð.Ï…ÃÉ—i¸€«¶¨a¶	UB¬E¤ÃRrŸ[’²P´\©öæÊ‹Î
°ÄTz靖-®zN¡0Q
)ZO¾Ä¼EÃ÷v¾,pàIopÒ[“'lÇǺ¨<ïÇ|ñfÖ¶k)P—œGXã{&K›þã0Ç÷?“y´MQ
\ No newline at end of file
diff --git a/dulwich/tests/data/tags/71/033db03a03c6a36721efcf1968dd8f8e0cf023 b/dulwich/tests/data/tags/71/033db03a03c6a36721efcf1968dd8f8e0cf023
new file mode 100644
index 0000000..8c85e3a
--- /dev/null
+++ b/dulwich/tests/data/tags/71/033db03a03c6a36721efcf1968dd8f8e0cf023
@@ -0,0 +1,4 @@
+xmŽMOƒ@„=ï¯x#Ý…²€Qã¶nIME”¢ÁÛòÙíòQ
+mÊ¿—ªG's˜L2O¦SzÑÄÛ,éÁ$1¥vžšqnaJb+¥¹0u3·mMr‘
+aÔ»’¦ªdú‘ÑÉ¢ÎÒs,²=°RB bY(¥ÖãcQ	YjãæˆnØ!p	Æ­7²ƒÑâ#	Ýœ5ãîÒßõ!Xº[‡ïü§GÙ¾“M}n}ð]8mª9pézád%ëÃé
+!É#Îf|ÎX´`ª”íÉfóB½µÁKµœÃD%'“Ís¡¾¯öK¬÷¯Cäӝš5\…™<a5E£Dp‹DÕdúéÈç-=nŒ
ºoKkø=ʽ§ÿn~þ6iM
\ No newline at end of file
diff --git a/dulwich/tests/data/tags/71033db03a03c6a36721efcf1968dd8f8e0cf023 b/dulwich/tests/data/tags/71033db03a03c6a36721efcf1968dd8f8e0cf023
deleted file mode 100644
index 8c85e3a..0000000
--- a/dulwich/tests/data/tags/71033db03a03c6a36721efcf1968dd8f8e0cf023
+++ /dev/null
@@ -1,4 +0,0 @@
-xmŽMOƒ@„=ï¯x#Ý…²€Qã¶nIME”¢ÁÛòÙíòQ
-mÊ¿—ªG's˜L2O¦SzÑÄÛ,éÁ$1¥vžšqnaJb+¥¹0u3·mMr‘
-aÔ»’¦ªdú‘ÑÉ¢ÎÒs,²=°RB bY(¥ÖãcQ	YjãæˆnØ!p	Æ­7²ƒÑâ#	Ýœ5ãîÒßõ!Xº[‡ïü§GÙ¾“M}n}ð]8mª9pézád%ëÃé
-!É#Îf|ÎX´`ª”íÉfóB½µÁKµœÃD%'“Ís¡¾¯öK¬÷¯Cäӝš5\…™<a5E£Dp‹DÕdúéÈç-=nŒ
ºoKkø=ʽ§ÿn~þ6iM
\ No newline at end of file
diff --git a/dulwich/tests/data/trees/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6 b/dulwich/tests/data/trees/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6
new file mode 100644
index 0000000000000000000000000000000000000000..2569779c10cc06f9e3639d73183c80b6fbd8f243
GIT binary patch
literal 71
zcmV-N0J#5noGU3xO;s?pU@$Z=Ff%bxNMy)Q=i%RKKP9=0yJ+g~9U?DNe=D4TN+vOA
dX701=dw4)edD_xM5%wo#^F^L|0|3{t7x5-=A87yp

literal 0
HcmV?d00001

diff --git a/dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6 b/dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6
deleted file mode 100644
index 2569779c10cc06f9e3639d73183c80b6fbd8f243..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 71
zcmV-N0J#5noGU3xO;s?pU@$Z=Ff%bxNMy)Q=i%RKKP9=0yJ+g~9U?DNe=D4TN+vOA
dX701=dw4)edD_xM5%wo#^F^L|0|3{t7x5-=A87yp

diff --git a/dulwich/tests/test_objects.py b/dulwich/tests/test_objects.py
index 7cf87a9..0d275b1 100644
--- a/dulwich/tests/test_objects.py
+++ b/dulwich/tests/test_objects.py
@@ -38,6 +38,7 @@ from dulwich.objects import (
     Tag,
     format_timezone,
     hex_to_sha,
+    hex_to_filename,
     check_hexsha,
     check_identity,
     parse_timezone,
@@ -89,11 +90,11 @@ except ImportError:
 
 class BlobReadTests(unittest.TestCase):
     """Test decompression of blobs"""
-  
-    def get_sha_file(self, obj, base, sha):
-        return obj.from_file(os.path.join(os.path.dirname(__file__),
-                                          'data', base, sha))
-  
+
+    def get_sha_file(self, cls, base, sha):
+        dir = os.path.join(os.path.dirname(__file__), 'data', base)
+        return cls.from_file(hex_to_filename(dir, sha))
+
     def get_blob(self, sha):
         """Return the blob named sha from the test data dir"""
         return self.get_sha_file(Blob, 'blobs', sha)
@@ -406,8 +407,8 @@ class TreeTests(ShaFileCheckTests):
         self.assertEquals(["a.c", "a", "a/c"], [p[0] for p in x.iteritems()])
 
     def _do_test_parse_tree(self, parse_tree):
-        o = Tree.from_file(os.path.join(os.path.dirname(__file__), 'data',
-                                        'trees', tree_sha))
+        dir = os.path.join(os.path.dirname(__file__), 'data', 'trees')
+        o = Tree.from_file(hex_to_filename(dir, tree_sha))
         o._parse_file()
         self.assertEquals([('a', 0100644, a_sha), ('b', 0100644, b_sha)],
                           list(parse_tree(o.as_raw_string())))
-- 
1.7.0.3.295.gd8fa2




References