dulwich-users team mailing list archive
-
dulwich-users team
-
Mailing list archive
-
Message #00330
[PATCH 05/24] diff_tree: Add function to count blocks in an object.
From: Dave Borowitz <dborowitz@xxxxxxxxxx>
Change-Id: Ida66f369c2cfb4e173f1661304a15f661d379560
---
dulwich/diff_tree.py | 28 ++++++++++++++++++++++++++++
dulwich/tests/test_diff_tree.py | 24 ++++++++++++++++++++++++
2 files changed, 52 insertions(+), 0 deletions(-)
diff --git a/dulwich/diff_tree.py b/dulwich/diff_tree.py
index dbe85c1..3f78ef3 100644
--- a/dulwich/diff_tree.py
+++ b/dulwich/diff_tree.py
@@ -18,9 +18,12 @@
"""Utilities for diffing files and trees."""
+from cStringIO import StringIO
+import itertools
import stat
from dulwich.misc import (
+ defaultdict,
TreeChangeTuple,
)
from dulwich.objects import (
@@ -166,3 +169,28 @@ def tree_changes(store, tree1_id, tree2_id, want_unchanged=False):
# Both were None because at least one was a tree.
continue
yield TreeChange(change_type, entry1, entry2)
+
+
+_BLOCK_SIZE = 64
+
+
+def _count_blocks(obj):
+ """Count the blocks in an object.
+
+ Splits the data into blocks either on lines or <=64-byte chunks of lines.
+
+ :param obj: The object to count blocks for.
+ :return: A dict of block -> number of occurrences.
+ """
+ block_counts = defaultdict(int)
+ block = StringIO()
+ for c in itertools.chain(*obj.as_raw_chunks()):
+ block.write(c)
+ if c == '\n' or block.tell() == _BLOCK_SIZE:
+ block_counts[block.getvalue()] += 1
+ block.seek(0)
+ block.truncate()
+ last_block = block.getvalue()
+ if last_block:
+ block_counts[last_block] += 1
+ return block_counts
diff --git a/dulwich/tests/test_diff_tree.py b/dulwich/tests/test_diff_tree.py
index 1225e1b..2cefd45 100644
--- a/dulwich/tests/test_diff_tree.py
+++ b/dulwich/tests/test_diff_tree.py
@@ -27,6 +27,7 @@ from dulwich.diff_tree import (
TreeChange,
_merge_entries,
tree_changes,
+ _count_blocks,
)
from dulwich.index import (
commit_tree,
@@ -35,6 +36,7 @@ from dulwich.object_store import (
MemoryObjectStore,
)
from dulwich.objects import (
+ ShaFile,
Blob,
)
from dulwich.tests import (
@@ -247,3 +249,25 @@ class TreeChangesTest(TestCase):
[TreeChange(CHANGE_MODIFY, ('a', 0100644, blob_a1.id),
('a', 0100644, blob_a2.id))],
tree1, tree2)
+
+
+class RenameDetectionTest(TestCase):
+
+ def test_count_blocks(self):
+ blob = make_object(Blob, data='a\nb\na\n')
+ self.assertEqual({'a\n': 2, 'b\n': 1}, _count_blocks(blob))
+
+ def test_count_blocks_no_newline(self):
+ blob = make_object(Blob, data='a\na')
+ self.assertEqual({'a\n': 1, 'a': 1}, _count_blocks(blob))
+
+ def test_count_blocks_chunks(self):
+ blob = ShaFile.from_raw_chunks(Blob.type_num, ['a\nb', '\na\n'])
+ self.assertEqual({'a\n': 2, 'b\n': 1}, _count_blocks(blob))
+
+ def test_count_blocks_long_lines(self):
+ a = 'a' * 64
+ data = a + 'xxx\ny\n' + a + 'zzz\n'
+ blob = make_object(Blob, data=data)
+ self.assertEqual({'a' * 64: 2, 'xxx\n': 1, 'y\n': 1, 'zzz\n': 1},
+ _count_blocks(blob))
--
1.7.3.2.168.gd6b63
References