← Back to team overview

dulwich-users team mailing list archive

[PATCH 05/24] diff_tree: Add function to count blocks in an object.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

Change-Id: Ida66f369c2cfb4e173f1661304a15f661d379560
---
 dulwich/diff_tree.py            |   28 ++++++++++++++++++++++++++++
 dulwich/tests/test_diff_tree.py |   24 ++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 0 deletions(-)

diff --git a/dulwich/diff_tree.py b/dulwich/diff_tree.py
index dbe85c1..3f78ef3 100644
--- a/dulwich/diff_tree.py
+++ b/dulwich/diff_tree.py
@@ -18,9 +18,12 @@
 
 """Utilities for diffing files and trees."""
 
+from cStringIO import StringIO
+import itertools
 import stat
 
 from dulwich.misc import (
+    defaultdict,
     TreeChangeTuple,
     )
 from dulwich.objects import (
@@ -166,3 +169,28 @@ def tree_changes(store, tree1_id, tree2_id, want_unchanged=False):
             # Both were None because at least one was a tree.
             continue
         yield TreeChange(change_type, entry1, entry2)
+
+
+_BLOCK_SIZE = 64  # byte length at which _count_blocks ends a block
+
+
+def _count_blocks(obj):
+    """Count the blocks in an object.
+
+    Splits the data into blocks either on lines or <=64-byte chunks of lines.
+
+    :param obj: The object to count blocks for.
+    :return: A dict of block -> number of occurrences.
+    """
+    block_counts = defaultdict(int)
+    block = StringIO()  # holds the block currently being built
+    for c in itertools.chain(*obj.as_raw_chunks()):  # flatten all chunks
+        block.write(c)
+        if c == '\n' or block.tell() == _BLOCK_SIZE:  # newline or size cap
+            block_counts[block.getvalue()] += 1
+            block.seek(0)  # rewind, then truncate to empty the buffer
+            block.truncate()
+    last_block = block.getvalue()  # leftover after the last finished block
+    if last_block:
+        block_counts[last_block] += 1
+    return block_counts
diff --git a/dulwich/tests/test_diff_tree.py b/dulwich/tests/test_diff_tree.py
index 1225e1b..2cefd45 100644
--- a/dulwich/tests/test_diff_tree.py
+++ b/dulwich/tests/test_diff_tree.py
@@ -27,6 +27,7 @@ from dulwich.diff_tree import (
     TreeChange,
     _merge_entries,
     tree_changes,
+    _count_blocks,
     )
 from dulwich.index import (
     commit_tree,
@@ -35,6 +36,7 @@ from dulwich.object_store import (
     MemoryObjectStore,
     )
 from dulwich.objects import (
+    ShaFile,
     Blob,
     )
 from dulwich.tests import (
@@ -247,3 +249,25 @@ class TreeChangesTest(TestCase):
           [TreeChange(CHANGE_MODIFY, ('a', 0100644, blob_a1.id),
                       ('a', 0100644, blob_a2.id))],
           tree1, tree2)
+
+
+class RenameDetectionTest(TestCase):  # exercises _count_blocks
+
+    def test_count_blocks(self):  # repeated lines are counted together
+        blob = make_object(Blob, data='a\nb\na\n')
+        self.assertEqual({'a\n': 2, 'b\n': 1}, _count_blocks(blob))
+
+    def test_count_blocks_no_newline(self):  # data lacks a final newline
+        blob = make_object(Blob, data='a\na')
+        self.assertEqual({'a\n': 1, 'a': 1}, _count_blocks(blob))
+
+    def test_count_blocks_chunks(self):  # 'b\n' spans two raw chunks
+        blob = ShaFile.from_raw_chunks(Blob.type_num, ['a\nb', '\na\n'])
+        self.assertEqual({'a\n': 2, 'b\n': 1}, _count_blocks(blob))
+
+    def test_count_blocks_long_lines(self):  # lines longer than one block
+        a = 'a' * 64  # 64 bytes: matches _BLOCK_SIZE in diff_tree
+        data = a + 'xxx\ny\n' + a + 'zzz\n'
+        blob = make_object(Blob, data=data)
+        self.assertEqual({'a' * 64: 2, 'xxx\n': 1, 'y\n': 1, 'zzz\n': 1},
+                         _count_blocks(blob))
-- 
1.7.3.2.168.gd6b63




References