← Back to team overview

dulwich-users team mailing list archive

[PATCH 12/34] walk: Return only commits matching specified paths.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

Change-Id: I5d171b9644b6f27f5680be9cb6a45f33ac7986d6
---
 dulwich/tests/test_walk.py |   71 ++++++++++++++++++++++++++++++++++++++++++++
 dulwich/walk.py            |   62 +++++++++++++++++++++++++++++++++-----
 2 files changed, 125 insertions(+), 8 deletions(-)

diff --git a/dulwich/tests/test_walk.py b/dulwich/tests/test_walk.py
index a910679..fcb2a3c 100644
--- a/dulwich/tests/test_walk.py
+++ b/dulwich/tests/test_walk.py
@@ -150,3 +150,74 @@ class WalkerTest(TestCase):
           ]]
         self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id],
                               exclude=[c1.id, c2.id])
+
+    def test_path_matches(self):
+        walker = Walker(None, [], paths=['foo', 'bar', 'baz/quux'])
+        self.assertTrue(walker._path_matches('foo'))
+        self.assertTrue(walker._path_matches('foo/a'))
+        self.assertTrue(walker._path_matches('foo/a/b'))
+        self.assertTrue(walker._path_matches('bar'))
+        self.assertTrue(walker._path_matches('baz/quux'))
+        self.assertTrue(walker._path_matches('baz/quux/a'))
+
+        self.assertFalse(walker._path_matches(None))
+        self.assertFalse(walker._path_matches('oops'))
+        self.assertFalse(walker._path_matches('fool'))
+        self.assertFalse(walker._path_matches('baz'))
+        self.assertFalse(walker._path_matches('baz/quu'))
+
+    def test_paths(self):
+        blob_a1 = make_object(Blob, data='a1')
+        blob_b2 = make_object(Blob, data='b2')
+        blob_a3 = make_object(Blob, data='a3')
+        blob_b3 = make_object(Blob, data='b3')
+        c1, c2, c3 = self.make_linear_commits(
+          3, trees={1: [('a', blob_a1)],
+                    2: [('a', blob_a1), ('x/b', blob_b2)],
+                    3: [('a', blob_a3), ('x/b', blob_b3)]})
+
+        self.assertWalkYields([c3, c2, c1], [c3.id])
+        self.assertWalkYields([c3, c1], [c3.id], paths=['a'])
+        self.assertWalkYields([c3, c2], [c3.id], paths=['x/b'])
+
+        # All changes are included, not just for requested paths.
+        changes = [
+          TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
+                     ('a', F, blob_a3.id)),
+          TreeChange(CHANGE_MODIFY, ('x/b', F, blob_b2.id),
+                     ('x/b', F, blob_b3.id)),
+          ]
+        self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id],
+                              max_entries=1, paths=['a'])
+
+    def test_paths_subtree(self):
+        blob_a = make_object(Blob, data='a')
+        blob_b = make_object(Blob, data='b')
+        c1, c2, c3 = self.make_linear_commits(
+          3, trees={1: [('x/a', blob_a)],
+                    2: [('b', blob_b), ('x/a', blob_a)],
+                    3: [('b', blob_b), ('x/a', blob_a), ('x/b', blob_b)]})
+        self.assertWalkYields([c2], [c3.id], paths=['b'])
+        self.assertWalkYields([c3, c1], [c3.id], paths=['x'])
+
+    def test_paths_max_entries(self):
+        blob_a = make_object(Blob, data='a')
+        blob_b = make_object(Blob, data='b')
+        c1, c2 = self.make_linear_commits(
+          2, trees={1: [('a', blob_a)],
+                    2: [('a', blob_a), ('b', blob_b)]})
+        self.assertWalkYields([c2], [c2.id], paths=['b'], max_entries=1)
+        self.assertWalkYields([c1], [c1.id], paths=['a'], max_entries=1)
+
+    def test_paths_merge(self):
+        blob_a1 = make_object(Blob, data='a1')
+        blob_a2 = make_object(Blob, data='a2')
+        blob_a3 = make_object(Blob, data='a3')
+        x1, y2, m3, m4 = self.make_commits(
+          [[1], [2], [3, 1, 2], [4, 1, 2]],
+          trees={1: [('a', blob_a1)],
+                 2: [('a', blob_a2)],
+                 3: [('a', blob_a3)],
+                 4: [('a', blob_a1)]})  # Non-conflicting
+        self.assertWalkYields([m3, y2, x1], [m3.id], paths=['a'])
+        self.assertWalkYields([y2, x1], [m4.id], paths=['a'])
diff --git a/dulwich/walk.py b/dulwich/walk.py
index 2364a2a..a9d0e9c 100644
--- a/dulwich/walk.py
+++ b/dulwich/walk.py
@@ -20,6 +20,7 @@
 
 import heapq
 import itertools
+import os
 
 from dulwich.diff_tree import (
     tree_changes,
@@ -72,7 +73,7 @@ class Walker(object):
     """
 
     def __init__(self, store, include, exclude=None, order=ORDER_DATE,
-                 reverse=False, max_entries=None):
+                 reverse=False, max_entries=None, paths=None):
         """Constructor.
 
         :param store: ObjectStore instance for looking up objects.
@@ -86,6 +87,7 @@ class Walker(object):
             memory.
         :param max_entries: The maximum number of entries to yield, or None for
             no limit.
+        :param paths: Iterable of file or subtree paths to show entries for.
         """
         self._store = store
 
@@ -94,12 +96,14 @@ class Walker(object):
         self._order = order
         self._reverse = reverse
         self._max_entries = max_entries
+        self._num_entries = 0
 
         exclude = exclude or []
         self._excluded = set(exclude)
         self._pq = []
         self._pq_set = set()
         self._done = set()
+        self._paths = paths and list(paths) or None
 
         for commit_id in itertools.chain(include, exclude):
             self._push(store[commit_id])
@@ -130,16 +134,58 @@ class Walker(object):
                 return commit
         return None
 
+    def _path_matches(self, changed_path):
+        if changed_path is None:
+            return False
+        for followed_path in self._paths:
+            if changed_path == followed_path:
+                return True
+            if (changed_path.startswith(followed_path) and
+                changed_path[len(followed_path)] == '/'):
+                return True
+        return False
+
+    def _change_matches(self, change):
+        return (self._path_matches(change.old.path) or
+                self._path_matches(change.new.path))
+
     def _make_entry(self, commit):
-        if commit is None:
-            return None
-        return WalkEntry(self._store, commit)
+        """Make a WalkEntry from a commit.
+
+        :param commit: The commit for the WalkEntry.
+        :return: A WalkEntry object, or None if no entry should be returned for
+            this commit (e.g. if it doesn't match any requested paths).
+        """
+        entry = WalkEntry(self._store, commit)
+        if self._paths is None:
+            return entry
+
+        if len(commit.parents) > 1:
+            for path_changes in entry.changes():
+                # For merge commits, only include changes with conflicts for
+                # this path. Since a rename conflict may include different
+                # old.paths, we have to check all of them.
+                for change in path_changes:
+                    if self._change_matches(change):
+                        return entry
+        else:
+            for change in entry.changes():
+                if self._change_matches(change):
+                    return entry
+        return None
 
     def _next(self):
-        limit = self._max_entries
-        if limit is not None and len(self._done) >= limit:
-            return None
-        return self._make_entry(self._pop())
+        max_entries = self._max_entries
+        while True:
+            if max_entries is not None and self._num_entries >= max_entries:
+                return None
+            commit = self._pop()
+            if commit is None:
+                return None
+            entry = self._make_entry(commit)
+            if entry:
+                self._num_entries += 1
+                return entry
 
     def __iter__(self):
         results = iter(self._next, None)
-- 
1.7.3.1



References