← Back to team overview

dulwich-users team mailing list archive

[PATCH 30/34] walk: Exclude parents more aggressively.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

Change-Id: I74ebbc4812233708755ad67ce77df0227ccb1309
---
 dulwich/walk.py |   27 ++++++++++++++++++++++-----
 1 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/dulwich/walk.py b/dulwich/walk.py
index 34ba3f1..8050a33 100644
--- a/dulwich/walk.py
+++ b/dulwich/walk.py
@@ -95,6 +95,7 @@ class _CommitTimeQueue(object):
         self._excluded = walker.excluded
         self._pq = []
         self._pq_set = set()
+        self._seen = set()
         self._done = set()
         self._min_time = walker.since
         self._extra_commits_left = _MAX_EXTRA_COMMITS
@@ -111,6 +112,22 @@ class _CommitTimeQueue(object):
         if commit_id not in self._pq_set and commit_id not in self._done:
             heapq.heappush(self._pq, (-commit.commit_time, commit))
             self._pq_set.add(commit_id)
+            self._seen.add(commit_id)
+
+    def _exclude_parents(self, commit):
+        excluded = self._excluded
+        seen = self._seen
+        todo = [commit]
+        while todo:
+            commit = todo.pop()
+            for parent in commit.parents:
+                if parent not in excluded and parent in seen:
+                    # TODO: This is inefficient unless the object store does
+                    # some caching (which DiskObjectStore currently does not).
+                    # We could either add caching in this class or pass around
+                    # parsed queue entry objects instead of commits.
+                    todo.append(self._store[parent])
+                excluded.add(parent)
 
     def next(self):
         if self._is_finished:
@@ -121,12 +138,15 @@ class _CommitTimeQueue(object):
             self._pq_set.remove(sha)
             if sha in self._done:
                 continue
+            self._done.add(commit.id)
+
+            for parent_id in commit.parents:
+                self._push(parent_id)
 
             is_excluded = sha in self._excluded
             if is_excluded:
-                self._excluded.update(commit.parents)
+                self._exclude_parents(commit)
 
-            self._done.add(commit.id)
             if self._min_time is not None:
                 if commit.commit_time < self._min_time:
                     # We want to stop walking at min_time, but commits at the
@@ -140,9 +160,6 @@ class _CommitTimeQueue(object):
                     # We're not at a boundary, so reset the counter.
                     self._extra_commits_left = _MAX_EXTRA_COMMITS
 
-            for parent_id in commit.parents:
-                self._push(parent_id)
-
             if not is_excluded:
                 return WalkEntry(self._walker, commit)
         self._is_finished = True
-- 
1.7.3.1



References