← Back to team overview

dulwich-users team mailing list archive

[PATCH 22/34] walk: Handle a small, fixed number of out-of-date-order commits.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

Change-Id: If07647d28f6636b1e2279e5eea327ba1fc56aa74
---
 dulwich/tests/test_walk.py |   16 +++++++++++-----
 dulwich/walk.py            |   22 +++++++++++++++++++---
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/dulwich/tests/test_walk.py b/dulwich/tests/test_walk.py
index ca2e2fc..0fabd4a 100644
--- a/dulwich/tests/test_walk.py
+++ b/dulwich/tests/test_walk.py
@@ -329,8 +329,14 @@ class WalkerTest(TestCase):
         self.assertWalkYields([c2], [c3.id], since=100, until=100)
         self.assertWalkYields([c2], [c3.id], since=50, until=150)
 
-    def test_since_prune(self):
-        c1, c2, c3 = self.make_linear_commits(3)
-        del self.store[c1.id]
-        self.assertRaises(MissingCommitError, list, Walker(self.store, [c3.id]))
-        self.assertWalkYields([c3], [c3.id], since=101)
+    def test_since_over_scan(self):
+        times = [9, 0, 1, 2, 3, 4, 5, 8, 6, 7, 9]
+        attrs = dict((i + 1, {'commit_time': t}) for i, t in enumerate(times))
+        commits = self.make_linear_commits(11, attrs=attrs)
+        c8, _, c10, c11 = commits[-4:]
+        del self.store[commits[0].id]
+        # c9 is older than we want to walk, but is out of order with its parent,
+        # so we need to walk past it to get to c8.
+        # c1 would also match, but we've deleted it, and it should get pruned
+        # even with over-scanning.
+        self.assertWalkYields([c11, c10, c8], [c11.id], since=7)
diff --git a/dulwich/walk.py b/dulwich/walk.py
index 502951a..35a2002 100644
--- a/dulwich/walk.py
+++ b/dulwich/walk.py
@@ -34,6 +34,9 @@ from dulwich.errors import (
 
 ORDER_DATE = 'date'
 
+# Maximum number of commits to walk past a commit time boundary.
+_MAX_EXTRA_COMMITS = 5
+
 
 class WalkEntry(object):
     """Object encapsulating a single result from a walk."""
@@ -126,6 +129,7 @@ class Walker(object):
 
         self._since = since
         self._until = until
+        self._extra_commits_left = _MAX_EXTRA_COMMITS
 
         for commit_id in itertools.chain(include, exclude):
             self._push(commit_id)
@@ -152,9 +156,21 @@ class Walker(object):
                 self._excluded.update(commit.parents)
 
             self._done.add(commit.id)
-            if self._since is None or commit.commit_time >= self._since:
-                for parent_id in commit.parents:
-                    self._push(parent_id)
+            if self._since is not None:
+                if commit.commit_time < self._since:
+                    # We want to stop walking at since, but commits at the
+                    # boundary may be out of order with respect to their
+                    # parents. So we walk _MAX_EXTRA_COMMITS more commits once
+                    # we hit this boundary.
+                    self._extra_commits_left -= 1
+                    if not self._extra_commits_left:
+                        break
+                else:
+                    # We're not at a boundary, so reset the counter.
+                    self._extra_commits_left = _MAX_EXTRA_COMMITS
+
+            for parent_id in commit.parents:
+                self._push(parent_id)
 
             if not is_excluded:
                 return commit
-- 
1.7.3.1



References