← Back to team overview

dulwich-users team mailing list archive

[PATCH 27/34] walk: Allow topological ordering.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

Topological ordering is applied by correcting the original commit time
order output. This is fairly slow, requiring O(N) additional time and
storage compared to not reordering. (Note that the O(N) estimate is
under the assumption that commits are not too far out of topological
order.)

Change-Id: I77713a036ff69a4f5f7fb36a5bbbe7193e3e4721
---
 dulwich/tests/test_walk.py |   57 ++++++++++++++++++++++++++++++++++++++++++++
 dulwich/walk.py            |   56 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 108 insertions(+), 5 deletions(-)

diff --git a/dulwich/tests/test_walk.py b/dulwich/tests/test_walk.py
index ead7bd1..c5d970e 100644
--- a/dulwich/tests/test_walk.py
+++ b/dulwich/tests/test_walk.py
@@ -18,6 +18,9 @@
 
 """Tests for commit walking functionality."""
 
+from dulwich._compat import (
+    permutations,
+    )
 from dulwich.diff_tree import (
     CHANGE_ADD,
     CHANGE_MODIFY,
@@ -37,8 +40,10 @@ from dulwich.objects import (
     Blob,
     )
 from dulwich.walk import (
+    ORDER_TOPO,
     WalkEntry,
     Walker,
+    _topo_reorder
     )
 from dulwich.tests import TestCase
 from utils import (
@@ -135,6 +140,14 @@ class WalkerTest(TestCase):
         self.assertWalkYields([y4], [y4.id], exclude=[x3.id])
         self.assertWalkYields([x3, x2], [x3.id], exclude=[y4.id])
 
+    def test_merge(self):
+        c1, c2, c3, c4 = self.make_commits([[1], [2, 1], [3, 1], [4, 2, 3]])
+        self.assertWalkYields([c4, c3, c2, c1], [c4.id])
+        self.assertWalkYields([c3, c1], [c3.id])
+        self.assertWalkYields([c2, c1], [c2.id])
+        self.assertWalkYields([c4, c3], [c4.id], exclude=[c2.id])
+        self.assertWalkYields([c4, c2], [c4.id], exclude=[c3.id])
+
     def test_reverse(self):
         c1, c2, c3 = self.make_linear_commits(3)
         self.assertWalkYields([c1, c2, c3], [c3.id], reverse=True)
@@ -344,3 +357,47 @@ class WalkerTest(TestCase):
         # c1 would also match, but we've deleted it, and it should get pruned
         # even with over-scanning.
         self.assertWalkYields([c11, c10, c8], [c11.id], since=7)
+
+    def assertTopoOrderEqual(self, expected_commits, commits):
+        entries = [TestWalkEntry(c, None) for c in commits]
+        actual_ids = [e.commit.id for e in list(_topo_reorder(entries))]
+        self.assertEqual([c.id for c in expected_commits], actual_ids)
+
+    def test_topo_reorder_linear(self):
+        commits = self.make_linear_commits(5)
+        commits.reverse()
+        for perm in permutations(commits):
+            self.assertTopoOrderEqual(commits, perm)
+
+    def test_topo_reorder_multiple_parents(self):
+        c1, c2, c3 = self.make_commits([[1], [2], [3, 1, 2]])
+        # Already sorted, so totally FIFO.
+        self.assertTopoOrderEqual([c3, c2, c1], [c3, c2, c1])
+        self.assertTopoOrderEqual([c3, c1, c2], [c3, c1, c2])
+
+        # c3 causes one parent to be yielded.
+        self.assertTopoOrderEqual([c3, c2, c1], [c2, c3, c1])
+        self.assertTopoOrderEqual([c3, c1, c2], [c1, c3, c2])
+
+        # c3 causes both parents to be yielded.
+        self.assertTopoOrderEqual([c3, c2, c1], [c1, c2, c3])
+        self.assertTopoOrderEqual([c3, c2, c1], [c2, c1, c3])
+
+    def test_topo_reorder_multiple_children(self):
+        c1, c2, c3 = self.make_commits([[1], [2, 1], [3, 1]])
+
+        # c2 and c3 are FIFO but c1 moves to the end.
+        self.assertTopoOrderEqual([c3, c2, c1], [c3, c2, c1])
+        self.assertTopoOrderEqual([c3, c2, c1], [c3, c1, c2])
+        self.assertTopoOrderEqual([c3, c2, c1], [c1, c3, c2])
+
+        self.assertTopoOrderEqual([c2, c3, c1], [c2, c3, c1])
+        self.assertTopoOrderEqual([c2, c3, c1], [c2, c1, c3])
+        self.assertTopoOrderEqual([c2, c3, c1], [c1, c2, c3])
+
+    def test_out_of_order_children(self):
+        c1, c2, c3, c4, c5 = self.make_commits(
+          [[1], [2, 1], [3, 2], [4, 1], [5, 3, 4]],
+          times=[2, 1, 3, 4, 5])
+        self.assertWalkYields([c5, c4, c3, c1, c2], [c5.id])
+        self.assertWalkYields([c5, c4, c3, c2, c1], [c5.id], order=ORDER_TOPO)
diff --git a/dulwich/walk.py b/dulwich/walk.py
index a54f52d..83b2939 100644
--- a/dulwich/walk.py
+++ b/dulwich/walk.py
@@ -18,6 +18,13 @@
 
 """General implementation of walking commits and their contents."""
 
+
+try:
+    from collections import defaultdict
+except ImportError:
+    from _compat import defaultdict
+
+import collections
 import heapq
 import itertools
 import os
@@ -33,6 +40,9 @@ from dulwich.errors import (
     )
 
 ORDER_DATE = 'date'
+ORDER_TOPO = 'topo'
+
+ALL_ORDERS = (ORDER_DATE, ORDER_TOPO)
 
 # Maximum number of commits to walk past a commit time boundary.
 _MAX_EXTRA_COMMITS = 5
@@ -170,7 +180,7 @@ class Walker(object):
             iterator protocol. The constructor takes a single argument, the
             Walker.
         """
-        if order not in (ORDER_DATE,):
+        if order not in ALL_ORDERS:
             raise ValueError('Unknown walk order %s' % order)
         self.store = store
         self.include = include
@@ -257,14 +267,50 @@ class Walker(object):
     def _reorder(self, results):
         """Possibly reorder a results iterator.
 
-        :param results: An iterator of results, in the order returned from the
-            queue_cls.
-        :return: An iterator or list of results, in the order required by the
-            Walker.
+        :param results: An iterator of WalkEntry objects, in the order returned
+            from the queue_cls.
+        :return: An iterator or list of WalkEntry objects, in the order required
+            by the Walker.
         """
+        if self.order == ORDER_TOPO:
+            results = _topo_reorder(results)
         if self.reverse:
             results = reversed(list(results))
         return results
 
     def __iter__(self):
         return iter(self._reorder(iter(self._next, None)))
+
+
+def _topo_reorder(entries):
+    """Reorder an iterable of entries topologically.
+
+    This works best assuming the entries are already in almost-topological
+    order, e.g. in commit time order.
+
+    :param entries: An iterable of WalkEntry objects.
+    :yield: WalkEntry objects from entries in FIFO order, except where a parent
+        would be yielded before any of its children.
+    """
+    todo = collections.deque()
+    pending = {}
+    num_children = defaultdict(int)
+    for entry in entries:
+        todo.append(entry)
+        for p in entry.commit.parents:
+            num_children[p] += 1
+
+    while todo:
+        entry = todo.popleft()
+        commit = entry.commit
+        commit_id = commit.id
+        if num_children[commit_id]:
+            pending[commit_id] = entry
+            continue
+        for parent_id in commit.parents:
+            num_children[parent_id] -= 1
+            if not num_children[parent_id]:
+                parent_entry = pending.pop(parent_id, None)
+                if parent_entry:
+                    todo.appendleft(parent_entry)
+        yield entry
-- 
1.7.3.1



References