← Back to team overview

dulwich-users team mailing list archive

[PATCH 06/34] Add a simple, extensible framework for commit walking.

 

From: Dave Borowitz <dborowitz@xxxxxxxxxx>

Change-Id: I0b2d440461ae1e84f75b40eec48720bd43a27ce3
---
 NEWS                       |    3 +
 dulwich/tests/test_walk.py |   77 +++++++++++++++++++++++++++++++++++
 dulwich/walk.py            |   97 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 177 insertions(+), 0 deletions(-)
 create mode 100644 dulwich/tests/test_walk.py
 create mode 100644 dulwich/walk.py

diff --git a/NEWS b/NEWS
index a064cac..838b717 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,9 @@
     a pack, with implementations for pack indexing and inflation.
     (Dave Borowitz)
 
+  * New walk module with a Walker class for customizable commit walking.
+    (Dave Borowitz)
+
  BUG FIXES
 
   * Avoid storing all objects in memory when writing pack.
diff --git a/dulwich/tests/test_walk.py b/dulwich/tests/test_walk.py
new file mode 100644
index 0000000..938ef8a
--- /dev/null
+++ b/dulwich/tests/test_walk.py
@@ -0,0 +1,77 @@
+# test_walk.py -- Tests for commit walking functionality.
+# Copyright (C) 2010 Google, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# or (at your option) any later version of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+"""Tests for commit walking functionality."""
+
+from dulwich.object_store import (
+    MemoryObjectStore,
+    )
+from dulwich.walk import (
+    Walker,
+    )
+from dulwich.tests import TestCase
+from utils import (
+    build_commit_graph,
+    )
+
+
+class WalkerTest(TestCase):
+
+    def setUp(self):
+        self.store = MemoryObjectStore()
+
+    def make_commits(self, commit_spec, **kwargs):
+        return build_commit_graph(self.store, commit_spec, **kwargs)
+
+    def make_linear_commits(self, num_commits, **kwargs):
+        commit_spec = []
+        for i in xrange(1, num_commits + 1):
+            c = [i]
+            if i > 1:
+                c.append(i - 1)
+            commit_spec.append(c)
+        return self.make_commits(commit_spec, **kwargs)
+
+    def assertWalkYields(self, expected, *args, **kwargs):
+        walker = Walker(self.store, *args, **kwargs)
+        actual = list(walker)
+        self.assertEqual(expected, actual)
+
+    def test_linear(self):
+        c1, c2, c3 = self.make_linear_commits(3)
+        self.assertWalkYields([c1], [c1.id])
+        self.assertWalkYields([c2, c1], [c2.id])
+        self.assertWalkYields([c3, c2, c1], [c3.id])
+        self.assertWalkYields([c3, c2, c1], [c3.id, c1.id])
+        self.assertWalkYields([c3, c2], [c3.id], exclude=[c1.id])
+        self.assertWalkYields([c3, c2], [c3.id, c1.id], exclude=[c1.id])
+        self.assertWalkYields([c3], [c3.id, c1.id], exclude=[c2.id])
+
+    def test_branch(self):
+        c1, x2, x3, y4 = self.make_commits([[1], [2, 1], [3, 2], [4, 1]])
+        self.assertWalkYields([x3, x2, c1], [x3.id])
+        self.assertWalkYields([y4, c1], [y4.id])
+        self.assertWalkYields([y4, x2, c1], [y4.id, x2.id])
+        self.assertWalkYields([y4, x2], [y4.id, x2.id], exclude=[c1.id])
+        self.assertWalkYields([y4, x3], [y4.id, x3.id], exclude=[x2.id])
+        self.assertWalkYields([y4], [y4.id], exclude=[x3.id])
+        self.assertWalkYields([x3, x2], [x3.id], exclude=[y4.id])
+
+    def test_reverse(self):
+        c1, c2, c3 = self.make_linear_commits(3)
+        self.assertWalkYields([c1, c2, c3], [c3.id], reverse=True)
diff --git a/dulwich/walk.py b/dulwich/walk.py
new file mode 100644
index 0000000..ac14877
--- /dev/null
+++ b/dulwich/walk.py
@@ -0,0 +1,97 @@
+# walk.py -- General implementation of walking commits and their contents.
+# Copyright (C) 2010 Google, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# or (at your option) any later version of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+"""General implementation of walking commits and their contents."""
+
+import heapq
+import itertools
+
+ORDER_DATE = 'date'
+
+
+class Walker(object):
+    """Object for performing a walk of commits in a store.
+
+    Walker objects are initialized with a store and other options and can then
+    be treated as iterators of Commit objects.
+    """
+
+    def __init__(self, store, include, exclude=None, order=ORDER_DATE,
+                 reverse=False):
+        """Constructor.
+
+        :param store: ObjectStore instance for looking up objects.
+        :param include: Iterable of SHAs of commits to include along with their
+            ancestors.
+        :param exclude: Iterable of SHAs of commits to exclude along with their
+            ancestors, overriding includes.
+        :param order: ORDER_* constant specifying the order of results. Anything
+            other than ORDER_DATE may result in O(n) memory usage.
+        :param reverse: If True, reverse the order of output, requiring O(n)
+            memory.
+        """
+        self._store = store
+
+        if order not in (ORDER_DATE,):
+            raise ValueError('Unknown walk order %s' % order)
+        self._order = order
+        self._reverse = reverse
+
+        exclude = exclude or []
+        self._excluded = set(exclude)
+        self._pq = []
+        self._pq_set = set()
+        self._done = set()
+
+        for commit_id in itertools.chain(include, exclude):
+            self._push(store[commit_id])
+
+    def _push(self, commit):
+        sha = commit.id
+        if sha not in self._pq_set and sha not in self._done:
+            heapq.heappush(self._pq, (-commit.commit_time, commit))
+            self._pq_set.add(sha)
+
+    def _pop(self):
+        while self._pq:
+            _, commit = heapq.heappop(self._pq)
+            sha = commit.id
+            self._pq_set.remove(sha)
+            if sha in self._done:
+                continue
+
+            is_excluded = sha in self._excluded
+            if is_excluded:
+                self._excluded.update(commit.parents)
+
+            self._done.add(commit.id)
+            for parent_id in commit.parents:
+                self._push(self._store[parent_id])
+
+            if not is_excluded:
+                return commit
+        return None
+
+    def _next(self):
+        return self._pop()
+
+    def __iter__(self):
+        results = iter(self._next, None)
+        if self._reverse:
+            results = reversed(list(results))
+        return iter(results)
-- 
1.7.3.1



References