dulwich-users team mailing list archive
-
dulwich-users team
-
Mailing list archive
-
Message #00623
[PATCH 24/34] walk: Extract commit-time pqueue into its own class.
From: Dave Borowitz <dborowitz@xxxxxxxxxx>
Change-Id: I2fa69f8d2e14a71b1892252da544df13ea13e0c7
---
dulwich/walk.py | 172 +++++++++++++++++++++++++++++-------------------------
1 files changed, 92 insertions(+), 80 deletions(-)
diff --git a/dulwich/walk.py b/dulwich/walk.py
index b655d2c..5185b6c 100644
--- a/dulwich/walk.py
+++ b/dulwich/walk.py
@@ -76,74 +76,32 @@ class WalkEntry(object):
self.commit.id, self.changes())
-class Walker(object):
- """Object for performing a walk of commits in a store.
-
- Walker objects are initialized with a store and other options and can then
- be treated as iterators of Commit objects.
- """
-
- def __init__(self, store, include, exclude=None, order=ORDER_DATE,
- reverse=False, max_entries=None, paths=None,
- rename_detector=None, follow=False, since=None, until=None):
- """Constructor.
-
- :param store: ObjectStore instance for looking up objects.
- :param include: Iterable of SHAs of commits to include along with their
- ancestors.
- :param exclude: Iterable of SHAs of commits to exclude along with their
- ancestors, overriding includes.
- :param order: ORDER_* constant specifying the order of results. Anything
- other than ORDER_DATE may result in O(n) memory usage.
- :param reverse: If True, reverse the order of output, requiring O(n)
- memory.
- :param max_entries: The maximum number of entries to yield, or None for
- no limit.
- :param paths: Iterable of file or subtree paths to show entries for.
- :param rename_detector: diff.RenameDetector object for detecting
- renames.
- :param follow: If True, follow path across renames/copies. Forces a
- default rename_detector.
- :param since: Timestamp to list commits after.
- :param until: Timestamp to list commits before.
- """
- self.store = store
+class _CommitTimeQueue(object):
+ """Priority queue of WalkEntry objects by commit time."""
- if order not in (ORDER_DATE,):
- raise ValueError('Unknown walk order %s' % order)
- self._order = order
- self._reverse = reverse
- self._max_entries = max_entries
- self._num_entries = 0
- if follow and not rename_detector:
- rename_detector = RenameDetector(store)
- self.rename_detector = rename_detector
-
- exclude = exclude or []
- self._excluded = set(exclude)
+ def __init__(self, walker):
+ self._walker = walker
+ self._store = walker.store
+ self._excluded = walker.excluded
self._pq = []
self._pq_set = set()
self._done = set()
- self._paths = paths and set(paths) or None
- self._follow = follow
-
- self._since = since
- self._until = until
+ self._min_time = walker.since
self._extra_commits_left = _MAX_EXTRA_COMMITS
- for commit_id in itertools.chain(include, exclude):
+ for commit_id in itertools.chain(walker.include, walker.excluded):
self._push(commit_id)
def _push(self, commit_id):
try:
- commit = self.store[commit_id]
+ commit = self._store[commit_id]
except KeyError:
raise MissingCommitError(commit_id)
if commit_id not in self._pq_set and commit_id not in self._done:
heapq.heappush(self._pq, (-commit.commit_time, commit))
self._pq_set.add(commit_id)
- def _pop(self):
+ def next(self):
while self._pq:
_, commit = heapq.heappop(self._pq)
sha = commit.id
@@ -156,9 +114,9 @@ class Walker(object):
self._excluded.update(commit.parents)
self._done.add(commit.id)
- if self._since is not None:
- if commit.commit_time < self._since:
- # We want to stop walking at since, but commits at the
+ if self._min_time is not None:
+ if commit.commit_time < self._min_time:
+ # We want to stop walking at min_time, but commits at the
# boundary may be out of order with respect to their
# parents. So we walk _MAX_EXTRA_COMMITS more commits once
# we hit this boundary.
@@ -173,13 +131,68 @@ class Walker(object):
self._push(parent_id)
if not is_excluded:
- return commit
+ return WalkEntry(self._walker, commit)
return None
+
+class Walker(object):
+ """Object for performing a walk of commits in a store.
+
+ Walker objects are initialized with a store and other options and can then
+ be treated as iterators of Commit objects.
+ """
+
+ def __init__(self, store, include, exclude=None, order=ORDER_DATE,
+ reverse=False, max_entries=None, paths=None,
+ rename_detector=None, follow=False, since=None, until=None,
+ queue_cls=_CommitTimeQueue):
+ """Constructor.
+
+ :param store: ObjectStore instance for looking up objects.
+ :param include: Iterable of SHAs of commits to include along with their
+ ancestors.
+ :param exclude: Iterable of SHAs of commits to exclude along with their
+ ancestors, overriding includes.
+ :param order: ORDER_* constant specifying the order of results. Anything
+ other than ORDER_DATE may result in O(n) memory usage.
+ :param reverse: If True, reverse the order of output, requiring O(n)
+ memory.
+ :param max_entries: The maximum number of entries to yield, or None for
+ no limit.
+ :param paths: Iterable of file or subtree paths to show entries for.
+ :param rename_detector: diff.RenameDetector object for detecting
+ renames.
+ :param follow: If True, follow path across renames/copies. Forces a
+ default rename_detector.
+ :param since: Timestamp to list commits after.
+ :param until: Timestamp to list commits before.
+ :param queue_cls: A class to use for a queue of commits, supporting the
+ iterator protocol. The constructor takes a single argument, the
+ Walker.
+ """
+ if order not in (ORDER_DATE,):
+ raise ValueError('Unknown walk order %s' % order)
+ self.store = store
+ self.include = include
+ self.excluded = set(exclude or [])
+ self.order = order
+ self.reverse = reverse
+ self.max_entries = max_entries
+ self.paths = paths and set(paths) or None
+ if follow and not rename_detector:
+ rename_detector = RenameDetector(store)
+ self.rename_detector = rename_detector
+ self.follow = follow
+ self.since = since
+ self.until = until
+
+ self._num_entries = 0
+ self._queue = queue_cls(self)
+
def _path_matches(self, changed_path):
if changed_path is None:
return False
- for followed_path in self._paths:
+ for followed_path in self.paths:
if changed_path == followed_path:
return True
if (changed_path.startswith(followed_path) and
@@ -191,29 +204,29 @@ class Walker(object):
old_path = change.old.path
new_path = change.new.path
if self._path_matches(new_path):
- if self._follow and change.type in RENAME_CHANGE_TYPES:
- self._paths.add(old_path)
- self._paths.remove(new_path)
+ if self.follow and change.type in RENAME_CHANGE_TYPES:
+ self.paths.add(old_path)
+ self.paths.remove(new_path)
return True
elif self._path_matches(old_path):
return True
return False
- def _make_entry(self, commit):
- """Make a WalkEntry from a commit.
+ def _should_return(self, entry):
+ """Determine if a walk entry should be returned..
- :param commit: The commit for the WalkEntry.
- :return: A WalkEntry object, or None if no entry should be returned for
- this commit (e.g. if it doesn't match any requested paths).
+ :param entry: The WalkEntry to consider.
+ :return: True if the WalkEntry should be returned by this walk, or False
+ otherwise (e.g. if it doesn't match any requested paths).
"""
- if self._since is not None and commit.commit_time < self._since:
- return None
- if self._until is not None and commit.commit_time > self._until:
- return None
+ commit = entry.commit
+ if self.since is not None and commit.commit_time < self.since:
+ return False
+ if self.until is not None and commit.commit_time > self.until:
+ return False
- entry = WalkEntry(self, commit)
- if self._paths is None:
- return entry
+ if self.paths is None:
+ return True
if len(commit.parents) > 1:
for path_changes in entry.changes():
@@ -222,28 +235,27 @@ class Walker(object):
# old.paths, we have to check all of them.
for change in path_changes:
if self._change_matches(change):
- return entry
+ return True
else:
for change in entry.changes():
if self._change_matches(change):
- return entry
+ return True
return None
def _next(self):
- max_entries = self._max_entries
+ max_entries = self.max_entries
while True:
if max_entries is not None and self._num_entries >= max_entries:
return None
- commit = self._pop()
- if commit is None:
+ entry = self._queue.next()
+ if entry is None:
return None
- entry = self._make_entry(commit)
- if entry:
+ if self._should_return(entry):
self._num_entries += 1
return entry
def __iter__(self):
results = iter(self._next, None)
- if self._reverse:
+ if self.reverse:
results = reversed(list(results))
return iter(results)
--
1.7.3.1
References