dulwich-users team mailing list archive
-
dulwich-users team
-
Mailing list archive
-
Message #00654
[PATCH 11/13] diff_tree: Factor out a few protected methods in RenameDetector.
From: Dave Borowitz <dborowitz@xxxxxxxxxx>
This is intended to make subclassing easier.
Change-Id: If6c16651e97ee0b963deb6deafb3a13f041bd283
---
dulwich/diff_tree.py | 40 +++++++++++++++++++++++-----------------
1 files changed, 23 insertions(+), 17 deletions(-)
diff --git a/dulwich/diff_tree.py b/dulwich/diff_tree.py
index d0f4936..02c80cd 100644
--- a/dulwich/diff_tree.py
+++ b/dulwich/diff_tree.py
@@ -491,18 +491,32 @@ class RenameDetector(object):
self._changes.append(TreeChange(CHANGE_COPY, old, new))
self._prune(add_paths, delete_paths)
- def _find_content_renames(self):
+ def _should_find_content_renames(self):
+ return len(self._adds) * len(self._deletes) <= self._max_files ** 2
+
+ def _rename_type(self, check_paths, delete, add):
+ if check_paths and delete.old.path == add.new.path:
+ # If the paths match, this must be a split modify, so make sure it
+ # comes out as a modify.
+ return CHANGE_MODIFY
+ elif delete.type != CHANGE_DELETE:
+ # If it's in deletes but not marked as a delete, it must have been
+ # added due to find_copies_harder, and needs to be marked as a copy.
+ return CHANGE_COPY
+ return CHANGE_RENAME
+
+ def _find_content_rename_candidates(self):
+ candidates = self._candidates = []
# TODO: Optimizations:
# - Compare object sizes before counting blocks.
# - Skip if delete's S_IFMT differs from all adds.
# - Skip if adds or deletes is empty.
# Match C git's behavior of not attempting to find content renames if
# the matrix size exceeds the threshold.
- if len(self._adds) * len(self._deletes) > self._max_files ** 2:
+ if not self._should_find_content_renames():
return
check_paths = self._rename_threshold is not None
- candidates = []
for delete in self._deletes:
if S_ISGITLINK(delete.old.mode):
continue # Git links don't exist in this repo.
@@ -516,26 +530,17 @@ class RenameDetector(object):
score = _similarity_score(old_obj, new_obj,
block_cache={old_sha: old_blocks})
if score > self._rename_threshold:
- if check_paths and delete.old.path == add.new.path:
- # If the paths match, this must be a split modify, so
- # make sure it comes out as a modify.
- new_type = CHANGE_MODIFY
- elif delete.type != CHANGE_DELETE:
- # If it's in deletes but not marked as a delete, it must
- # have been added due to find_copies_harder, and needs
- # to be marked as a copy.
- new_type = CHANGE_COPY
- else:
- new_type = CHANGE_RENAME
+ new_type = self._rename_type(check_paths, delete, add)
rename = TreeChange(new_type, delete.old, add.new)
candidates.append((-score, rename))
+ def _choose_content_renames(self):
# Sort scores from highest to lowest, but keep names in ascending order.
- candidates.sort()
+ self._candidates.sort()
delete_paths = set()
add_paths = set()
- for _, change in candidates:
+ for _, change in self._candidates:
new_path = change.new.path
if new_path in add_paths:
continue
@@ -588,7 +593,8 @@ class RenameDetector(object):
self._want_unchanged = want_unchanged
self._collect_changes(tree1_id, tree2_id)
self._find_exact_renames()
- self._find_content_renames()
+ self._find_content_rename_candidates()
+ self._choose_content_renames()
self._join_modifies()
self._prune_unchanged()
return self._sorted_changes()
--
1.7.3.1
References