dulwich-users team mailing list archive
  
  - 
     dulwich-users team dulwich-users team
- 
    Mailing list archive
  
- 
    Message #00654
  
 [PATCH 11/13] diff_tree: Factor out a few protected methods in RenameDetector.
  
From: Dave Borowitz <dborowitz@xxxxxxxxxx>
This is intended to make subclassing easier.
Change-Id: If6c16651e97ee0b963deb6deafb3a13f041bd283
---
 dulwich/diff_tree.py |   40 +++++++++++++++++++++++-----------------
 1 files changed, 23 insertions(+), 17 deletions(-)
diff --git a/dulwich/diff_tree.py b/dulwich/diff_tree.py
index d0f4936..02c80cd 100644
--- a/dulwich/diff_tree.py
+++ b/dulwich/diff_tree.py
@@ -491,18 +491,32 @@ class RenameDetector(object):
                     self._changes.append(TreeChange(CHANGE_COPY, old, new))
         self._prune(add_paths, delete_paths)
 
-    def _find_content_renames(self):
+    def _should_find_content_renames(self):
+        return len(self._adds) * len(self._deletes) <= self._max_files ** 2
+
+    def _rename_type(self, check_paths, delete, add):
+        if check_paths and delete.old.path == add.new.path:
+            # If the paths match, this must be a split modify, so make sure it
+            # comes out as a modify.
+            return CHANGE_MODIFY
+        elif delete.type != CHANGE_DELETE:
+            # If it's in deletes but not marked as a delete, it must have been
+            # added due to find_copies_harder, and needs to be marked as a copy.
+            return CHANGE_COPY
+        return CHANGE_RENAME
+
+    def _find_content_rename_candidates(self):
+        candidates = self._candidates = []
         # TODO: Optimizations:
         #  - Compare object sizes before counting blocks.
         #  - Skip if delete's S_IFMT differs from all adds.
         #  - Skip if adds or deletes is empty.
         # Match C git's behavior of not attempting to find content renames if
         # the matrix size exceeds the threshold.
-        if len(self._adds) * len(self._deletes) > self._max_files ** 2:
+        if not self._should_find_content_renames():
             return
 
         check_paths = self._rename_threshold is not None
-        candidates = []
         for delete in self._deletes:
             if S_ISGITLINK(delete.old.mode):
                 continue  # Git links don't exist in this repo.
@@ -516,26 +530,17 @@ class RenameDetector(object):
                 score = _similarity_score(old_obj, new_obj,
                                           block_cache={old_sha: old_blocks})
                 if score > self._rename_threshold:
-                    if check_paths and delete.old.path == add.new.path:
-                        # If the paths match, this must be a split modify, so
-                        # make sure it comes out as a modify.
-                        new_type = CHANGE_MODIFY
-                    elif delete.type != CHANGE_DELETE:
-                        # If it's in deletes but not marked as a delete, it must
-                        # have been added due to find_copies_harder, and needs
-                        # to be marked as a copy.
-                        new_type = CHANGE_COPY
-                    else:
-                        new_type = CHANGE_RENAME
+                    new_type = self._rename_type(check_paths, delete, add)
                     rename = TreeChange(new_type, delete.old, add.new)
                     candidates.append((-score, rename))
 
+    def _choose_content_renames(self):
         # Sort scores from highest to lowest, but keep names in ascending order.
-        candidates.sort()
+        self._candidates.sort()
 
         delete_paths = set()
         add_paths = set()
-        for _, change in candidates:
+        for _, change in self._candidates:
             new_path = change.new.path
             if new_path in add_paths:
                 continue
@@ -588,7 +593,8 @@ class RenameDetector(object):
         self._want_unchanged = want_unchanged
         self._collect_changes(tree1_id, tree2_id)
         self._find_exact_renames()
-        self._find_content_renames()
+        self._find_content_rename_candidates()
+        self._choose_content_renames()
         self._join_modifies()
         self._prune_unchanged()
         return self._sorted_changes()
-- 
1.7.3.1
References