← Back to team overview

duplicity-team team mailing list archive

[Merge] lp:~mterry/duplicity/leftover-sigtar into lp:duplicity

 

Michael Terry has proposed merging lp:~mterry/duplicity/leftover-sigtar into lp:duplicity.

Requested reviews:
  duplicity-team (duplicity-team)
Related bugs:
  Bug #1031269 in Duplicity: "remove-all-but-n-full broken"
  https://bugs.launchpad.net/duplicity/+bug/1031269

For more details, see:
https://code.launchpad.net/~mterry/duplicity/leftover-sigtar/+merge/125285

So currently, duplicity does not delete signature files when doing a remove-all-but-n operation.  Seems wrong, since those signature files are now useless and take up space.

This branch does several things:
1) Make remove-all-but-n operate on chains.  In practice it did before, since the sets it operated on always came from complete chains (i.e. it never used only some of the sets from a chain)
2) Add a new method to get all signature chains before a certain time.
3) Use this new method to also delete signature chains during remove-all-but operations.

And it cleans up the cleanuptest.py file:
1) Removes crufty, unused code
2) Disallows changing the destination folder for the test, which no one would ever want to do and isn't really supported anyway
3) Add some additional checks to the existing test
4) Adds two new methods to test remove-all-but-n and remove-all-inc-of-but-n-full
-- 
https://code.launchpad.net/~mterry/duplicity/leftover-sigtar/+merge/125285
Your team duplicity-team is requested to review the proposed merge of lp:~mterry/duplicity/leftover-sigtar into lp:duplicity.
=== modified file 'bin/duplicity'
--- bin/duplicity	2012-09-13 14:08:52 +0000
+++ bin/duplicity	2012-09-19 17:36:20 +0000
@@ -833,6 +833,10 @@
         """Return string listing times of sets in setlist"""
         return "\n".join(map(lambda s: dup_time.timetopretty(s.get_time()),
                              setlist))
+    def chain_times_str(chainlist):
+        """Return string listing times of chains in chainlist"""
+        return "\n".join(map(lambda s: dup_time.timetopretty(s.end_time),
+                             chainlist))
 
     req_list = col_stats.get_older_than_required(globals.remove_time)
     if req_list:
@@ -847,32 +851,39 @@
                    "However, it will not be deleted.  To remove all your backups, "
                    "manually purge the repository."))
 
-    setlist = col_stats.get_older_than(globals.remove_time)
-    if not setlist:
+    chainlist = col_stats.get_chains_older_than(globals.remove_time)
+    if not chainlist:
         log.Notice(_("No old backup sets found, nothing deleted."))
         return
     if globals.force:
-        log.Notice(gettext.ngettext("Deleting backup set at time:",
-                                    "Deleting backup sets at times:",
-                                    len(setlist)) +
-                   "\n" + set_times_str(setlist))
-        setlist.reverse() # save oldest for last
-        for set in setlist:
+        log.Notice(gettext.ngettext("Deleting backup chain at time:",
+                                    "Deleting backup chains at times:",
+                                    len(chainlist)) +
+                   "\n" + chain_times_str(chainlist))
+        # Add signature files too, since they won't be needed anymore
+        chainlist += col_stats.get_signature_chains_older_than(globals.remove_time)
+        chainlist.reverse() # save oldest for last
+        for chain in chainlist:
             # if remove_all_inc_of_but_n_full_mode mode, remove only incrementals one and not full
-            if globals.dry_run:
-                log.Notice("(Not: dry-run) Deleting set " + set.type + " " + dup_time.timetopretty(set.get_time()))
+            if globals.remove_all_inc_of_but_n_full_mode:
+                if isinstance(chain, collections.SignatureChain):
+                    chain_desc = _("Deleting incremental signature chain %s")
+                else:
+                    chain_desc = _("Deleting incremental backup chain %s")
             else:
-                if globals.remove_all_inc_of_but_n_full_mode and (set.type != "inc") :
-                    log.Notice("Not deleting set " + set.type + " " + dup_time.timetopretty(set.get_time()))
-                else :
-                    log.Notice("Deleting set " + set.type + " " + dup_time.timetopretty(set.get_time()))
-                    set.delete()
+                if isinstance(chain, collections.SignatureChain):
+                    chain_desc = _("Deleting complete signature chain %s")
+                else:
+                    chain_desc = _("Deleting complete backup chain %s")
+            log.Notice(chain_desc % dup_time.timetopretty(chain.end_time))
+            if not globals.dry_run:
+                chain.delete(keep_full=globals.remove_all_inc_of_but_n_full_mode)
         col_stats.set_values(sig_chain_warning=None)
     else:
-        log.Notice(gettext.ngettext("Found old backup set at the following time:",
-                                    "Found old backup sets at the following times:",
-                                    len(setlist)) +
-                   "\n" + set_times_str(setlist) + "\n" +
+        log.Notice(gettext.ngettext("Found old backup chain at the following time:",
+                                    "Found old backup chains at the following times:",
+                                    len(chainlist)) +
+                   "\n" + chain_times_str(chainlist) + "\n" +
                    _("Rerun command with --force option to actually delete."))
 
 

=== modified file 'duplicity/collections.py'
--- duplicity/collections.py	2011-11-20 16:53:09 +0000
+++ duplicity/collections.py	2012-09-19 17:36:20 +0000
@@ -326,13 +326,13 @@
         assert self.end_time
         return True
 
-    def delete(self):
+    def delete(self, keep_full=False):
         """
         Delete all sets in chain, in reverse order
         """
         for i in range(len(self.incset_list)-1, -1, -1):
             self.incset_list[i].delete()
-        if self.fullset:
+        if self.fullset and not keep_full:
             self.fullset.delete()
 
     def get_sets_at_time(self, time):
@@ -530,7 +530,7 @@
             filename_to_fileobj = self.backend.get_fileobj_read
         return map(filename_to_fileobj, self.get_filenames(time))
 
-    def delete(self):
+    def delete(self, keep_full=False):
         """
         Remove all files in signature set
         """
@@ -538,12 +538,14 @@
         if self.archive_dir:
             for i in range(len(self.inclist)-1, -1, -1):
                 self.archive_dir.append(self.inclist[i]).delete()
-            self.archive_dir.append(self.fullsig).delete()
+            if not keep_full:
+                self.archive_dir.append(self.fullsig).delete()
         else:
             assert self.backend
             inclist_copy = self.inclist[:]
             inclist_copy.reverse()
-            inclist_copy.append(self.fullsig)
+            if not keep_full:
+                inclist_copy.append(self.fullsig)
             self.backend.delete(inclist_copy)
 
     def get_filenames(self, time = None):
@@ -1009,8 +1011,6 @@
             if self.matched_chain_pair:
                 matched_sig_chain = self.matched_chain_pair[0]
                 for sig_chain in self.all_sig_chains:
-                    print sig_chain.start_time, matched_sig_chain.start_time,
-                    print sig_chain.end_time, matched_sig_chain.end_time
                     if (sig_chain.start_time == matched_sig_chain.start_time and
                         sig_chain.end_time == matched_sig_chain.end_time):
                         old_sig_chains.remove(sig_chain)
@@ -1032,10 +1032,43 @@
 
     def get_chains_older_than(self, t):
         """
-        Return a list of chains older than time t
-        """
-        assert self.values_set
-        return filter(lambda c: c.end_time < t, self.all_backup_chains)
+        Returns a list of backup chains older than the given time t
+
+        All of the times will be associated with an intact chain.
+        Furthermore, none of the times will be of a chain which a newer
+        set may depend on.  For instance, if set A is a full set older
+        than t, and set B is an incremental based on A which is newer
+        than t, then the time of set A will not be returned.
+        """
+        assert self.values_set
+        old_chains = []
+        for chain in self.all_backup_chains:
+            if chain.end_time < t and (
+                not self.matched_chain_pair or
+                chain is not self.matched_chain_pair[1]):
+                # don't delete the active (matched) chain
+                old_chains.append(chain)
+        return old_chains
+
+    def get_signature_chains_older_than(self, t):
+        """
+        Returns a list of signature chains older than the given time t
+
+        All of the times will be associated with an intact chain.
+        Furthermore, none of the times will be of a chain which a newer
+        set may depend on.  For instance, if set A is a full set older
+        than t, and set B is an incremental based on A which is newer
+        than t, then the time of set A will not be returned.
+        """
+        assert self.values_set
+        old_chains = []
+        for chain in self.all_sig_chains:
+            if chain.end_time < t and (
+                not self.matched_chain_pair or
+                chain is not self.matched_chain_pair[0]):
+                # don't delete the active (matched) chain
+                old_chains.append(chain)
+        return old_chains
 
     def get_last_full_backup_time(self):
         """
@@ -1098,10 +1131,7 @@
         """
         old_sets = []
         for chain in self.get_chains_older_than(t):
-            if (not self.matched_chain_pair or
-                chain is not self.matched_chain_pair[1]):
-                # don't delete the active (matched) chain
-                old_sets.extend(chain.get_all_sets())
+            old_sets.extend(chain.get_all_sets())
         return self.sort_sets(old_sets)
 
     def get_older_than_required(self, t):

=== modified file 'testing/tests/cleanuptest.py'
--- testing/tests/cleanuptest.py	2011-12-05 21:49:18 +0000
+++ testing/tests/cleanuptest.py	2012-09-19 17:36:20 +0000
@@ -27,16 +27,10 @@
 
 helper.setup()
 
-# This can be changed to select the URL to use
-backend_url = "file://testfiles/output"
-
 # Extra arguments to be passed to duplicity
 other_args = ["-v0", "--no-print-statistics"]
 #other_args = []
 
-# If this is set to true, after each backup, verify contents
-verify = 1
-
 class CmdError(Exception):
     """Indicates an error running an external command"""
     pass
@@ -47,6 +41,7 @@
     """
     def setUp(self):
         assert not os.system("tar xzf testfiles.tar.gz > /dev/null 2>&1")
+        self.deltmp()
 
     def tearDown(self):
         assert not os.system("rm -rf testfiles tempdir temp2.tar")
@@ -55,6 +50,7 @@
         """
         Run duplicity binary with given arguments and options
         """
+        before_files = set(os.listdir("testfiles/output"))
         options.append("--archive-dir testfiles/cache")
         cmd_list = ["duplicity"]
         cmd_list.extend(options + ["--allow-source-mismatch"])
@@ -71,6 +67,8 @@
         return_val = os.system(cmdline)
         if return_val:
             raise CmdError(return_val)
+        after_files = set(os.listdir("testfiles/output"))
+        return after_files - before_files
 
     def backup(self, type, input_dir, options = [], current_time = None):
         """
@@ -79,108 +77,90 @@
         options = options[:]
         if type == "full":
             options.insert(0, 'full')
-        args = [input_dir, "'%s'" % backend_url]
-        self.run_duplicity(args, options, current_time)
+        args = [input_dir, "file://testfiles/output"]
+        new_files = self.run_duplicity(args, options, current_time)
         # If a chain ends with time X and the next full chain begins at time X,
         # we may trigger an assert in collections.py.  This way, we avoid
         # such problems
         time.sleep(1)
-
-    def restore(self, file_to_restore = None, time = None, options = [],
-                current_time = None):
-        options = options[:] # just nip any mutability problems in bud
-        assert not os.system("rm -rf testfiles/restore_out")
-        args = ["'%s'" % backend_url, "testfiles/restore_out"]
-        if file_to_restore:
-            options.extend(['--file-to-restore', file_to_restore])
-        if time:
-            options.extend(['--restore-time', str(time)])
-        self.run_duplicity(args, options, current_time)
+        return new_files
 
     def verify(self, dirname, file_to_verify = None, time = None, options = [],
                current_time = None):
         options = ["verify"] + options[:]
-        args = ["'%s'" % backend_url, dirname]
+        args = ["file://testfiles/output", dirname]
         if file_to_verify:
             options.extend(['--file-to-restore', file_to_verify])
         if time:
             options.extend(['--restore-time', str(time)])
-        self.run_duplicity(args, options, current_time)
+        return self.run_duplicity(args, options, current_time)
 
     def cleanup(self, options = []):
         """
         Run duplicity cleanup to default directory
         """
         options = ["cleanup"] + options[:]
-        args = ["'%s'" % backend_url]
-        self.run_duplicity(args, options)
+        args = ["file://testfiles/output"]
+        return self.run_duplicity(args, options)
 
     def deltmp(self):
         """
         Delete temporary directories
         """
-        assert not os.system("rm -rf testfiles/output "
-                             "testfiles/restore_out testfiles/cache")
+        assert not os.system("rm -rf testfiles/output testfiles/cache")
         assert not os.system("mkdir testfiles/output testfiles/cache")
-        backend = duplicity.backend.get_backend(backend_url)
+        backend = duplicity.backend.get_backend("file://testfiles/output")
         bl = backend.list()
         if bl:
             backend.delete(backend.list())
         backend.close()
 
-    def runtest(self, dirlist, backup_options = [], restore_options = []):
-        """
-        Run backup/restore test on directories in dirlist
-        """
-        assert len(dirlist) >= 1
-        self.deltmp()
-
-        # Back up directories to local backend
-        current_time = 100000
-        self.backup("full", dirlist[0], current_time = current_time,
-                    options = backup_options)
-        for new_dir in dirlist[1:]:
-            current_time += 100000
-            self.backup("inc", new_dir, current_time = current_time,
-                        options = backup_options)
-
-        # Restore each and compare them
-        for i in range(len(dirlist)):
-            dirname = dirlist[i]
-            current_time = 100000*(i + 1)
-            self.restore(time = current_time, options = restore_options)
-            self.check_same(dirname, "testfiles/restore_out")
-            if verify:
-                self.verify(dirname,
-                            time = current_time, options = restore_options)
-
-    def check_same(self, filename1, filename2):
-        """
-        Verify two filenames are the same
-        """
-        path1, path2 = path.Path(filename1), path.Path(filename2)
-        assert path1.compare_recursive(path2, verbose = 1)
-
     def test_cleanup_after_partial(self):
         """
         Regression test for https://bugs.launchpad.net/bugs/409593
         where duplicity deletes all the signatures during a cleanup
         after a failed backup.
         """
-        #TODO: find something better than /etc for source
-        self.deltmp()
-        self.backup("full", "/etc", options = ["--vol 1"])
-        self.backup("inc", "/etc", options = ["--vol 1"])
-        self.backup("inc", "/etc", options = ["--vol 1"])
+        good_files = self.backup("full", "/bin", options = ["--vol 1"])
+        good_files |= self.backup("inc", "/bin", options = ["--vol 1"])
+        good_files |= self.backup("inc", "/bin", options = ["--vol 1"])
         # we know we're going to fail these, they are forced
         try:
-            self.backup("full", "/etc", options = ["--vol 1", "--fail 1"])
+            self.backup("full", "/bin", options = ["--vol 1", "--fail 1"])
+            self.fail("Not supposed to reach this far")
         except CmdError:
-            pass
+            bad_files = set(os.listdir("testfiles/output"))
+            bad_files -= good_files
+            self.assertNotEqual(bad_files, set())
         # the cleanup should go OK
         self.cleanup(options = ["--force"])
-        self.backup("inc", "/etc", options = ["--vol 1"])
-        self.verify("/etc")
+        leftovers = set(os.listdir("testfiles/output"))
+        self.assertSetEqual(good_files, leftovers)
+        self.backup("inc", "/bin", options = ["--vol 1"])
+        self.verify("/bin")
+
+    def test_remove_all_but_n(self):
+        """
+        Test that remove-all-but-n works in the simple case.
+        """
+        full1_files = self.backup("full", "testfiles/empty_dir")
+        full2_files = self.backup("full", "testfiles/empty_dir")
+        self.run_duplicity(["file://testfiles/output"],
+                           ["remove-all-but-n", "1", "--force"])
+        leftovers = set(os.listdir("testfiles/output"))
+        self.assertSetEqual(full2_files, leftovers)
+
+    def test_remove_all_inc_of_but_n(self):
+        """
+        Test that remove-all-inc-of-but-n-full works in the simple case.
+        """
+        full1_files = self.backup("full", "testfiles/empty_dir")
+        inc1_files = self.backup("inc", "testfiles/empty_dir")
+        full2_files = self.backup("full", "testfiles/empty_dir")
+        self.run_duplicity(["file://testfiles/output"],
+                           ["remove-all-inc-of-but-n-full", "1", "--force"])
+        leftovers = set(os.listdir("testfiles/output"))
+        self.assertSetEqual(full1_files | full2_files, leftovers)
 
 
 if __name__ == "__main__":


Follow ups