
duplicity-team team mailing list archive

[Merge] lp:~mterry/duplicity/disappearing-source into lp:duplicity

 

Michael Terry has proposed merging lp:~mterry/duplicity/disappearing-source into lp:duplicity with lp:~mterry/duplicity/manifest-oddities as a prerequisite.

Requested reviews:
  duplicity-team (duplicity-team)

For more details, see:
https://code.launchpad.net/~mterry/duplicity/disappearing-source/+merge/195461

When restarting a backup, we may accidentally skip the first chunk of one of the source files.  To reproduce this:
1) interrupt a backup
2) delete the source file it was in the middle of
3) restart the backup

When replaying the source iterator to find where to resume from, we can't notice that the file is gone until we've already iterated past where it would be!

The solution I came up with is to let duplicity push the data it accidentally read back into the source iterator, so the next call to next() returns that block again instead of fresh data.

This is actually a data loss bug: without the fix, it's possible to back up corrupted files that are missing their first chunk.
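
To make the mechanism concrete, here is a minimal, self-contained sketch of the push-back pattern.  The PushBackIter class and its push_back() method are invented names for this illustration only; the branch itself adds queue_index_data()/queued_data to TarBlockIter in diffdir.py and calls it from the resume loop in bin/duplicity.

# Minimal sketch of the push-back idea, not duplicity's real TarBlockIter.
# PushBackIter and push_back() are invented names for this illustration.

class PushBackIter:
    """Wrap an iterator and let the caller hand one item back to it."""

    def __init__(self, source):
        self.source = iter(source)
        self.queued = None              # item to return on the next call, if any

    def __iter__(self):
        return self

    def next(self):
        if self.queued is not None:
            result, self.queued = self.queued, None
            return result               # return the pushed-back item untouched
        return next(self.source)

    __next__ = next                     # Python 3 spelling

    def push_back(self, item):
        """Make the next next() call return `item` instead of fresh data."""
        self.queued = item


blocks = PushBackIter(["file1/block0", "file2/block0", "file2/block1"])
overshoot = blocks.next()               # resume scan read one block too far
blocks.push_back(overshoot)             # stuff it back into the iterator
assert blocks.next() == "file1/block0"  # the first chunk is not lost

This mirrors what the diff below does: when the resume loop in bin/duplicity notices it has already iterated past the last completed index, it calls queue_index_data() with the block it over-read, so write_multivol() starts from that block rather than silently dropping it.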
-- 
https://code.launchpad.net/~mterry/duplicity/disappearing-source/+merge/195461
Your team duplicity-team is requested to review the proposed merge of lp:~mterry/duplicity/disappearing-source into lp:duplicity.
=== modified file 'bin/duplicity'
--- bin/duplicity	2013-09-20 01:32:31 +0000
+++ bin/duplicity	2013-11-16 02:20:57 +0000
@@ -223,7 +223,8 @@
     last_block = globals.restart.last_block
     try:
         # Just spin our wheels
-        while tarblock_iter.next():
+        iter_result = tarblock_iter.next()
+        while iter_result:
             if (tarblock_iter.previous_index == last_index):
                 # If both the previous index and this index are done, exit now
                 # before we hit the next index, to prevent skipping its first
@@ -238,13 +239,15 @@
                            "Continuing restart on file %s.") %
                            ("/".join(last_index), "/".join(tarblock_iter.previous_index)),
                            log.ErrorCode.restart_file_not_found)
+                # We went too far! Stuff the data back into place before restarting
+                tarblock_iter.queue_index_data(iter_result)
                 break
+            iter_result = tarblock_iter.next()
     except StopIteration:
         log.Warn(_("File %s missing in backup set.\n"
                    "Continuing restart on file %s.") %
                    ("/".join(last_index), "/".join(tarblock_iter.previous_index)),
                    log.ErrorCode.restart_file_not_found)
-    return 0
 
 
 def write_multivol(backup_type, tarblock_iter, man_outfp, sig_outfp, backend):

=== modified file 'duplicity/diffdir.py'
--- duplicity/diffdir.py	2013-04-27 14:48:39 +0000
+++ duplicity/diffdir.py	2013-11-16 02:20:57 +0000
@@ -469,6 +469,7 @@
         self.remember_next = False          # see remember_next_index()
         self.remember_value = None          # holds index of next block
         self.remember_block = None          # holds block of next block
+        self.queued_data = None             # data to return in next next() call
 
     def tarinfo2tarblock(self, index, tarinfo, file_data = ""):
         """
@@ -504,6 +505,12 @@
         """
         Return next block and update offset
         """
+        if self.queued_data is not None:
+            result = self.queued_data
+            self.queued_data = None
+            # Keep rest of metadata as is (like previous_index)
+            return result
+
         if self.process_waiting:
             result = self.process_continued()
         else:
@@ -532,6 +539,12 @@
         """
         return self.previous_index, self.previous_block
 
+    def queue_index_data(self, data):
+        """
+        Next time next() is called, we will return data instead of processing
+        """
+        self.queued_data = data
+
     def remember_next_index(self):
         """
         When called, remember the index of the next block iterated

=== modified file 'testing/tests/restarttest.py'
--- testing/tests/restarttest.py	2013-11-16 02:20:57 +0000
+++ testing/tests/restarttest.py	2013-11-16 02:20:57 +0000
@@ -446,11 +446,13 @@
         assert not os.system("diff %s/file1 testfiles/restore_out/file1" % source)
         assert not os.system("diff %s/z testfiles/restore_out/z" % source)
 
-    def test_dangling_manifest_volume(self):
+    def test_changed_source_dangling_manifest_volume(self):
         """
         If we restart but find remote volumes missing, we can easily end up
         with a manifest that lists "vol1, vol2, vol3, vol2", leaving a dangling
-        vol3.  Make sure we can gracefully handle that.
+        vol3.  Make sure we can gracefully handle that.  This will only happen
+        if the source data changes to be small enough to not create a vol3 on
+        restart.
         """
         source = 'testfiles/largefiles'
         self.make_largefiles(count=5, size=1)
@@ -460,7 +462,7 @@
             self.fail()
         except CmdError, e:
             self.assertEqual(30, e.exit_status)
-        # now delete the last volume on remote end and change source data
+        # now delete the last volume on remote end and some source files
         assert not os.system("rm testfiles/output/duplicity-full*vol3.difftar*")
         assert not os.system("rm %s/file[2345]" % source)
         assert not os.system("echo hello > %s/z" % source)
@@ -469,6 +471,30 @@
         # and verify we can restore
         self.restore()
 
+    def test_changed_source_file_disappears(self):
+        """
+        Make sure we correctly handle restarting a backup when a file
+        disappears when we had been in the middle of backing it up.  It's
+        possible that the first chunk of the next file will be skipped unless
+        we're careful.
+        """
+        source = 'testfiles/largefiles'
+        self.make_largefiles(count=1)
+        # intentionally interrupt initial backup
+        try:
+            self.backup("full", source, options = ["--vol 1", "--fail 2"])
+            self.fail()
+        except CmdError, e:
+            self.assertEqual(30, e.exit_status)
+        # now remove starting source data and make sure we add something after
+        assert not os.system("rm %s/*" % source)
+        assert not os.system("echo hello > %s/z" % source)
+        # finish backup
+        self.backup("full", source)
+        # and verify we can restore
+        self.restore()
+        assert not os.system("diff %s/z testfiles/restore_out/z" % source)
+
 
 # Note that this class duplicates all the tests in RestartTest
 class RestartTestWithoutEncryption(RestartTest):

