← Back to team overview

duplicity-team team mailing list archive

[Merge] lp:~duplicity-team/duplicity/check-volumes into lp:duplicity

 

Michael Terry has proposed merging lp:~duplicity-team/duplicity/check-volumes into lp:duplicity.

Requested reviews:
  Michael Terry (mterry)

For more details, see:
https://code.launchpad.net/~duplicity-team/duplicity/check-volumes/+merge/72826

This is the first pass of checking each volume's size as it is uploaded, as discussed in https://code.launchpad.net/~mterry/duplicity/early-catch-498933/+merge/72607
-- 
https://code.launchpad.net/~duplicity-team/duplicity/check-volumes/+merge/72826
Your team duplicity-team is subscribed to branch lp:~duplicity-team/duplicity/check-volumes.
=== modified file 'duplicity-bin'
--- duplicity-bin	2011-08-23 13:37:06 +0000
+++ duplicity-bin	2011-08-29 03:36:23 +0000
@@ -269,14 +269,28 @@
             end_block -= 1
         return start_index, start_block, end_index, end_block
 
-    def put(tdp, dest_filename):
+    def validate_block(tdp, dest_filename):
+        info = backend.query_info([dest_filename])[dest_filename]
+        if 'size' not in info:
+            return # backend didn't know how to query size
+        size = info['size']
+        if size is None:
+            return # error querying file
+        if size != tdp.getsize():
+            code_extra = "%s %d %d" % (util.escape(dest_filename), tdp.getsize(), size)
+            log.FatalError(_("File %s was corrupted during upload.") % dest_filename,
+                           log.ErrorCode.volume_wrong_size, code_extra)
+
+    def put(tdp, dest_filename, vol_num):
         """
         Retrieve file size *before* calling backend.put(), which may (at least
         in case of the localbackend) rename the temporary file to the target
         instead of copying.
         """
         putsize = tdp.getsize()
-        backend.put(tdp, dest_filename)
+        if globals.skip_volume != vol_num: # for testing purposes only
+            backend.put(tdp, dest_filename)
+        validate_block(tdp, dest_filename)
         if tdp.stat:
             tdp.delete()
         return putsize
@@ -350,8 +364,8 @@
             sig_outfp.flush()
             man_outfp.flush()
 
-        async_waiters.append(io_scheduler.schedule_task(lambda tdp, dest_filename: put(tdp, dest_filename),
-                                                        (tdp, dest_filename)))
+        async_waiters.append(io_scheduler.schedule_task(lambda tdp, dest_filename, vol_num: put(tdp, dest_filename, vol_num),
+                                                        (tdp, dest_filename, vol_num)))
 
         # Log human-readable version as well as raw numbers for machine consumers
         log.Progress('Processed volume %d' % vol_num, diffdir.stats.SourceFileSize)

=== modified file 'duplicity/backend.py'
--- duplicity/backend.py	2011-08-06 15:57:54 +0000
+++ duplicity/backend.py	2011-08-29 03:36:23 +0000
@@ -361,6 +361,35 @@
         """
         raise NotImplementedError()
 
+    # Should never cause FatalError.
+    # Returns a dictionary of dictionaries.  The outer dictionary maps
+    # filenames to metadata dictionaries.  Supported metadata are:
+    #
+    # 'size': if >= 0, size of file
+    #         if -1, file is not found
+    #         if None, error querying file
+    #
+    # Returned dictionary is guaranteed to contain a metadata dictionary for
+    # each filename, but not all metadata are guaranteed to be present.
+    def query_info(self, filename_list, raise_errors=True):
+        """
+        Return metadata about each filename in filename_list
+        """
+        info = {}
+        if hasattr(self, '_query_list_info'):
+            info = self._query_list_info(filename_list)
+        elif hasattr(self, '_query_file_info'):
+            for filename in filename_list:
+                info[filename] = self._query_file_info(filename)
+
+        # Fill out any missing entries (may happen if backend has no support
+        # or its query_list support is lazy)
+        for filename in filename_list:
+            if filename not in info:
+                info[filename] = {}
+
+        return info
+
     """ use getpass by default, inherited backends may overwrite this behaviour """
     use_getpass = True
 

=== modified file 'duplicity/backends/botobackend.py'
--- duplicity/backends/botobackend.py	2011-04-04 13:01:12 +0000
+++ duplicity/backends/botobackend.py	2011-08-29 03:36:23 +0000
@@ -26,6 +26,7 @@
 from duplicity import log
 from duplicity.errors import * #@UnusedWildImport
 from duplicity.util import exception_traceback
+from duplicity.backend import retry
 
 class BotoBackend(duplicity.backend.Backend):
     """
@@ -294,6 +295,24 @@
             self.bucket.delete_key(self.key_prefix + filename)
             log.Debug("Deleted %s/%s" % (self.straight_url, filename))
 
+    @retry
+    def _query_file_info(self, filename, raise_errors=False):
+        try:
+            key = self.bucket.lookup(self.key_prefix + filename)
+            if key is None:
+                return {'size': -1}
+            return {'size': key.size}
+        except Exception, e:
+            log.Warn("Query %s/%s failed: %s"
+                     "" % (self.straight_url,
+                           filename,
+                           str(e)))
+            self.resetConnection()
+            if raise_errors:
+                raise e
+            else:
+                return {'size': None}
+
 duplicity.backend.register_backend("s3", BotoBackend)
 duplicity.backend.register_backend("s3+http", BotoBackend)
 

=== modified file 'duplicity/backends/cloudfilesbackend.py'
--- duplicity/backends/cloudfilesbackend.py	2011-02-12 15:11:34 +0000
+++ duplicity/backends/cloudfilesbackend.py	2011-08-29 03:36:23 +0000
@@ -26,6 +26,7 @@
 from duplicity import log
 from duplicity.errors import * #@UnusedWildImport
 from duplicity.util import exception_traceback
+from duplicity.backend import retry
 
 class CloudFilesBackend(duplicity.backend.Backend):
     """
@@ -140,4 +141,22 @@
             self.container.delete_object(file)
             log.Debug("Deleted '%s/%s'" % (self.container, file))
 
+    @retry
+    def _query_file_info(self, filename, raise_errors=False):
+        from cloudfiles.errors import NoSuchObject
+        try:
+            sobject = self.container.get_object(filename)
+            return {'size': sobject.size}
+        except NoSuchObject:
+            return {'size': -1}
+        except Exception, e:
+            log.Warn("Error querying '%s/%s': %s"
+                     "" % (self.container,
+                           filename,
+                           str(e)))
+            if raise_errors:
+                raise e
+            else:
+                return {'size': None}
+
 duplicity.backend.register_backend("cf+http", CloudFilesBackend)

=== modified file 'duplicity/backends/giobackend.py'
--- duplicity/backends/giobackend.py	2011-06-12 22:25:39 +0000
+++ duplicity/backends/giobackend.py	2011-08-29 03:36:23 +0000
@@ -164,3 +164,20 @@
                 self.handle_error(raise_errors, e, 'delete',
                                   target_file.get_parse_name())
                 return
+
+    @retry
+    def _query_file_info(self, filename, raise_errors=False):
+        """Query attributes on filename"""
+        target_file = self.remote_file.get_child(filename)
+        attrs = gio.FILE_ATTRIBUTE_STANDARD_SIZE
+        try:
+            info = target_file.query_info(attrs, gio.FILE_QUERY_INFO_NONE)
+            return {'size': info.get_size()}
+        except Exception, e:
+            if isinstance(e, gio.Error):
+                if e.code == gio.ERROR_NOT_FOUND:
+                    return {'size': -1} # early exit, no need to retry
+            if raise_errors:
+                raise e
+            else:
+                return {'size': None}

=== modified file 'duplicity/backends/localbackend.py'
--- duplicity/backends/localbackend.py	2011-06-17 18:22:28 +0000
+++ duplicity/backends/localbackend.py	2011-08-29 03:36:23 +0000
@@ -57,7 +57,7 @@
                 code = log.ErrorCode.backend_no_space
         extra = ' '.join([util.escape(x) for x in [file1, file2] if x])
         extra = ' '.join([op, extra])
-        if op != 'delete':
+        if op != 'delete' and op != 'query':
             log.FatalError(str(e), code, extra)
         else:
             log.Warn(str(e), code, extra)
@@ -110,5 +110,17 @@
             except Exception, e:
                 self.handle_error(e, 'delete', self.remote_pathdir.append(filename).name)
 
+    def _query_file_info(self, filename):
+        """Query attributes on filename"""
+        try:
+            target_file = self.remote_pathdir.append(filename)
+            if not os.path.exists(target_file.name):
+                return {'size': -1}
+            target_file.setdata()
+            size = target_file.getsize()
+            return {'size': size}
+        except Exception, e:
+            self.handle_error(e, 'query', target_file.name)
+            return {'size': None}
 
 duplicity.backend.register_backend("file", LocalBackend)

=== modified file 'duplicity/backends/u1backend.py'
--- duplicity/backends/u1backend.py	2011-08-17 14:25:52 +0000
+++ duplicity/backends/u1backend.py	2011-08-29 03:36:23 +0000
@@ -98,17 +98,15 @@
         import urllib
         return urllib.quote(url, safe="/~")
 
-    def handle_error(self, raise_error, op, headers, file1=None, file2=None, ignore=None):
+    def parse_error(self, headers, ignore=None):
         from duplicity import log
-        from duplicity import util
-        import json
 
         status = int(headers[0].get('status'))
         if status >= 200 and status < 300:
-            return
+            return None
 
         if ignore and status in ignore:
-            return
+            return None
 
         if status == 400:
             code = log.ErrorCode.backend_permission_denied
@@ -118,6 +116,18 @@
             code = log.ErrorCode.backend_no_space
         else:
             code = log.ErrorCode.backend_error
+        return code
+
+    def handle_error(self, raise_error, op, headers, file1=None, file2=None, ignore=None):
+        from duplicity import log
+        from duplicity import util
+        import json
+
+        code = self.parse_error(headers, ignore)
+        if code is None:
+            return
+
+        status = int(headers[0].get('status'))
 
         if file1:
             file1 = file1.encode("utf8")
@@ -222,5 +232,27 @@
     	    answer = auth.request(remote_full, http_method="DELETE")
             self.handle_error(raise_errors, 'delete', answer, remote_full, ignore=[404])
 
+    @retry
+    def _query_file_info(self, filename, raise_errors=False):
+        """Query attributes on filename"""
+        import json
+        import ubuntuone.couch.auth as auth
+        from duplicity import log
+        remote_full = self.meta_base + self.quote(filename)
+        answer = auth.request(remote_full)
+
+        code = self.parse_error(answer)
+        if code is not None:
+            if code == log.ErrorCode.backend_not_found:
+                return {'size': -1}
+            elif raise_errors:
+                self.handle_error(raise_errors, 'query', answer, remote_full, filename)
+            else:
+                return {'size': None}
+
+        node = json.loads(answer[1])
+        size = node.get('size')
+        return {'size': size}
+
 duplicity.backend.register_backend("u1", U1Backend)
 duplicity.backend.register_backend("u1+http", U1Backend)

=== modified file 'duplicity/commandline.py'
--- duplicity/commandline.py	2011-08-18 18:09:18 +0000
+++ duplicity/commandline.py	2011-08-29 03:36:23 +0000
@@ -292,6 +292,10 @@
     parser.add_option("--fail-on-volume", type="int",
                       help=optparse.SUPPRESS_HELP)
 
+    # used in testing only - skips upload for a given volume
+    parser.add_option("--skip-volume", type="int",
+                      help=optparse.SUPPRESS_HELP)
+
     # If set, restore only the subdirectory or file specified, not the
     # whole root.
     # TRANSL: Used in usage help to represent a Unix-style path name. Example:

=== modified file 'duplicity/globals.py'
--- duplicity/globals.py	2011-08-18 18:09:18 +0000
+++ duplicity/globals.py	2011-08-29 03:36:23 +0000
@@ -200,6 +200,9 @@
 # used in testing only - raises exception after volume
 fail_on_volume = 0
 
+# used in testing only - skips uploading a particular volume
+skip_volume = 0
+
 # ignore (some) errors during operations; supposed to make it more
 # likely that you are able to restore data under problematic
 # circumstances. the default should absolutely always be True unless

=== modified file 'duplicity/log.py'
--- duplicity/log.py	2011-05-31 18:07:07 +0000
+++ duplicity/log.py	2011-08-29 03:36:23 +0000
@@ -189,6 +189,7 @@
     gio_not_available = 40
     source_dir_mismatch = 42 # 41 is reserved for par2
     ftps_lftp_missing = 43
+    volume_wrong_size = 44
 
     # 50->69 reserved for backend errors
     backend_error = 50

=== modified file 'testing/alltests'
--- testing/alltests	2009-08-12 17:43:42 +0000
+++ testing/alltests	2011-08-29 03:36:23 +0000
@@ -24,3 +24,4 @@
 finaltest.py
 restarttest.py
 cleanuptest.py
+badupload.py

=== added file 'testing/badupload.py'
--- testing/badupload.py	1970-01-01 00:00:00 +0000
+++ testing/badupload.py	2011-08-29 03:36:23 +0000
@@ -0,0 +1,83 @@
+# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
+#
+# Copyright 2002 Ben Escoto <ben@xxxxxxxxxxx>
+# Copyright 2007 Kenneth Loafman <kenneth@xxxxxxxxxxx>
+# Copyright 2011 Canonical Ltd
+#
+# This file is part of duplicity.
+#
+# Duplicity is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# Duplicity is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with duplicity; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import config
+import os, unittest, sys
+sys.path.insert(0, "../")
+
+config.setup()
+
+# This can be changed to select the URL to use
+backend_url = 'file://testfiles/output'
+
+class CmdError(Exception):
+    """Indicates an error running an external command"""
+    return_val = -1
+    def __init__(self, return_val):
+        self.return_val = os.WEXITSTATUS(return_val)
+
+class BadUploadTest(unittest.TestCase):
+    """
+    Test missing volume upload using duplicity binary
+    """
+    def setUp(self):
+        assert not os.system("tar xzf testfiles.tar.gz > /dev/null 2>&1")
+
+    def tearDown(self):
+        assert not os.system("rm -rf testfiles tempdir temp2.tar")
+
+    def run_duplicity(self, arglist, options = []):
+        """
+        Run duplicity binary with given arguments and options
+        """
+        options.append("--archive-dir testfiles/cache")
+        cmd_list = ["../duplicity-bin"]
+        cmd_list.extend(options + ["--allow-source-mismatch"])
+        cmd_list.extend(arglist)
+        cmdline = " ".join(cmd_list)
+        if not os.environ.has_key('PASSPHRASE'):
+            os.environ['PASSPHRASE'] = 'foobar'
+        return_val = os.system(cmdline)
+        if return_val:
+            raise CmdError(return_val)
+
+    def backup(self, type, input_dir, options = []):
+        """Run duplicity backup to default directory"""
+        options = options[:]
+        if type == "full":
+            options.insert(0, 'full')
+        args = [input_dir, "'%s'" % backend_url]
+        self.run_duplicity(args, options)
+
+    def test_missing_file(self):
+        """
+        Test basic lost file
+        """
+        # we know we're going to fail this one; it's forced
+        try:
+            self.backup("full", "testfiles/dir1", options = ["--skip-volume 1"])
+            assert False # shouldn't get this far
+        except CmdError, e:
+            assert e.return_val == 44, e.return_val
+
+if __name__ == "__main__":
+    unittest.main()


References