duplicity-team team mailing list archive
-
duplicity-team team
-
Mailing list archive
-
Message #00746
[Merge] lp:~duplicity-team/duplicity/check-volumes into lp:duplicity
Michael Terry has proposed merging lp:~duplicity-team/duplicity/check-volumes into lp:duplicity.
Requested reviews:
Michael Terry (mterry)
For more details, see:
https://code.launchpad.net/~duplicity-team/duplicity/check-volumes/+merge/72826
This is the first pass of checking each volume's size as it is uploaded, as discussed in https://code.launchpad.net/~mterry/duplicity/early-catch-498933/+merge/72607
--
https://code.launchpad.net/~duplicity-team/duplicity/check-volumes/+merge/72826
Your team duplicity-team is subscribed to branch lp:~duplicity-team/duplicity/check-volumes.
=== modified file 'duplicity-bin'
--- duplicity-bin 2011-08-23 13:37:06 +0000
+++ duplicity-bin 2011-08-29 03:36:23 +0000
@@ -269,14 +269,28 @@
end_block -= 1
return start_index, start_block, end_index, end_block
- def put(tdp, dest_filename):
+ def validate_block(tdp, dest_filename):
+ info = backend.query_info([dest_filename])[dest_filename]
+ if 'size' not in info:
+ return # backend didn't know how to query size
+ size = info['size']
+ if size is None:
+ return # error querying file
+ if size != tdp.getsize():
+ code_extra = "%s %d %d" % (util.escape(dest_filename), tdp.getsize(), size)
+ log.FatalError(_("File %s was corrupted during upload.") % dest_filename,
+ log.ErrorCode.volume_wrong_size, code_extra)
+
+ def put(tdp, dest_filename, vol_num):
"""
Retrieve file size *before* calling backend.put(), which may (at least
in case of the localbackend) rename the temporary file to the target
instead of copying.
"""
putsize = tdp.getsize()
- backend.put(tdp, dest_filename)
+ if globals.skip_volume != vol_num: # for testing purposes only
+ backend.put(tdp, dest_filename)
+ validate_block(tdp, dest_filename)
if tdp.stat:
tdp.delete()
return putsize
@@ -350,8 +364,8 @@
sig_outfp.flush()
man_outfp.flush()
- async_waiters.append(io_scheduler.schedule_task(lambda tdp, dest_filename: put(tdp, dest_filename),
- (tdp, dest_filename)))
+ async_waiters.append(io_scheduler.schedule_task(lambda tdp, dest_filename, vol_num: put(tdp, dest_filename, vol_num),
+ (tdp, dest_filename, vol_num)))
# Log human-readable version as well as raw numbers for machine consumers
log.Progress('Processed volume %d' % vol_num, diffdir.stats.SourceFileSize)
=== modified file 'duplicity/backend.py'
--- duplicity/backend.py 2011-08-06 15:57:54 +0000
+++ duplicity/backend.py 2011-08-29 03:36:23 +0000
@@ -361,6 +361,35 @@
"""
raise NotImplementedError()
+ # Should never cause FatalError.
+ # Returns a dictionary of dictionaries. The outer dictionary maps
+ # filenames to metadata dictionaries. Supported metadata are:
+ #
+ # 'size': if >= 0, size of file
+ # if -1, file is not found
+ # if None, error querying file
+ #
+ # Returned dictionary is guaranteed to contain a metadata dictionary for
+ # each filename, but not all metadata are guaranteed to be present.
+ def query_info(self, filename_list, raise_errors=True):
+ """
+ Return metadata about each filename in filename_list
+ """
+ info = {}
+ if hasattr(self, '_query_list_info'):
+ info = self._query_list_info(filename_list)
+ elif hasattr(self, '_query_file_info'):
+ for filename in filename_list:
+ info[filename] = self._query_file_info(filename)
+
+ # Fill out any missing entries (may happen if backend has no support
+ # or its query_list support is lazy)
+ for filename in filename_list:
+ if filename not in info:
+ info[filename] = {}
+
+ return info
+
""" use getpass by default, inherited backends may overwrite this behaviour """
use_getpass = True
=== modified file 'duplicity/backends/botobackend.py'
--- duplicity/backends/botobackend.py 2011-04-04 13:01:12 +0000
+++ duplicity/backends/botobackend.py 2011-08-29 03:36:23 +0000
@@ -26,6 +26,7 @@
from duplicity import log
from duplicity.errors import * #@UnusedWildImport
from duplicity.util import exception_traceback
+from duplicity.backend import retry
class BotoBackend(duplicity.backend.Backend):
"""
@@ -294,6 +295,24 @@
self.bucket.delete_key(self.key_prefix + filename)
log.Debug("Deleted %s/%s" % (self.straight_url, filename))
+ @retry
+ def _query_file_info(self, filename, raise_errors=False):
+ try:
+ key = self.bucket.lookup(self.key_prefix + filename)
+ if key is None:
+ return {'size': -1}
+ return {'size': key.size}
+ except Exception, e:
+ log.Warn("Query %s/%s failed: %s"
+ "" % (self.straight_url,
+ filename,
+ str(e)))
+ self.resetConnection()
+ if raise_errors:
+ raise e
+ else:
+ return {'size': None}
+
duplicity.backend.register_backend("s3", BotoBackend)
duplicity.backend.register_backend("s3+http", BotoBackend)
=== modified file 'duplicity/backends/cloudfilesbackend.py'
--- duplicity/backends/cloudfilesbackend.py 2011-02-12 15:11:34 +0000
+++ duplicity/backends/cloudfilesbackend.py 2011-08-29 03:36:23 +0000
@@ -26,6 +26,7 @@
from duplicity import log
from duplicity.errors import * #@UnusedWildImport
from duplicity.util import exception_traceback
+from duplicity.backend import retry
class CloudFilesBackend(duplicity.backend.Backend):
"""
@@ -140,4 +141,22 @@
self.container.delete_object(file)
log.Debug("Deleted '%s/%s'" % (self.container, file))
+ @retry
+ def _query_file_info(self, filename, raise_errors=False):
+ from cloudfiles.errors import NoSuchObject
+ try:
+ sobject = self.container.get_object(filename)
+ return {'size': sobject.size}
+ except NoSuchObject:
+ return {'size': -1}
+ except Exception, e:
+ log.Warn("Error querying '%s/%s': %s"
+ "" % (self.container,
+ filename,
+ str(e)))
+ if raise_errors:
+ raise e
+ else:
+ return {'size': None}
+
duplicity.backend.register_backend("cf+http", CloudFilesBackend)
=== modified file 'duplicity/backends/giobackend.py'
--- duplicity/backends/giobackend.py 2011-06-12 22:25:39 +0000
+++ duplicity/backends/giobackend.py 2011-08-29 03:36:23 +0000
@@ -164,3 +164,20 @@
self.handle_error(raise_errors, e, 'delete',
target_file.get_parse_name())
return
+
+ @retry
+ def _query_file_info(self, filename, raise_errors=False):
+ """Query attributes on filename"""
+ target_file = self.remote_file.get_child(filename)
+ attrs = gio.FILE_ATTRIBUTE_STANDARD_SIZE
+ try:
+ info = target_file.query_info(attrs, gio.FILE_QUERY_INFO_NONE)
+ return {'size': info.get_size()}
+ except Exception, e:
+ if isinstance(e, gio.Error):
+ if e.code == gio.ERROR_NOT_FOUND:
+ return {'size': -1} # early exit, no need to retry
+ if raise_errors:
+ raise e
+ else:
+ return {'size': None}
=== modified file 'duplicity/backends/localbackend.py'
--- duplicity/backends/localbackend.py 2011-06-17 18:22:28 +0000
+++ duplicity/backends/localbackend.py 2011-08-29 03:36:23 +0000
@@ -57,7 +57,7 @@
code = log.ErrorCode.backend_no_space
extra = ' '.join([util.escape(x) for x in [file1, file2] if x])
extra = ' '.join([op, extra])
- if op != 'delete':
+ if op != 'delete' and op != 'query':
log.FatalError(str(e), code, extra)
else:
log.Warn(str(e), code, extra)
@@ -110,5 +110,17 @@
except Exception, e:
self.handle_error(e, 'delete', self.remote_pathdir.append(filename).name)
+ def _query_file_info(self, filename):
+ """Query attributes on filename"""
+ try:
+ target_file = self.remote_pathdir.append(filename)
+ if not os.path.exists(target_file.name):
+ return {'size': -1}
+ target_file.setdata()
+ size = target_file.getsize()
+ return {'size': size}
+ except Exception, e:
+ self.handle_error(e, 'query', target_file.name)
+ return {'size': None}
duplicity.backend.register_backend("file", LocalBackend)
=== modified file 'duplicity/backends/u1backend.py'
--- duplicity/backends/u1backend.py 2011-08-17 14:25:52 +0000
+++ duplicity/backends/u1backend.py 2011-08-29 03:36:23 +0000
@@ -98,17 +98,15 @@
import urllib
return urllib.quote(url, safe="/~")
- def handle_error(self, raise_error, op, headers, file1=None, file2=None, ignore=None):
+ def parse_error(self, headers, ignore=None):
from duplicity import log
- from duplicity import util
- import json
status = int(headers[0].get('status'))
if status >= 200 and status < 300:
- return
+ return None
if ignore and status in ignore:
- return
+ return None
if status == 400:
code = log.ErrorCode.backend_permission_denied
@@ -118,6 +116,18 @@
code = log.ErrorCode.backend_no_space
else:
code = log.ErrorCode.backend_error
+ return code
+
+ def handle_error(self, raise_error, op, headers, file1=None, file2=None, ignore=None):
+ from duplicity import log
+ from duplicity import util
+ import json
+
+ code = self.parse_error(headers, ignore)
+ if code is None:
+ return
+
+ status = int(headers[0].get('status'))
if file1:
file1 = file1.encode("utf8")
@@ -222,5 +232,27 @@
answer = auth.request(remote_full, http_method="DELETE")
self.handle_error(raise_errors, 'delete', answer, remote_full, ignore=[404])
+ @retry
+ def _query_file_info(self, filename, raise_errors=False):
+ """Query attributes on filename"""
+ import json
+ import ubuntuone.couch.auth as auth
+ from duplicity import log
+ remote_full = self.meta_base + self.quote(filename)
+ answer = auth.request(remote_full)
+
+ code = self.parse_error(answer)
+ if code is not None:
+ if code == log.ErrorCode.backend_not_found:
+ return {'size': -1}
+ elif raise_errors:
+ self.handle_error(raise_errors, 'query', answer, remote_full, filename)
+ else:
+ return {'size': None}
+
+ node = json.loads(answer[1])
+ size = node.get('size')
+ return {'size': size}
+
duplicity.backend.register_backend("u1", U1Backend)
duplicity.backend.register_backend("u1+http", U1Backend)
=== modified file 'duplicity/commandline.py'
--- duplicity/commandline.py 2011-08-18 18:09:18 +0000
+++ duplicity/commandline.py 2011-08-29 03:36:23 +0000
@@ -292,6 +292,10 @@
parser.add_option("--fail-on-volume", type="int",
help=optparse.SUPPRESS_HELP)
+ # used in testing only - skips upload for a given volume
+ parser.add_option("--skip-volume", type="int",
+ help=optparse.SUPPRESS_HELP)
+
# If set, restore only the subdirectory or file specified, not the
# whole root.
# TRANSL: Used in usage help to represent a Unix-style path name. Example:
=== modified file 'duplicity/globals.py'
--- duplicity/globals.py 2011-08-18 18:09:18 +0000
+++ duplicity/globals.py 2011-08-29 03:36:23 +0000
@@ -200,6 +200,9 @@
# used in testing only - raises exception after volume
fail_on_volume = 0
+# used in testing only - skips uploading a particular volume
+skip_volume = 0
+
# ignore (some) errors during operations; supposed to make it more
# likely that you are able to restore data under problematic
# circumstances. the default should absolutely always be True unless
=== modified file 'duplicity/log.py'
--- duplicity/log.py 2011-05-31 18:07:07 +0000
+++ duplicity/log.py 2011-08-29 03:36:23 +0000
@@ -189,6 +189,7 @@
gio_not_available = 40
source_dir_mismatch = 42 # 41 is reserved for par2
ftps_lftp_missing = 43
+ volume_wrong_size = 44
# 50->69 reserved for backend errors
backend_error = 50
=== modified file 'testing/alltests'
--- testing/alltests 2009-08-12 17:43:42 +0000
+++ testing/alltests 2011-08-29 03:36:23 +0000
@@ -24,3 +24,4 @@
finaltest.py
restarttest.py
cleanuptest.py
+badupload.py
=== added file 'testing/badupload.py'
--- testing/badupload.py 1970-01-01 00:00:00 +0000
+++ testing/badupload.py 2011-08-29 03:36:23 +0000
@@ -0,0 +1,83 @@
+# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
+#
+# Copyright 2002 Ben Escoto <ben@xxxxxxxxxxx>
+# Copyright 2007 Kenneth Loafman <kenneth@xxxxxxxxxxx>
+# Copyright 2011 Canonical Ltd
+#
+# This file is part of duplicity.
+#
+# Duplicity is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# Duplicity is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with duplicity; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import config
+import os, unittest, sys
+sys.path.insert(0, "../")
+
+config.setup()
+
+# This can be changed to select the URL to use
+backend_url = 'file://testfiles/output'
+
+class CmdError(Exception):
+ """Indicates an error running an external command"""
+ return_val = -1
+ def __init__(self, return_val):
+ self.return_val = os.WEXITSTATUS(return_val)
+
+class BadUploadTest(unittest.TestCase):
+ """
+ Test missing volume upload using duplicity binary
+ """
+ def setUp(self):
+ assert not os.system("tar xzf testfiles.tar.gz > /dev/null 2>&1")
+
+ def tearDown(self):
+ assert not os.system("rm -rf testfiles tempdir temp2.tar")
+
+ def run_duplicity(self, arglist, options = []):
+ """
+ Run duplicity binary with given arguments and options
+ """
+ options.append("--archive-dir testfiles/cache")
+ cmd_list = ["../duplicity-bin"]
+ cmd_list.extend(options + ["--allow-source-mismatch"])
+ cmd_list.extend(arglist)
+ cmdline = " ".join(cmd_list)
+ if not os.environ.has_key('PASSPHRASE'):
+ os.environ['PASSPHRASE'] = 'foobar'
+ return_val = os.system(cmdline)
+ if return_val:
+ raise CmdError(return_val)
+
+ def backup(self, type, input_dir, options = []):
+ """Run duplicity backup to default directory"""
+ options = options[:]
+ if type == "full":
+ options.insert(0, 'full')
+ args = [input_dir, "'%s'" % backend_url]
+ self.run_duplicity(args, options)
+
+ def test_missing_file(self):
+ """
+ Test basic lost file
+ """
+ # we know we're going to fail this one, it's forced
+ try:
+ self.backup("full", "testfiles/dir1", options = ["--skip-volume 1"])
+ assert False # shouldn't get this far
+ except CmdError, e:
+ assert e.return_val == 44, e.return_val
+
+if __name__ == "__main__":
+ unittest.main()
References