[Merge] lp:~dawgfoto/duplicity/replicate into lp:duplicity
Martin Nowak has proposed merging lp:~dawgfoto/duplicity/replicate into lp:duplicity.
Requested reviews:
duplicity-team (duplicity-team)
For more details, see:
https://code.launchpad.net/~dawgfoto/duplicity/replicate/+merge/322836
Initial request for feedback.
Add a replicate command to replicate a backup (or the backup sets older than a given time) to another backend, leveraging duplicity's backend and compression/encryption infrastructure.
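For example, to replicate all backup sets older than six months from an SFTP remote to a local directory (the URLs and the interval here are illustrative, not from the branch):

    duplicity replicate --time 6M sftp://user@old-host/backup file:///mnt/new-backup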
--
Your team duplicity-team is requested to review the proposed merge of lp:~dawgfoto/duplicity/replicate into lp:duplicity.
=== modified file 'bin/duplicity'
--- bin/duplicity 2017-03-02 22:38:47 +0000
+++ bin/duplicity 2017-04-20 12:10:37 +0000
@@ -28,6 +28,7 @@
# any suggestions.
import duplicity.errors
+import copy
import gzip
import os
import platform
@@ -1006,6 +1007,108 @@
"\n" + chain_times_str(chainlist) + "\n" +
_("Rerun command with --force option to actually delete."))
+def replicate():
+ """
+ Replicate backup files from one remote to another, possibly encrypting or adding parity.
+
+ @rtype: void
+ @return: void
+ """
+ time = globals.restore_time or dup_time.curtime
+ src_stats = collections.CollectionsStatus(globals.src_backend, None).set_values(sig_chain_warning=None)
+ tgt_stats = collections.CollectionsStatus(globals.backend, None).set_values(sig_chain_warning=None)
+
+ src_list = globals.src_backend.list()
+ tgt_list = globals.backend.list()
+
+ src_chainlist = src_stats.get_signature_chains(local=False, filelist=src_list)[0]
+ tgt_chainlist = tgt_stats.get_signature_chains(local=False, filelist=tgt_list)[0]
+ src_chainlist.sort(key=lambda chain: chain.start_time)
+ tgt_chainlist.sort(key=lambda chain: chain.start_time)
+ if not src_chainlist:
+ log.Notice(_("No old backup sets found."))
+ return
+ for src_chain in src_chainlist:
+ try:
+ tgt_chain = filter(lambda chain: chain.start_time == src_chain.start_time, tgt_chainlist)[0]
+ except IndexError:
+ tgt_chain = None
+
+ tgt_sigs = map(file_naming.parse, tgt_chain.get_filenames()) if tgt_chain else []
+ for src_sig_filename in src_chain.get_filenames():
+ src_sig = file_naming.parse(src_sig_filename)
+ if not (src_sig.time or src_sig.end_time) < time:
+ continue
+ try:
+ tgt_sigs.remove(src_sig)
+ log.Info(_("Signature %s already replicated") % (src_sig_filename,))
+ continue
+ except ValueError:
+ pass
+ if src_sig.type == 'new-sig':
+ dup_time.setprevtime(src_sig.start_time)
+ dup_time.setcurtime(src_sig.time or src_sig.end_time)
+ log.Notice(_("Replicating %s.") % (src_sig_filename,))
+ fileobj = globals.src_backend.get_fileobj_read(src_sig_filename)
+ filename = file_naming.get(src_sig.type, encrypted=globals.encryption, gzipped=globals.compression)
+ tdp = dup_temp.new_tempduppath(file_naming.parse(filename))
+ tmpobj = tdp.filtered_open(mode='wb')
+ util.copyfileobj(fileobj, tmpobj) # decrypt, compress, (re)-encrypt
+ tmpobj.close()
+ globals.backend.put(tdp, filename)
+
+ src_chainlist = src_stats.get_backup_chains(filename_list=src_list)[0]
+ tgt_chainlist = tgt_stats.get_backup_chains(filename_list=tgt_list)[0]
+ src_chainlist.sort(key=lambda chain: chain.start_time)
+ tgt_chainlist.sort(key=lambda chain: chain.start_time)
+ for src_chain in src_chainlist:
+ try:
+ tgt_chain = filter(lambda chain: chain.start_time == src_chain.start_time, tgt_chainlist)[0]
+ except IndexError:
+ tgt_chain = None
+
+ tgt_sets = tgt_chain.get_all_sets() if tgt_chain else []
+ for src_set in src_chain.get_all_sets():
+ if not src_set.get_time() < time:
+ continue
+ try:
+ tgt_sets.remove(src_set)
+ log.Info(_("Backupset %s already replicated") % (src_set.remote_manifest_name,))
+ continue
+ except ValueError:
+ pass
+ if src_set.type == 'inc':
+ dup_time.setprevtime(src_set.start_time)
+ dup_time.setcurtime(src_set.get_time())
+ rmf = src_set.get_remote_manifest()
+ mf_filename = file_naming.get(src_set.type, manifest=True)
+ mf_tdp = dup_temp.new_tempduppath(file_naming.parse(mf_filename))
+ mf = manifest.Manifest(fh=mf_tdp.filtered_open(mode='wb'))
+ for i, filename in src_set.volume_name_dict.iteritems():
+ log.Notice(_("Replicating %s.") % (filename,))
+ fileobj = restore_get_enc_fileobj(globals.src_backend, filename, rmf.volume_info_dict[i])
+ filename = file_naming.get(src_set.type, i, encrypted=globals.encryption, gzipped=globals.compression)
+ tdp = dup_temp.new_tempduppath(file_naming.parse(filename))
+ tmpobj = tdp.filtered_open(mode='wb')
+ util.copyfileobj(fileobj, tmpobj) # decrypt, compress, (re)-encrypt
+ tmpobj.close()
+ globals.backend.put(tdp, filename)
+
+ vi = copy.copy(rmf.volume_info_dict[i])
+ vi.set_hash("SHA1", gpg.get_hash("SHA1", tdp))
+ mf.add_volume_info(vi)
+
+ mf.fh.close()
+ # incremental GPG writes hang on close, so do any encryption here at once
+ mf_final_filename = file_naming.get(src_set.type, manifest=True, encrypted=globals.encryption, gzipped=globals.compression)
+ mf_final_tdp = dup_temp.new_tempduppath(file_naming.parse(mf_final_filename))
+ mf_final_fileobj = mf_final_tdp.filtered_open(mode='wb')
+ util.copyfileobj(mf_tdp.filtered_open(mode='rb'), mf_final_fileobj) # compress, encrypt
+ mf_final_fileobj.close()
+ globals.backend.put(mf_final_tdp, mf_final_filename)
+
+ globals.src_backend.close()
+ globals.backend.close()
def sync_archive(decrypt):
"""
@@ -1408,8 +1511,9 @@
check_resources(action)
# check archive synch with remote, fix if needed
- decrypt = action not in ["collection-status"]
- sync_archive(decrypt)
+ if action != "replicate":
+ decrypt = action not in ["collection-status"]
+ sync_archive(decrypt)
# get current collection status
col_stats = collections.CollectionsStatus(globals.backend,
@@ -1483,6 +1587,8 @@
remove_all_but_n_full(col_stats)
elif action == "sync":
sync_archive(True)
+ elif action == "replicate":
+ replicate()
else:
assert action == "inc" or action == "full", action
# the passphrase for full and inc is used by --sign-key
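The copying in replicate() leans entirely on duplicity's filtering file objects: get_fileobj_read() decrypts and decompresses on read, filtered_open() on a temp path compresses and encrypts on write, and util.copyfileobj() just moves bytes between the two, so each volume is re-encoded without being rebuilt. A minimal standalone sketch of that pattern, using only gzip and hypothetical paths:

    import gzip
    import shutil

    # The read side decodes, the write side re-encodes; the copy itself
    # is a plain byte loop, as in replicate()'s util.copyfileobj() calls.
    with gzip.open('/tmp/src.gz', 'rb') as src:
        with gzip.open('/tmp/dst.gz', 'wb') as dst:
            shutil.copyfileobj(src, dst)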
=== modified file 'bin/duplicity.1'
--- bin/duplicity.1 2017-02-13 16:57:00 +0000
+++ bin/duplicity.1 2017-04-20 12:10:37 +0000
@@ -48,6 +48,10 @@
.I [options] [--force] [--extra-clean]
target_url
+.B duplicity replicate
+.I [options] [--time time]
+source_url target_url
+
.SH DESCRIPTION
Duplicity incrementally backs up files and folders into
tar-format volumes encrypted with GnuPG and places them to a
@@ -243,6 +247,19 @@
.I --force
will be needed to delete the files instead of just listing them.
+.TP
+.BI "replicate " "[--time time] <source_url> <target_url>"
+Replicate backup sets from the source to the target backend. Files will be
+(re)-encrypted and (re)-compressed according to the normal backend
+options. Signatures and volumes will not be recomputed, so options like
+.BI --volsize
+or
+.BI --max-blocksize
+have no effect.
+When
+.I --time time
+is given, only backup sets older than time will be replicated.
+
.SH OPTIONS
.TP
=== modified file 'duplicity/collections.py'
--- duplicity/collections.py 2017-02-27 13:18:57 +0000
+++ duplicity/collections.py 2017-04-20 12:10:37 +0000
@@ -294,6 +294,15 @@
"""
return len(self.volume_name_dict.keys())
+ def __eq__(self, other):
+ """
+ Return whether this backup set is equal to other
+ """
+ return self.type == other.type and \
+ self.time == other.time and \
+ self.start_time == other.start_time and \
+ self.end_time == other.end_time and \
+ len(self) == len(other)
class BackupChain:
"""
@@ -642,7 +651,7 @@
u"-----------------",
_("Connecting with backend: %s") %
(self.backend.__class__.__name__,),
- _("Archive dir: %s") % (util.ufn(self.archive_dir_path.name),)]
+ _("Archive dir: %s") % (util.ufn(self.archive_dir_path.name if self.archive_dir_path else 'None'),)]
l.append("\n" +
ngettext("Found %d secondary backup chain.",
@@ -697,7 +706,7 @@
len(backend_filename_list))
# get local filename list
- local_filename_list = self.archive_dir_path.listdir()
+ local_filename_list = self.archive_dir_path.listdir() if self.archive_dir_path else []
log.Debug(ngettext("%d file exists in cache",
"%d files exist in cache",
len(local_filename_list)) %
@@ -894,7 +903,7 @@
if filelist is not None:
return filelist
elif local:
- return self.archive_dir_path.listdir()
+ return self.archive_dir_path.listdir() if self.archive_dir_path else []
else:
return self.backend.list()
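The BackupSet.__eq__ added above is what lets replicate() call tgt_sets.remove(src_set): list.remove() compares elements with ==, which defaults to identity for class instances, so a set parsed from the target would otherwise never match its source counterpart. A minimal sketch of the pattern with a plain stand-in class, not duplicity's own:

    class Set:  # stand-in for collections.BackupSet
        def __init__(self, type, time):
            self.type, self.time = type, time

        def __eq__(self, other):
            # value equality, mirroring the type/time comparison above
            return (self.type, self.time) == (other.type, other.time)

    tgt_sets = [Set('full', 100), Set('inc', 200)]
    try:
        tgt_sets.remove(Set('inc', 200))  # matches by value
        print('already replicated')
    except ValueError:
        print('needs replication')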
=== modified file 'duplicity/commandline.py'
--- duplicity/commandline.py 2017-02-27 13:18:57 +0000
+++ duplicity/commandline.py 2017-04-20 12:10:37 +0000
@@ -54,6 +54,7 @@
collection_status = None # Will be set to true if collection-status command given
cleanup = None # Set to true if cleanup command given
verify = None # Set to true if verify command given
+replicate = None # Set to true if replicate command given
commands = ["cleanup",
"collection-status",
@@ -65,6 +66,7 @@
"remove-all-inc-of-but-n-full",
"restore",
"verify",
+ "replicate"
]
@@ -236,7 +238,7 @@
def parse_cmdline_options(arglist):
"""Parse argument list"""
global select_opts, select_files, full_backup
- global list_current, collection_status, cleanup, remove_time, verify
+ global list_current, collection_status, cleanup, remove_time, verify, replicate
def set_log_fd(fd):
if fd < 1:
@@ -706,6 +708,9 @@
num_expect = 1
elif cmd == "verify":
verify = True
+ elif cmd == "replicate":
+ replicate = True
+ num_expect = 2
if len(args) != num_expect:
command_line_error("Expected %d args, got %d" % (num_expect, len(args)))
@@ -724,7 +729,12 @@
elif len(args) == 1:
backend_url = args[0]
elif len(args) == 2:
- lpath, backend_url = args_to_path_backend(args[0], args[1]) # @UnusedVariable
+ if replicate:
+ if not backend.is_backend_url(args[0]) or not backend.is_backend_url(args[1]):
+ command_line_error("Two URLs expected for replicate.")
+ src_backend_url, backend_url = args[0], args[1]
+ else:
+ lpath, backend_url = args_to_path_backend(args[0], args[1]) # @UnusedVariable
else:
command_line_error("Too many arguments")
@@ -899,6 +909,7 @@
duplicity remove-older-than %(time)s [%(options)s] %(target_url)s
duplicity remove-all-but-n-full %(count)s [%(options)s] %(target_url)s
duplicity remove-all-inc-of-but-n-full %(count)s [%(options)s] %(target_url)s
+ duplicity replicate %(source_url)s %(target_url)s
""" % dict
@@ -944,7 +955,8 @@
remove-older-than <%(time)s> <%(target_url)s>
remove-all-but-n-full <%(count)s> <%(target_url)s>
remove-all-inc-of-but-n-full <%(count)s> <%(target_url)s>
- verify <%(target_url)s> <%(source_dir)s>""" % dict
+ verify <%(target_url)s> <%(source_dir)s>
+ replicate <%(source_url)s> <%(target_url)s>""" % dict
return msg
@@ -1047,7 +1059,7 @@
def check_consistency(action):
"""Final consistency check, see if something wrong with command line"""
- global full_backup, select_opts, list_current
+ global full_backup, select_opts, list_current, collection_status, cleanup, replicate
def assert_only_one(arglist):
"""Raises error if two or more of the elements of arglist are true"""
@@ -1058,8 +1070,8 @@
assert n <= 1, "Invalid syntax, two conflicting modes specified"
if action in ["list-current", "collection-status",
- "cleanup", "remove-old", "remove-all-but-n-full", "remove-all-inc-of-but-n-full"]:
- assert_only_one([list_current, collection_status, cleanup,
+ "cleanup", "remove-old", "remove-all-but-n-full", "remove-all-inc-of-but-n-full", "replicate"]:
+ assert_only_one([list_current, collection_status, cleanup, replicate,
globals.remove_time is not None])
elif action == "restore" or action == "verify":
if full_backup:
@@ -1137,22 +1149,27 @@
"file:///usr/local". See the man page for more information.""") % (args[0],),
log.ErrorCode.bad_url)
elif len(args) == 2:
- # Figure out whether backup or restore
- backup, local_pathname = set_backend(args[0], args[1])
- if backup:
- if full_backup:
- action = "full"
- else:
- action = "inc"
+ if replicate:
+ globals.src_backend = backend.get_backend(args[0])
+ globals.backend = backend.get_backend(args[1])
+ action = "replicate"
else:
- if verify:
- action = "verify"
+ # Figure out whether backup or restore
+ backup, local_pathname = set_backend(args[0], args[1])
+ if backup:
+ if full_backup:
+ action = "full"
+ else:
+ action = "inc"
else:
- action = "restore"
+ if verify:
+ action = "verify"
+ else:
+ action = "restore"
- process_local_dir(action, local_pathname)
- if action in ['full', 'inc', 'verify']:
- set_selection()
+ process_local_dir(action, local_pathname)
+ if action in ['full', 'inc', 'verify']:
+ set_selection()
elif len(args) > 2:
raise AssertionError("this code should not be reachable")
=== modified file 'duplicity/file_naming.py'
--- duplicity/file_naming.py 2016-06-28 21:03:46 +0000
+++ duplicity/file_naming.py 2017-04-20 12:10:37 +0000
@@ -436,3 +436,12 @@
self.encrypted = encrypted # true if gpg encrypted
self.partial = partial
+
+ def __eq__(self, other):
+ return self.type == other.type and \
+ self.manifest == other.manifest and \
+ self.time == other.time and \
+ self.start_time == other.start_time and \
+ self.end_time == other.end_time and \
+ self.partial == other.partial
+
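Note that ParseResults.__eq__ deliberately ignores the encrypted flag: replicate() may re-encrypt or re-compress a signature file on its way to the target, so the replica must still compare equal to its source when only the encoding differs. A small sketch, assuming the constructor keywords match the attributes assigned in __init__:

    from duplicity import file_naming

    # Illustrative only: the same full-signature set, one copy
    # gpg-encrypted and one not, still compare equal.
    a = file_naming.ParseResults('full-sig', time=1000, encrypted=True)
    b = file_naming.ParseResults('full-sig', time=1000, encrypted=False)
    assert a == b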