[Merge] lp:~dawgfoto/duplicity/replicate into lp:duplicity

Martin Nowak has proposed merging lp:~dawgfoto/duplicity/replicate into lp:duplicity.

Requested reviews:
  duplicity-team (duplicity-team)

For more details, see:
https://code.launchpad.net/~dawgfoto/duplicity/replicate/+merge/322836

Initial request for feedback.

Add a replicate command that copies a backup (or only the backup sets older than a given time) to another backend, leveraging duplicity's existing backend and compression/encryption infrastructure.
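
A usage sketch (the URLs and the six-month interval are illustrative, not taken from this patch):

  # replicate all backup sets to a second backend
  duplicity replicate sftp://uid@other.host/some_dir file:///mnt/offsite/backup

  # replicate only backup sets older than six months
  duplicity replicate --time 6M sftp://uid@other.host/some_dir file:///mnt/offsite/backup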
-- 
Your team duplicity-team is requested to review the proposed merge of lp:~dawgfoto/duplicity/replicate into lp:duplicity.
=== modified file 'bin/duplicity'
--- bin/duplicity	2017-03-02 22:38:47 +0000
+++ bin/duplicity	2017-04-20 12:10:37 +0000
@@ -28,6 +28,7 @@
 # any suggestions.
 
 import duplicity.errors
+import copy
 import gzip
 import os
 import platform
@@ -1006,6 +1007,108 @@
                    "\n" + chain_times_str(chainlist) + "\n" +
                    _("Rerun command with --force option to actually delete."))
 
+def replicate():
+    """
+    Replicate backup files from one remote to another, possibly encrypting or adding parity.
+
+    @rtype: void
+    @return: void
+    """
+    time = globals.restore_time or dup_time.curtime
+    src_stats = collections.CollectionsStatus(globals.src_backend, None).set_values(sig_chain_warning=None)
+    tgt_stats = collections.CollectionsStatus(globals.backend, None).set_values(sig_chain_warning=None)
+
+    src_list = globals.src_backend.list()
+    tgt_list = globals.backend.list()
+
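+    # First pass: replicate the signature chains.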
+    src_chainlist = src_stats.get_signature_chains(local=False, filelist=src_list)[0]
+    tgt_chainlist = tgt_stats.get_signature_chains(local=False, filelist=tgt_list)[0]
+    src_chainlist.sort(key=lambda chain: chain.start_time)
+    tgt_chainlist.sort(key=lambda chain: chain.start_time)
+    if not src_chainlist:
+        log.Notice(_("No old backup sets found."))
+        return
+    for src_chain in src_chainlist:
+        try:
+            tgt_chain = filter(lambda chain: chain.start_time == src_chain.start_time, tgt_chainlist)[0]
+        except IndexError:
+            tgt_chain = None
+
+        tgt_sigs = map(file_naming.parse, tgt_chain.get_filenames()) if tgt_chain else []
+        for src_sig_filename in src_chain.get_filenames():
+            src_sig = file_naming.parse(src_sig_filename)
+            if not (src_sig.time or src_sig.end_time) < time:
+                continue
+            try:
+                tgt_sigs.remove(src_sig)
+                log.Info(_("Signature %s already replicated") % (src_sig_filename,))
+                continue
+            except ValueError:
+                pass
+            if src_sig.type == 'new-sig':
+                dup_time.setprevtime(src_sig.start_time)
+            dup_time.setcurtime(src_sig.time or src_sig.end_time)
+            log.Notice(_("Replicating %s.") % (src_sig_filename,))
+            fileobj = globals.src_backend.get_fileobj_read(src_sig_filename)
+            filename = file_naming.get(src_sig.type, encrypted=globals.encryption, gzipped=globals.compression)
+            tdp = dup_temp.new_tempduppath(file_naming.parse(filename))
+            tmpobj = tdp.filtered_open(mode='wb')
+            util.copyfileobj(fileobj, tmpobj) # decrypt, compress, (re)-encrypt
+            tmpobj.close()
+            globals.backend.put(tdp, filename)
+
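+    # Second pass: replicate the backup chains (volumes and manifests).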
+    src_chainlist = src_stats.get_backup_chains(filename_list=src_list)[0]
+    tgt_chainlist = tgt_stats.get_backup_chains(filename_list=tgt_list)[0]
+    src_chainlist.sort(key=lambda chain: chain.start_time)
+    tgt_chainlist.sort(key=lambda chain: chain.start_time)
+    for src_chain in src_chainlist:
+        try:
+            tgt_chain = filter(lambda chain: chain.start_time == src_chain.start_time, tgt_chainlist)[0]
+        except IndexError:
+            tgt_chain = None
+
+        tgt_sets = tgt_chain.get_all_sets() if tgt_chain else []
+        for src_set in src_chain.get_all_sets():
+            if not src_set.get_time() < time:
+                continue
+            try:
+                tgt_sets.remove(src_set)
+                log.Info(_("Backupset %s already replicated") % (src_set.remote_manifest_name,))
+                continue
+            except ValueError:
+                pass
+            if src_set.type == 'inc':
+                dup_time.setprevtime(src_set.start_time)
+            dup_time.setcurtime(src_set.get_time())
+            rmf = src_set.get_remote_manifest()
+            mf_filename = file_naming.get(src_set.type, manifest=True)
+            mf_tdp = dup_temp.new_tempduppath(file_naming.parse(mf_filename))
+            mf = manifest.Manifest(fh=mf_tdp.filtered_open(mode='wb'))
+            for i, filename in src_set.volume_name_dict.iteritems():
+                log.Notice(_("Replicating %s.") % (filename,))
+                fileobj = restore_get_enc_fileobj(globals.src_backend, filename, rmf.volume_info_dict[i])
+                filename = file_naming.get(src_set.type, i, encrypted=globals.encryption, gzipped=globals.compression)
+                tdp = dup_temp.new_tempduppath(file_naming.parse(filename))
+                tmpobj = tdp.filtered_open(mode='wb')
+                util.copyfileobj(fileobj, tmpobj) # decrypt, compress, (re)-encrypt
+                tmpobj.close()
+                globals.backend.put(tdp, filename)
+
+                vi = copy.copy(rmf.volume_info_dict[i])
+                vi.set_hash("SHA1", gpg.get_hash("SHA1", tdp))
+                mf.add_volume_info(vi)
+
+            mf.fh.close()
+            # incremental GPG writes hang on close, so write the manifest plain and compress/encrypt it in one pass here
+            mf_final_filename = file_naming.get(src_set.type, manifest=True, encrypted=globals.encryption, gzipped=globals.compression)
+            mf_final_tdp = dup_temp.new_tempduppath(file_naming.parse(mf_final_filename))
+            mf_final_fileobj = mf_final_tdp.filtered_open(mode='wb')
+            util.copyfileobj(mf_tdp.filtered_open(mode='rb'), mf_final_fileobj) # compress, encrypt
+            mf_final_fileobj.close()
+            globals.backend.put(mf_final_tdp, mf_final_filename)
+
+    globals.src_backend.close()
+    globals.backend.close()
 
 def sync_archive(decrypt):
     """
@@ -1408,8 +1511,9 @@
     check_resources(action)
 
     # check archive synch with remote, fix if needed
-    decrypt = action not in ["collection-status"]
-    sync_archive(decrypt)
+    if action != "replicate":
+        decrypt = action not in ["collection-status"]
+        sync_archive(decrypt)
 
     # get current collection status
     col_stats = collections.CollectionsStatus(globals.backend,
@@ -1483,6 +1587,8 @@
         remove_all_but_n_full(col_stats)
     elif action == "sync":
         sync_archive(True)
+    elif action == "replicate":
+        replicate()
     else:
         assert action == "inc" or action == "full", action
         # the passphrase for full and inc is used by --sign-key

=== modified file 'bin/duplicity.1'
--- bin/duplicity.1	2017-02-13 16:57:00 +0000
+++ bin/duplicity.1	2017-04-20 12:10:37 +0000
@@ -48,6 +48,10 @@
 .I [options] [--force] [--extra-clean]
 target_url
 
+.B duplicity replicate
+.I [options] [--time time]
+source_url target_url
+
 .SH DESCRIPTION
 Duplicity incrementally backs up files and folders into
 tar-format volumes encrypted with GnuPG and places them to a
@@ -243,6 +247,19 @@
 .I --force
 will be needed to delete the files instead of just listing them.
 
+.TP
+.BI "replicate " "[--time time] <source_url> <target_url>"
+Replicate backup sets from the source to the target backend. Files are
+(re)-encrypted and (re)-compressed according to the normal backend
+options. Signatures and volumes are not recomputed, so options like
+.BI --volsize
+or
+.BI --max-blocksize
+have no effect.
+When
+.I --time time
+is given, only backup sets older than the given time are replicated.
+
 .SH OPTIONS
 
 .TP

=== modified file 'duplicity/collections.py'
--- duplicity/collections.py	2017-02-27 13:18:57 +0000
+++ duplicity/collections.py	2017-04-20 12:10:37 +0000
@@ -294,6 +294,15 @@
         """
         return len(self.volume_name_dict.keys())
 
+    def __eq__(self, other):
+        """
+        Return whether this backup set is equal to other
+        """
+        return self.type == other.type and \
+            self.time == other.time and \
+            self.start_time == other.start_time and \
+            self.end_time == other.end_time and \
+            len(self) == len(other)
 
 class BackupChain:
     """
@@ -642,7 +651,7 @@
              u"-----------------",
              _("Connecting with backend: %s") %
              (self.backend.__class__.__name__,),
-             _("Archive dir: %s") % (util.ufn(self.archive_dir_path.name),)]
+             _("Archive dir: %s") % (util.ufn(self.archive_dir_path.name if self.archive_dir_path else 'None'),)]
 
         l.append("\n" +
                  ngettext("Found %d secondary backup chain.",
@@ -697,7 +706,7 @@
                   len(backend_filename_list))
 
         # get local filename list
-        local_filename_list = self.archive_dir_path.listdir()
+        local_filename_list = self.archive_dir_path.listdir() if self.archive_dir_path else []
         log.Debug(ngettext("%d file exists in cache",
                            "%d files exist in cache",
                            len(local_filename_list)) %
@@ -894,7 +903,7 @@
             if filelist is not None:
                 return filelist
             elif local:
-                return self.archive_dir_path.listdir()
+                return self.archive_dir_path.listdir() if self.archive_dir_path else []
             else:
                 return self.backend.list()
 

=== modified file 'duplicity/commandline.py'
--- duplicity/commandline.py	2017-02-27 13:18:57 +0000
+++ duplicity/commandline.py	2017-04-20 12:10:37 +0000
@@ -54,6 +54,7 @@
 collection_status = None  # Will be set to true if collection-status command given
 cleanup = None  # Set to true if cleanup command given
 verify = None  # Set to true if verify command given
+replicate = None  # Set to true if replicate command given
 
 commands = ["cleanup",
             "collection-status",
@@ -65,6 +66,7 @@
             "remove-all-inc-of-but-n-full",
             "restore",
             "verify",
+            "replicate"
             ]
 
 
@@ -236,7 +238,7 @@
 def parse_cmdline_options(arglist):
     """Parse argument list"""
     global select_opts, select_files, full_backup
-    global list_current, collection_status, cleanup, remove_time, verify
+    global list_current, collection_status, cleanup, remove_time, verify, replicate
 
     def set_log_fd(fd):
         if fd < 1:
@@ -706,6 +708,9 @@
         num_expect = 1
     elif cmd == "verify":
         verify = True
+    elif cmd == "replicate":
+        replicate = True
+        num_expect = 2
 
     if len(args) != num_expect:
         command_line_error("Expected %d args, got %d" % (num_expect, len(args)))
@@ -724,7 +729,12 @@
     elif len(args) == 1:
         backend_url = args[0]
     elif len(args) == 2:
-        lpath, backend_url = args_to_path_backend(args[0], args[1])  # @UnusedVariable
+        if replicate:
+            if not backend.is_backend_url(args[0]) or not backend.is_backend_url(args[1]):
+                command_line_error("Two URLs expected for replicate.")
+            src_backend_url, backend_url = args[0], args[1]
+        else:
+            lpath, backend_url = args_to_path_backend(args[0], args[1])  # @UnusedVariable
     else:
         command_line_error("Too many arguments")
 
@@ -899,6 +909,7 @@
   duplicity remove-older-than %(time)s [%(options)s] %(target_url)s
   duplicity remove-all-but-n-full %(count)s [%(options)s] %(target_url)s
   duplicity remove-all-inc-of-but-n-full %(count)s [%(options)s] %(target_url)s
+  duplicity replicate [%(options)s] %(source_url)s %(target_url)s
 
 """ % dict
 
@@ -944,7 +955,8 @@
   remove-older-than <%(time)s> <%(target_url)s>
   remove-all-but-n-full <%(count)s> <%(target_url)s>
   remove-all-inc-of-but-n-full <%(count)s> <%(target_url)s>
-  verify <%(target_url)s> <%(source_dir)s>""" % dict
+  verify <%(target_url)s> <%(source_dir)s>
+  replicate <%(source_url)s> <%(target_url)s>""" % dict
 
     return msg
 
@@ -1047,7 +1059,7 @@
 
 def check_consistency(action):
     """Final consistency check, see if something wrong with command line"""
-    global full_backup, select_opts, list_current
+    global full_backup, select_opts, list_current, collection_status, cleanup, replicate
 
     def assert_only_one(arglist):
         """Raises error if two or more of the elements of arglist are true"""
@@ -1058,8 +1070,8 @@
         assert n <= 1, "Invalid syntax, two conflicting modes specified"
 
     if action in ["list-current", "collection-status",
-                  "cleanup", "remove-old", "remove-all-but-n-full", "remove-all-inc-of-but-n-full"]:
-        assert_only_one([list_current, collection_status, cleanup,
+                  "cleanup", "remove-old", "remove-all-but-n-full", "remove-all-inc-of-but-n-full", "replicate"]:
+        assert_only_one([list_current, collection_status, cleanup, replicate,
                          globals.remove_time is not None])
     elif action == "restore" or action == "verify":
         if full_backup:
@@ -1137,22 +1149,27 @@
 "file:///usr/local".  See the man page for more information.""") % (args[0],),
                            log.ErrorCode.bad_url)
     elif len(args) == 2:
-        # Figure out whether backup or restore
-        backup, local_pathname = set_backend(args[0], args[1])
-        if backup:
-            if full_backup:
-                action = "full"
-            else:
-                action = "inc"
+        if replicate:
+            globals.src_backend = backend.get_backend(args[0])
+            globals.backend = backend.get_backend(args[1])
+            action = "replicate"
         else:
-            if verify:
-                action = "verify"
+            # Figure out whether backup or restore
+            backup, local_pathname = set_backend(args[0], args[1])
+            if backup:
+                if full_backup:
+                    action = "full"
+                else:
+                    action = "inc"
             else:
-                action = "restore"
+                if verify:
+                    action = "verify"
+                else:
+                    action = "restore"
 
-        process_local_dir(action, local_pathname)
-        if action in ['full', 'inc', 'verify']:
-            set_selection()
+            process_local_dir(action, local_pathname)
+            if action in ['full', 'inc', 'verify']:
+                set_selection()
     elif len(args) > 2:
         raise AssertionError("this code should not be reachable")
 

=== modified file 'duplicity/file_naming.py'
--- duplicity/file_naming.py	2016-06-28 21:03:46 +0000
+++ duplicity/file_naming.py	2017-04-20 12:10:37 +0000
@@ -436,3 +436,12 @@
         self.encrypted = encrypted  # true if gpg encrypted
 
         self.partial = partial
+
+    def __eq__(self, other):
+        return self.type == other.type and \
+            self.manifest == other.manifest and \
+            self.time == other.time and \
+            self.start_time == other.start_time and \
+            self.end_time == other.end_time and \
+            self.partial == other.partial
+

