
duplicity-team team mailing list archive

[Merge] lp:~carlos-abalde/duplicity/google-docs into lp:duplicity

 

Carlos has proposed merging lp:~carlos-abalde/duplicity/google-docs into lp:duplicity.

Requested reviews:
  duplicity-team (duplicity-team)

For more details, see:
https://code.launchpad.net/~carlos-abalde/duplicity/google-docs/+merge/70360

A new backend implementation that stores backups in Google Docs folders.

It's not very fast, but it's handy for personal backups given the low cost of Google's storage. It has been tested for about a week, backing up roughly 15 GB without problems. It supports captchas, 2-step verification and folders, and depends on the Google Data APIs Python client library (usually the python-gdata package).

An example backend URL is gdocs://carlos.abalde:s3cr3t@xxxxxxxxx/duplicity/personal/projects
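
As a minimal illustration (the account, password and folder below are hypothetical, and python-gdata plus this branch must be installed), such a URL resolves to the new backend through duplicity's backend registry and can be exercised directly from Python:

    import duplicity.backend
    import duplicity.backends.gdocsbackend  # importing the module registers the 'gdocs' scheme

    # Instantiating the backend logs in and creates the remote folder
    # hierarchy if it does not exist yet.
    backend = duplicity.backend.get_backend(
        'gdocs://user:s3cr3t@gmail.com/duplicity/personal/projects')

    # List the backup volumes already stored in the destination folder.
    print backend.list()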


-- 
https://code.launchpad.net/~carlos-abalde/duplicity/google-docs/+merge/70360
Your team duplicity-team is requested to review the proposed merge of lp:~carlos-abalde/duplicity/google-docs into lp:duplicity.
=== modified file 'dist/makedist'
--- dist/makedist	2011-06-12 14:18:45 +0000
+++ dist/makedist	2011-08-03 20:04:34 +0000
@@ -122,6 +122,7 @@
         "backends/tahoebackend.py",
         "backends/u1backend.py",
         "backends/webdavbackend.py",
+        "backends/gdocsbackend.py",
         ]:
         assert not os.system("cp %s/%s %s/src/backends" %
                              (SourceDir, filename, tardir)), filename

=== modified file 'duplicity.1'
--- duplicity.1	2011-07-16 18:37:47 +0000
+++ duplicity.1	2011-08-03 20:04:34 +0000
@@ -63,7 +63,7 @@
 Duplicity incrementally backs up files and directory
 by encrypting tar-format volumes with GnuPG and uploading them to a
 remote (or local) file server.  Currently local, ftp, ssh/scp, rsync,
-WebDAV, WebDAVs, HSi and Amazon S3 backends are available.
+WebDAV, WebDAVs, Google Docs, HSi and Amazon S3 backends are available.
 Because duplicity uses
 librsync, the incremental archives are space efficient and only record
 the parts of files that have changed since the last backup.  Currently
@@ -826,6 +826,8 @@
 webdav://user[:password]@other.host/some_dir
 .PP
 webdavs://user[:password]@other.host/some_dir
+.PP
+gdocs://user[:password]@other.host/some_dir
 
 .RE
 

=== modified file 'duplicity/backend.py'
--- duplicity/backend.py	2011-06-17 06:21:42 +0000
+++ duplicity/backend.py	2011-08-03 20:04:34 +0000
@@ -183,6 +183,7 @@
                                      'u1',
                                      'scp', 'ssh', 'sftp',
                                      'webdav', 'webdavs',
+                                     'gdocs',
                                      'http', 'https',
                                      'imap', 'imaps']
 

=== added file 'duplicity/backends/gdocsbackend.py'
--- duplicity/backends/gdocsbackend.py	1970-01-01 00:00:00 +0000
+++ duplicity/backends/gdocsbackend.py	2011-08-03 20:04:34 +0000
@@ -0,0 +1,247 @@
+# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
+#
+# Copyright 2011 Carlos Abalde <carlos.abalde@xxxxxxxxx>
+#
+# This file is part of duplicity.
+#
+# Duplicity is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# Duplicity is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with duplicity; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import os.path
+import string
+import urllib
+
+import duplicity.backend
+from duplicity.backend import retry
+from duplicity import log
+from duplicity.errors import * #@UnusedWildImport
+
+class GDocsBackend(duplicity.backend.Backend):
+    """Connect to remote store using Google Google Documents List API"""
+
+    ROOT_FOLDER_ID = 'folder%3Aroot'
+    BACKUP_DOCUMENT_TYPE = 'application/binary'
+
+    def __init__(self, parsed_url):
+        duplicity.backend.Backend.__init__(self, parsed_url)
+
+        # Import Google Data APIs libraries.
+        try:
+            global atom
+            global gdata
+            import atom.data
+            import gdata.client
+            import gdata.docs.client
+            import gdata.docs.data
+        except ImportError:
+            raise BackendException('Google Docs backend requires Google Data APIs Python '
+                                   'Client Library (see http://code.google.com/p/gdata-python-client/).')
+
+        # Setup client instance.
+        self.client = gdata.docs.client.DocsClient(source='duplicity $version')
+        self.client.ssl = True
+        self.client.http_client.debug = False
+        self.__authorize(parsed_url.username + '@' + parsed_url.hostname, self.get_password())
+
+        # Fetch destination folder entry (and create the folder hierarchy if required).
+        folder_names = string.split(parsed_url.path[1:], '/')
+        parent_folder = None
+        parent_folder_id = GDocsBackend.ROOT_FOLDER_ID
+        for folder_name in folder_names:
+            entries = self.__fetch_entries(parent_folder_id, 'folder', folder_name)
+            if entries is not None:
+                if len(entries) == 1:
+                    parent_folder = entries[0]
+                elif len(entries) == 0:
+                    parent_folder = self.client.create(gdata.docs.data.FOLDER_LABEL, folder_name, parent_folder)
+                else:
+                    parent_folder = None
+                if parent_folder:
+                    parent_folder_id = parent_folder.resource_id.text
+                else:
+                    raise BackendException("Error while creating destination folder '%s'." % folder_name)
+            else:
+                raise BackendException("Error while fetching destination folder '%s'." % folder_name)
+        self.folder = parent_folder
+
+    @retry
+    def put(self, source_path, remote_filename=None, raise_errors = False):
+        """Transfer source_path to remote_filename"""
+        # Default remote file name.
+        if not remote_filename:
+            remote_filename = source_path.get_filename()
+
+        # Upload!
+        try:
+            # If remote file already exists in destination folder, remove it.
+            entries = self.__fetch_entries(self.folder.resource_id.text,
+                                           GDocsBackend.BACKUP_DOCUMENT_TYPE,
+                                           remote_filename)
+            for entry in entries:
+                self.client.delete(entry.get_edit_link().href + '?delete=true', force=True)
+            
+            # Set uploader instance. Note that resumable uploads are required in order to
+            # enable uploads for all file types.
+            # (see http://googleappsdeveloper.blogspot.com/2011/05/upload-all-file-types-to-any-google.html)
+            file = source_path.open()
+            uploader = gdata.client.ResumableUploader(
+              self.client, file, GDocsBackend.BACKUP_DOCUMENT_TYPE, os.path.getsize(file.name),
+              chunk_size=gdata.client.ResumableUploader.DEFAULT_CHUNK_SIZE,
+              desired_class=gdata.docs.data.DocsEntry)
+            if uploader:
+                # Chunked upload.
+                entry = gdata.docs.data.DocsEntry(title = atom.data.Title(text = remote_filename))
+                uri = '/feeds/upload/create-session/default/private/full?convert=false'
+                entry = uploader.UploadFile(uri, entry = entry)
+                if entry:
+                    # Move to destination folder.
+                    # TODO: any ideas on how to avoid this step?
+                    if self.client.Move(entry, self.folder):
+                        assert not file.close()
+                        return
+                    else:
+                        self.__handle_error("Failed to move uploaded file '%s' to destination remote folder '%s'"
+                                            % (source_path.get_filename(), self.folder.title.text), raise_errors)
+                else:
+                    self.__handle_error("Failed to upload file '%s' to remote folder '%s'" 
+                                        % (source_path.get_filename(), self.folder.title.text), raise_errors)
+            else:
+                self.__handle_error("Failed to initialize upload of file '%s' to remote folder '%s'"
+                         % (source_path.get_filename(), self.folder.title.text), raise_errors)
+            assert not file.close()
+        except Exception, e:
+            self.__handle_error("Failed to upload file '%s' to remote folder '%s': %s"
+                                % (source_path.get_filename(), self.folder.title.text, str(e)), raise_errors)
+
+    @retry
+    def get(self, remote_filename, local_path, raise_errors = False):
+        """Get remote filename, saving it to local_path"""
+        try:
+            entries = self.__fetch_entries(self.folder.resource_id.text,
+                                           GDocsBackend.BACKUP_DOCUMENT_TYPE,
+                                           remote_filename)
+            if len(entries) == 1:
+                entry = entries[0]
+                self.client.Download(entry, local_path.name)
+                local_path.setdata()
+                return
+            else:
+                self.__handle_error("Failed to find file '%s' in remote folder '%s'"
+                                    % (remote_filename, self.folder.title.text), raise_errors)
+        except Exception, e:
+            self.__handle_error("Failed to download file '%s' in remote folder '%s': %s"
+                                 % (remote_filename, self.folder.title.text, str(e)), raise_errors)
+
+    @retry
+    def list(self, raise_errors = False):
+        """List files in folder"""
+        try:
+            entries = self.__fetch_entries(self.folder.resource_id.text,
+                                           GDocsBackend.BACKUP_DOCUMENT_TYPE)
+            return [entry.title.text for entry in entries]
+        except Exception, e:
+            self.__handle_error("Failed to fetch list of files in remote folder '%s': %s"
+                                % (self.folder.title.text, str(e)), raise_errors)
+
+    @retry
+    def delete(self, filename_list, raise_errors = False):
+        """Delete files in filename_list"""
+        for filename in filename_list:
+            try:
+                entries = self.__fetch_entries(self.folder.resource_id.text,
+                                               GDocsBackend.BACKUP_DOCUMENT_TYPE,
+                                               filename)
+                if len(entries) > 0:
+                    success = True
+                    for entry in entries:
+                        if not self.client.delete(entry.get_edit_link().href + '?delete=true', force = True):
+                            success = False
+                    if not success:
+                        self.__handle_error("Failed to remove file '%s' in remote folder '%s'"
+                                            % (filename, self.folder.title.text), raise_errors)
+                else:
+                    log.Warn("Failed to fetch file '%s' in remote folder '%s'"
+                             % (filename, self.folder.title.text))
+            except Exception, e:
+                self.__handle_error("Failed to remove file '%s' in remote folder '%s': %s"
+                                    % (filename, self.folder.title.text, str(e)), raise_errors)
+
+    def __handle_error(self, message, raise_errors = True):
+        if raise_errors:
+            raise BackendException(message)
+        else:
+            log.FatalError(message, log.ErrorCode.backend_error)
+    
+    def __authorize(self, email, password, captcha_token = None, captcha_response = None):
+        try:
+            self.client.client_login(email,
+                                     password,
+                                     source = 'duplicity $version',
+                                     service = 'writely',
+                                     captcha_token = captcha_token,
+                                     captcha_response = captcha_response)
+        except gdata.client.CaptchaChallenge, challenge:
+            print('A captcha challenge is required. Please visit ' + challenge.captcha_url)
+            answer = None
+            while not answer:
+                answer = raw_input('Answer to the challenge? ')
+            self.__authorize(email, password, challenge.captcha_token, answer)
+        except gdata.client.BadAuthentication:
+            self.__handle_error('Invalid user credentials given. Be aware that accounts '
+                                'that use 2-step verification require creating an application-specific '
+                                'access code for using this Duplicity backend. Follow the instructions at '
+                                'http://www.google.com/support/accounts/bin/static.py?page=guide.cs&guide=1056283&topic=1056286 '
+                                'and create your application-specific password to run duplicity backups.')
+        except Exception, e:
+            self.__handle_error('Error while authenticating client: %s.' % str(e))
+
+    def __fetch_entries(self, folder_id, type, title = None):
+        # Build URI.
+        uri = '/feeds/default/private/full/%s/contents' % folder_id
+        if type == 'folder':
+            uri += '/-/folder?showfolders=true'
+        elif type == GDocsBackend.BACKUP_DOCUMENT_TYPE:
+            uri += '?showfolders=false'
+        else:
+            uri += '?showfolders=true'
+        if title:
+            uri += '&title=' + urllib.quote(title) + '&title-exact=true'
+        
+        try:
+            # Fetch entries
+            entries = self.client.get_everything(uri = uri)
+            
+            # When filtering by entry title, the API also returns (for unknown reasons) documents in
+            # other folders (apart from folder_id) that match the title, so some extra filtering is required.
+            if title:
+                result = []
+                for entry in entries:
+                    if (not type) or (entry.get_document_type() == type):
+                        if folder_id != GDocsBackend.ROOT_FOLDER_ID:
+                            for link in entry.in_folders():
+                                folder_entry = self.client.get_entry(link.href, None, None,
+                                                                     desired_class=gdata.docs.data.DocsEntry)
+                                if folder_entry and (folder_entry.resource_id.text == folder_id):
+                                    result.append(entry)
+                        elif len(entry.in_folders()) == 0:
+                            result.append(entry)
+            else:
+                result = entries
+            
+            # Done!
+            return result
+        except Exception, e:
+            self.__handle_error('Error while fetching remote entries: %s.' % str(e))
+
+duplicity.backend.register_backend('gdocs', GDocsBackend)

=== modified file 'duplicity/commandline.py'
--- duplicity/commandline.py	2011-07-16 18:37:47 +0000
+++ duplicity/commandline.py	2011-08-03 20:04:34 +0000
@@ -750,6 +750,7 @@
   tahoe://%(alias)s/%(directory)s
   webdav://%(user)s[:%(password)s]@%(other_host)s/%(some_dir)s
   webdavs://%(user)s[:%(password)s]@%(other_host)s/%(some_dir)s
+  gdocs://%(user)s[:%(password)s]@%(other_host)s/%(some_dir)s
 
 """ % dict
 

=== modified file 'po/POTFILES.in'
--- po/POTFILES.in	2009-08-12 19:05:52 +0000
+++ po/POTFILES.in	2011-08-03 20:04:34 +0000
@@ -43,3 +43,4 @@
 duplicity/backends/sshbackend.py
 duplicity/backends/tahoebackend.py
 duplicity/backends/webdavbackend.py
+duplicity/backends/gdocsbackend.py

=== modified file 'testing/config.py.tmpl'
--- testing/config.py.tmpl	2011-06-17 18:22:28 +0000
+++ testing/config.py.tmpl	2011-08-03 20:04:34 +0000
@@ -83,6 +83,8 @@
 webdavs_url = None
 webdavs_password = None
 
+gdocs_url = None
+gdocs_password = None
 
 def setup():
     """ setup for unit tests """

