← Back to team overview

duplicity-team team mailing list archive

[Merge] lp:~bmerry/duplicity/pydrive-id-cache into lp:duplicity

 

Bruce Merry has proposed merging lp:~bmerry/duplicity/pydrive-id-cache into lp:duplicity.

Requested reviews:
  duplicity-team (duplicity-team)

For more details, see:
https://code.launchpad.net/~bmerry/duplicity/pydrive-id-cache/+merge/270903

Second attempt: I've now merged with the master branch. That merge did some strange things (it undid a whole bunch of my changes, which I had to restore in the following commit), so please take a quick look to ensure that this merge is sane (I don't normally use bzr, so I might be doing it wrong).

This fixes the issue a number of users (including myself) have been having with duplicity creating files with duplicate filenames on Google Drive. It keeps a runtime cache of filename-to-object-ID mappings, so that once it has uploaded an object it won't be fooled by weakly consistent directory listings. I've been using it for a while with no more duplicate-filename issues, and another user has reported that it has fixed his issues as well.
-- 
Your team duplicity-team is requested to review the proposed merge of lp:~bmerry/duplicity/pydrive-id-cache into lp:duplicity.
=== modified file 'duplicity/backends/pydrivebackend.py'
--- duplicity/backends/pydrivebackend.py	2015-09-06 14:48:27 +0000
+++ duplicity/backends/pydrivebackend.py	2015-09-13 17:55:46 +0000
@@ -20,6 +20,7 @@
 import os
 
 import duplicity.backend
+from duplicity import log
 from duplicity.errors import BackendException
 
 
@@ -35,6 +36,7 @@
             from oauth2client.client import SignedJwtAssertionCredentials
             from pydrive.auth import GoogleAuth
             from pydrive.drive import GoogleDrive
+            from pydrive.files import FileNotUploadedError
         except ImportError:
             raise BackendException('PyDrive backend requires PyDrive installation'
                                    'Please read the manpage to fix.')
@@ -73,49 +75,109 @@
                 folder.Upload()
             parent_folder_id = folder['id']
         self.folder = parent_folder_id
-
-    def FilesList(self):
-        return self.drive.ListFile({'q': "'" + self.folder + "' in parents and trashed=false"}).GetList()
+        self.id_cache = {}
+
+    def file_by_name(self, filename):
+        from pydrive.files import ApiRequestError
+        if filename in self.id_cache:
+            # It might since have been locally moved, renamed or deleted, so we
+            # need to validate the entry.
+            file_id = self.id_cache[filename]
+            drive_file = self.drive.CreateFile({'id': file_id})
+            try:
+                if drive_file['title'] == filename and not drive_file['labels']['trashed']:
+                    for parent in drive_file['parents']:
+                        if parent['id'] == self.folder:
+                            log.Info("PyDrive backend: found file '%s' with id %s in ID cache" % (filename, file_id))
+                            return drive_file
+            except ApiRequestError as error:
+                # A 404 occurs if the ID is no longer valid
+                if error.args[0].resp.status != 404:
+                    raise
+            # If we get here, the cache entry is invalid
+            log.Info("PyDrive backend: invalidating '%s' (previously ID %s) from ID cache" % (filename, file_id))
+            del self.id_cache[filename]
+
+        # Not found in the cache, so use directory listing. This is less
+        # reliable because there is no strong consistency.
+        q = "title='%s' and '%s' in parents and trashed=false" % (filename, self.folder)
+        fields = 'items(title,id,fileSize,downloadUrl,exportLinks),nextPageToken'
+        flist = self.drive.ListFile({'q': q, 'fields': fields}).GetList()
+        if len(flist) > 1:
+            log.FatalError(_("PyDrive backend: multiple files called '%s'.") % (filename,))
+        elif flist:
+            file_id = flist[0]['id']
+            self.id_cache[filename] = flist[0]['id']
+            log.Info("PyDrive backend: found file '%s' with id %s on server, adding to cache" % (filename, file_id))
+            return flist[0]
+        log.Info("PyDrive backend: file '%s' not found in cache or on server" % (filename,))
+        return None
 
     def id_by_name(self, filename):
-        try:
-            return next(item for item in self.FilesList() if item['title'] == filename)['id']
-        except:
+        drive_file = self.file_by_name(filename)
+        if drive_file is None:
             return ''
+        else:
+            return drive_file['id']
 
     def _put(self, source_path, remote_filename):
-        drive_file = self.drive.CreateFile({'title': remote_filename, 'parents': [{"kind": "drive#fileLink", "id": self.folder}]})
+        drive_file = self.file_by_name(remote_filename)
+        if drive_file is None:
+            # No existing file, make a new one
+            drive_file = self.drive.CreateFile({'title': remote_filename, 'parents': [{"kind": "drive#fileLink", "id": self.folder}]})
+            log.Info("PyDrive backend: creating new file '%s'" % (remote_filename,))
+        else:
+            log.Info("PyDrive backend: replacing existing file '%s' with id '%s'" % (
+                remote_filename, drive_file['id']))
         drive_file.SetContentFile(source_path.name)
         drive_file.Upload()
+        self.id_cache[remote_filename] = drive_file['id']
 
     def _get(self, remote_filename, local_path):
-        drive_file = self.drive.CreateFile({'id': self.id_by_name(remote_filename)})
+        drive_file = self.file_by_name(remote_filename)
         drive_file.GetContentFile(local_path.name)
 
     def _list(self):
-        return [item['title'] for item in self.FilesList()]
+        drive_files = self.drive.ListFile({
+            'q': "'" + self.folder + "' in parents and trashed=false",
+            'fields': 'items(title,id),nextPageToken'}).GetList()
+        filenames = set(item['title'] for item in drive_files)
+        # Check the cache as well. A file might have just been uploaded but
+        # not yet appear in the listing.
+        # Note: do not use iterkeys() here, because file_by_name will modify
+        # the cache if it finds invalid entries.
+        for filename in self.id_cache.keys():
+            if (filename not in filenames) and (self.file_by_name(filename) is not None):
+                filenames.add(filename)
+        return list(filenames)
 
     def _delete(self, filename):
         file_id = self.id_by_name(filename)
-        drive_file = self.drive.CreateFile({'id': file_id})
-        drive_file.auth.service.files().delete(fileId=drive_file['id']).execute()
-
-    def _delete_list(self, filename_list):
-        to_remove = set(filename_list)
-        for item in self.FilesList():
-            if item['title'] not in to_remove:
-                continue
-            file_id = item['id']
-            drive_file = self.drive.CreateFile({'id': file_id})
-            drive_file.auth.service.files().delete(fileId=drive_file['id']).execute()
+        if file_id != '':
+            self.drive.auth.service.files().delete(fileId=file_id).execute()
+        else:
+            log.Warn("File '%s' does not exist while trying to delete it" % (filename,))
 
     def _query(self, filename):
-        try:
-            size = int((item for item in self.FilesList() if item['title'] == filename).next()['fileSize'])
-        except:
+        drive_file = self.file_by_name(filename)
+        if drive_file is None:
             size = -1
+        else:
+            size = int(drive_file['fileSize'])
         return {'size': size}
 
+    def _error_code(self, operation, error):
+        from pydrive.files import ApiRequestError, FileNotUploadedError
+        if isinstance(error, FileNotUploadedError):
+            return log.ErrorCode.backend_not_found
+        elif isinstance(error, ApiRequestError):
+            http_status = error.args[0].resp.status
+            if http_status == 404:
+                return log.ErrorCode.backend_not_found
+            elif http_status == 403:
+                return log.ErrorCode.backend_permission_denied
+        return log.ErrorCode.backend_error
+
 duplicity.backend.register_backend('pydrive', PyDriveBackend)
 """ pydrive is an alternate way to access gdocs """
 duplicity.backend.register_backend('pydrive+gdocs', PyDriveBackend)


Follow ups