duplicity-team team mailing list archive
-
duplicity-team team
-
Mailing list archive
-
Message #03072
[Merge] lp:~bmerry/duplicity/pydrive-id-cache into lp:duplicity
Bruce Merry has proposed merging lp:~bmerry/duplicity/pydrive-id-cache into lp:duplicity.
Requested reviews:
duplicity-team (duplicity-team)
For more details, see:
https://code.launchpad.net/~bmerry/duplicity/pydrive-id-cache/+merge/269803
This fixes an issue that a number of users (myself included) have been having, where duplicity creates files with duplicate filenames on Google Drive. It keeps a runtime cache of filename-to-object-ID mappings, so that once it has uploaded an object it won't be fooled by weakly consistent directory listings. I've been using it for a while with no more duplicate-filename issues, and another user has reported that it has fixed his issues as well.
--
Your team duplicity-team is requested to review the proposed merge of lp:~bmerry/duplicity/pydrive-id-cache into lp:duplicity.
=== modified file 'duplicity/backends/pydrivebackend.py'
--- duplicity/backends/pydrivebackend.py 2015-06-17 18:20:51 +0000
+++ duplicity/backends/pydrivebackend.py 2015-09-01 20:04:35 +0000
@@ -20,6 +20,7 @@
import os
import duplicity.backend
+from duplicity import log
from duplicity.errors import BackendException
@@ -35,6 +36,7 @@
from oauth2client.client import SignedJwtAssertionCredentials
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
+ from pydrive.files import FileNotUploadedError
except ImportError:
raise BackendException('PyDrive backend requires PyDrive installation'
'Please read the manpage to fix.')
@@ -73,32 +75,88 @@
folder.Upload()
parent_folder_id = folder['id']
self.folder = parent_folder_id
+<<<<<<< TREE
def FilesList(self):
return self.drive.ListFile({'q': "'" + self.folder + "' in parents and trashed=false"}).GetList()
+=======
+ self.id_cache = {}
+
+ def file_by_name(self, filename):
+ from pydrive.files import ApiRequestError
+ if filename in self.id_cache:
+ # It might since have been locally moved, renamed or deleted, so we
+ # need to validate the entry.
+ file_id = self.id_cache[filename]
+ drive_file = self.drive.CreateFile({'id': file_id})
+ try:
+ if drive_file['title'] == filename and not drive_file['labels']['trashed']:
+ for parent in drive_file['parents']:
+ if parent['id'] == self.folder:
+ log.Info("PyDrive backend: found file '%s' with id %s in ID cache" % (filename, file_id))
+ return drive_file
+ except ApiRequestError as error:
+ # A 404 occurs if the ID is no longer valid
+ if error.args[0].resp.status != 404:
+ raise
+ # If we get here, the cache entry is invalid
+ log.Info("PyDrive backend: invalidating '%s' (previously ID %s) from ID cache" % (filename, file_id))
+ del self.id_cache[filename]
+
+ # Not found in the cache, so use directory listing. This is less
+ # reliable because there is no strong consistency.
+ q = "title='%s' and '%s' in parents and trashed=false" % (filename, self.folder)
+ fields = 'items(title,id,fileSize,downloadUrl,exportLinks),nextPageToken'
+ flist = self.drive.ListFile({'q': q, 'fields': fields}).GetList()
+ if len(flist) > 1:
+ log.FatalError(_("PyDrive backend: multiple files called '%s'.") % (filename,))
+ elif flist:
+ file_id = flist[0]['id']
+ self.id_cache[filename] = flist[0]['id']
+ return flist[0]
+ return None
+>>>>>>> MERGE-SOURCE
def id_by_name(self, filename):
- try:
- return next(item for item in self.FilesList() if item['title'] == filename)['id']
- except:
+ drive_file = self.file_by_name(filename)
+ if drive_file is None:
return ''
+ else:
+ return drive_file['id']
def _put(self, source_path, remote_filename):
- drive_file = self.drive.CreateFile({'title': remote_filename, 'parents': [{"kind": "drive#fileLink", "id": self.folder}]})
+ drive_file = self.file_by_name(remote_filename)
+ if drive_file is None:
+ # No existing file, make a new one
+ drive_file = self.drive.CreateFile({'title': remote_filename, 'parents': [{"kind": "drive#fileLink", "id": self.folder}]})
drive_file.SetContentFile(source_path.name)
drive_file.Upload()
+ self.id_cache[remote_filename] = drive_file['id']
def _get(self, remote_filename, local_path):
- drive_file = self.drive.CreateFile({'id': self.id_by_name(remote_filename)})
+ drive_file = self.file_by_name(remote_filename)
drive_file.GetContentFile(local_path.name)
def _list(self):
- return [item['title'] for item in self.FilesList()]
+ drive_files = self.drive.ListFile({
+ 'q': "'" + self.folder + "' in parents and trashed=false",
+ 'fields': 'items(title,id),nextPageToken'}).GetList()
+ filenames = set(item['title'] for item in drive_files)
+ # Check the cache as well. A file might have just been uploaded but
+ # not yet appear in the listing.
+ # Note: do not use iterkeys() here, because file_by_name will modify
+ # the cache if it finds invalid entries.
+ for filename in self.id_cache.keys():
+ if (filename not in filenames) and (self.file_by_name(filename) is not None):
+ filenames.add(filename)
+ return list(filenames)
def _delete(self, filename):
file_id = self.id_by_name(filename)
- drive_file = self.drive.CreateFile({'id': file_id})
- drive_file.auth.service.files().delete(fileId=drive_file['id']).execute()
+ if file_id != '':
+ self.drive.auth.service.files().delete(fileId=file_id).execute()
+ else:
+ log.Warn("File '%s' does not exist while trying to delete it" % (filename,))
def _delete_list(self, filename_list):
to_remove = set(filename_list)
@@ -110,12 +168,25 @@
drive_file.auth.service.files().delete(fileId=drive_file['id']).execute()
def _query(self, filename):
- try:
- size = int((item for item in self.FilesList() if item['title'] == filename).next()['fileSize'])
- except:
+ drive_file = self.file_by_name(filename)
+ if drive_file is None:
size = -1
+ else:
+ size = int(drive_file['fileSize'])
return {'size': size}
+ def _error_code(self, operation, error):
+ from pydrive.files import ApiRequestError, FileNotUploadedError
+ if isinstance(error, FileNotUploadedError):
+ return log.ErrorCode.backend_not_found
+ elif isinstance(error, ApiRequestError):
+ http_status = error.args[0].resp.status
+ if http_status == 404:
+ return log.ErrorCode.backend_not_found
+ elif http_status == 403:
+ return log.ErrorCode.backend_permission_denied
+ return log.ErrorCode.backend_error
+
duplicity.backend.register_backend('pydrive', PyDriveBackend)
""" pydrive is an alternate way to access gdocs """
duplicity.backend.register_backend('pydrive+gdocs', PyDriveBackend)