duplicity-team team mailing list archive
-
duplicity-team team
-
Mailing list archive
-
Message #04266
[Merge] lp:~marix/duplicity/add-azure-arguments into lp:duplicity
Matthias Bach has proposed merging lp:~marix/duplicity/add-azure-arguments into lp:duplicity.
Requested reviews:
duplicity-team (duplicity-team)
For more details, see:
https://code.launchpad.net/~marix/duplicity/add-azure-arguments/+merge/323564
While using the Azure backend to store large amounts of data, we found that performance is sub-optimal. The changes on this branch add command line parameters to fine-tune some parameters of the Azure storage library, making it possible to push write performance towards Azure above 1 Gb/s for large back-ups. If a user does not provide the parameters, the defaults of the Azure storage library will continue to be used.
--
Your team duplicity-team is requested to review the proposed merge of lp:~marix/duplicity/add-azure-arguments into lp:duplicity.
=== modified file 'bin/duplicity.1'
--- bin/duplicity.1 2017-04-22 19:30:28 +0000
+++ bin/duplicity.1 2017-05-03 14:16:05 +0000
@@ -782,6 +782,25 @@
when uploading to S3 to ensure you kill connections to slow S3 endpoints.
.TP
+.BI "--azure-max-single-put-size"
+Specify the number of the largest supported upload size where the Azure
+library makes only one put call. If the content size is known and below this
+value the Azure library will only perform one put request to upload one block.
+The number is expected to be in bytes.
+
+.TP
+.BI "--azure-max-block-size"
+Specify the number for the block size used by the Azure library to upload
+blobs if it is split into multiple blocks.
+The maximum block size the service supports is 104857600 (100MiB) and the
+default is 4194304 (4MiB).
+
+.TP
+.BI "--azure-max-connections"
+Specify the maximum number of connections used to transfer one blob to Azure
+when the blob size exceeds 64MB. The default value is 2.
+
+.TP
.BI "--scp-command " command
.B (only ssh pexpect backend with --use-scp enabled)
The
=== modified file 'duplicity/backends/azurebackend.py'
--- duplicity/backends/azurebackend.py 2017-03-13 17:16:08 +0000
+++ duplicity/backends/azurebackend.py 2017-05-03 14:16:05 +0000
@@ -22,6 +22,7 @@
import os
import duplicity.backend
+from duplicity import globals
from duplicity import log
from duplicity.errors import BackendException
@@ -73,6 +74,24 @@
raise BackendException(
'Neither AZURE_ACCOUNT_KEY nor AZURE_SHARED_ACCESS_SIGNATURE environment variable not set.')
+ if globals.azure_max_single_put_size:
+ # check if we use azure-storage>=0.30.0
+ try:
+ _ = self.blob_service.MAX_SINGLE_PUT_SIZE
+ self.blob_service.MAX_SINGLE_PUT_SIZE = globals.azure_max_single_put_size
+ # fallback for azure-storage<0.30.0
+ except AttributeError:
+ self.blob_service._BLOB_MAX_DATA_SIZE = globals.azure_max_single_put_size
+
+ if globals.azure_max_block_size:
+ # check if we use azure-storage>=0.30.0
+ try:
+ _ = self.blob_service.MAX_BLOCK_SIZE
+ self.blob_service.MAX_BLOCK_SIZE = globals.azure_max_block_size
+ # fallback for azure-storage<0.30.0
+ except AttributeError:
+ self.blob_service._BLOB_MAX_CHUNK_DATA_SIZE = globals.azure_max_block_size
+
def _create_container(self):
try:
self.blob_service.create_container(self.container, fail_on_exist=True)
@@ -85,11 +104,15 @@
log.ErrorCode.connection_failed)
def _put(self, source_path, remote_filename):
+ kwargs = {}
+ if globals.azure_max_connections:
+ kwargs['max_connections'] = globals.azure_max_connections
+
# https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-a-blob-into-a-container
try:
- self.blob_service.create_blob_from_path(self.container, remote_filename, source_path.name)
+ self.blob_service.create_blob_from_path(self.container, remote_filename, source_path.name, **kwargs)
except AttributeError: # Old versions use a different method name
- self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name)
+ self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name, **kwargs)
def _get(self, remote_filename, local_path):
# https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs
=== modified file 'duplicity/commandline.py'
--- duplicity/commandline.py 2017-04-26 16:02:56 +0000
+++ duplicity/commandline.py 2017-05-03 14:16:05 +0000
@@ -561,6 +561,21 @@
# Option to allow use of server side encryption in s3
parser.add_option("--s3-use-server-side-encryption", action="store_true", dest="s3_use_sse")
+ # Number of the largest supported upload size where the Azure library makes only one put call.
+ # This is used to upload a single block if the content length is known and is less than this value.
+ # The default is 67108864 (64MiB)
+ parser.add_option("--azure-max-single-put-size", type="int", metavar=_("number"))
+
+ # Number for the block size used by the Azure library to upload a blob if the length is unknown
+ # or is larger than the value set by --azure-max-single-put-size.
+ # The maximum block size the service supports is 100MiB.
+ # The default is 4 * 1024 * 1024 (4MiB)
+ parser.add_option("--azure-max-block-size", type="int", metavar=_("number"))
+
+ # The number for the maximum parallel connections to use when the blob size exceeds 64MB.
+ # max_connections (int) – Maximum number of parallel connections to use when the blob size exceeds 64MB.
+ parser.add_option("--azure-max-connections", type="int", metavar=_("number"))
+
# scp command to use (ssh pexpect backend)
parser.add_option("--scp-command", metavar=_("command"))
=== modified file 'duplicity/globals.py'
--- duplicity/globals.py 2017-02-27 13:18:57 +0000
+++ duplicity/globals.py 2017-05-03 14:16:05 +0000
@@ -215,6 +215,15 @@
# Use server side encryption in s3
s3_use_sse = False
+# The largest size upload supported in a single put call for azure
+azure_max_single_put_size = None
+
+# The size of the blocks put to azure blob storage if bigger than azure_max_single_put_size
+azure_max_block_size = None
+
+# Maximum number of parallel connections to use when the blob size for azure exceeds 64MB
+azure_max_connections = None
+
# Whether to use the full email address as the user name when
# logging into an imap server. If false just the user name
# part of the email address is used.
Follow ups