← Back to team overview

duplicity-team team mailing list archive

[Merge] lp:~marix/duplicity/add-azure-arguments into lp:duplicity

 

Matthias Bach has proposed merging lp:~marix/duplicity/add-azure-arguments into lp:duplicity.

Requested reviews:
  duplicity-team (duplicity-team)

For more details, see:
https://code.launchpad.net/~marix/duplicity/add-azure-arguments/+merge/323564

When using the Azure backend to store large amounts of data, we found that performance is sub-optimal. The changes on this branch add command line options to fine-tune some parameters of the Azure storage library, making it possible to push write performance to Azure above 1 Gb/s for large back-ups. If a user does not provide these options, the defaults of the Azure storage library will continue to be used.
-- 
Your team duplicity-team is requested to review the proposed merge of lp:~marix/duplicity/add-azure-arguments into lp:duplicity.
=== modified file 'bin/duplicity.1'
--- bin/duplicity.1	2017-04-22 19:30:28 +0000
+++ bin/duplicity.1	2017-05-03 14:16:05 +0000
@@ -782,6 +782,25 @@
 when uploading to S3 to ensure you kill connections to slow S3 endpoints.
 
 .TP
+.BI "--azure-max-single-put-size"
+Specify the largest upload size for which the Azure library makes only a
+single put call. If the content size is known and below this value, the
+Azure library will perform just one put request to upload one block.
+The number is expected to be in bytes.
+
+.TP
+.BI "--azure-max-block-size"
+Specify the block size used by the Azure library to upload a blob
+if it is split into multiple blocks.
+The maximum block size the service supports is 104857600 (100MiB) and the
+default is 4194304 (4MiB).
+
+.TP
+.BI "--azure-max-connections"
+Specify the maximum number of connections used to transfer one blob to Azure
+when the blob size exceeds 64MB. The default value is 2.
+
+.TP
 .BI "--scp-command " command
 .B (only ssh pexpect backend with --use-scp enabled)
 The

=== modified file 'duplicity/backends/azurebackend.py'
--- duplicity/backends/azurebackend.py	2017-03-13 17:16:08 +0000
+++ duplicity/backends/azurebackend.py	2017-05-03 14:16:05 +0000
@@ -22,6 +22,7 @@
 import os
 
 import duplicity.backend
+from duplicity import globals
 from duplicity import log
 from duplicity.errors import BackendException
 
@@ -73,6 +74,24 @@
             raise BackendException(
                 'Neither AZURE_ACCOUNT_KEY nor AZURE_SHARED_ACCESS_SIGNATURE environment variable not set.')
 
+        if globals.azure_max_single_put_size:
+            # check if we use azure-storage>=0.30.0
+            try:
+                _ = self.blob_service.MAX_SINGLE_PUT_SIZE
+                self.blob_service.MAX_SINGLE_PUT_SIZE = globals.azure_max_single_put_size
+            # fallback for azure-storage<0.30.0
+            except AttributeError:
+                self.blob_service._BLOB_MAX_DATA_SIZE = globals.azure_max_single_put_size
+
+        if globals.azure_max_block_size:
+            # check if we use azure-storage>=0.30.0
+            try:
+                _ = self.blob_service.MAX_BLOCK_SIZE
+                self.blob_service.MAX_BLOCK_SIZE = globals.azure_max_block_size
+            # fallback for azure-storage<0.30.0
+            except AttributeError:
+                self.blob_service._BLOB_MAX_CHUNK_DATA_SIZE = globals.azure_max_block_size
+
     def _create_container(self):
         try:
             self.blob_service.create_container(self.container, fail_on_exist=True)
@@ -85,11 +104,15 @@
                            log.ErrorCode.connection_failed)
 
     def _put(self, source_path, remote_filename):
+        kwargs = {}
+        if globals.azure_max_connections:
+            kwargs['max_connections'] = globals.azure_max_connections
+
         # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-a-blob-into-a-container
         try:
-            self.blob_service.create_blob_from_path(self.container, remote_filename, source_path.name)
+            self.blob_service.create_blob_from_path(self.container, remote_filename, source_path.name, **kwargs)
         except AttributeError:  # Old versions use a different method name
-            self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name)
+            self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name, **kwargs)
 
     def _get(self, remote_filename, local_path):
         # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs

=== modified file 'duplicity/commandline.py'
--- duplicity/commandline.py	2017-04-26 16:02:56 +0000
+++ duplicity/commandline.py	2017-05-03 14:16:05 +0000
@@ -561,6 +561,21 @@
     # Option to allow use of server side encryption in s3
     parser.add_option("--s3-use-server-side-encryption", action="store_true", dest="s3_use_sse")
 
+    # Number of the largest supported upload size where the Azure library makes only one put call.
+    # This is used to upload a single block if the content length is known and is less than this value.
+    # The default is 67108864 (64MiB)
+    parser.add_option("--azure-max-single-put-size", type="int", metavar=_("number"))
+
+    # Number for the block size used by the Azure library to upload a blob if the length is unknown
+    # or is larger than the value set by --azure-max-single-put-size.
+    # The maximum block size the service supports is 100MiB.
+    # The default is 4 * 1024 * 1024 (4MiB)
+    parser.add_option("--azure-max-block-size", type="int", metavar=_("number"))
+
+    # The number for the maximum parallel connections to use when the blob size exceeds 64MB.
+    # max_connections (int) – Maximum number of parallel connections to use when the blob size exceeds 64MB.
+    parser.add_option("--azure-max-connections", type="int", metavar=_("number"))
+
     # scp command to use (ssh pexpect backend)
     parser.add_option("--scp-command", metavar=_("command"))
 

=== modified file 'duplicity/globals.py'
--- duplicity/globals.py	2017-02-27 13:18:57 +0000
+++ duplicity/globals.py	2017-05-03 14:16:05 +0000
@@ -215,6 +215,15 @@
 # Use server side encryption in s3
 s3_use_sse = False
 
+# The largest size upload supported in a single put call for azure
+azure_max_single_put_size = None
+
+# The size of the blocks put to azure blob storage if bigger than azure_max_single_put_size
+azure_max_block_size = None
+
+# Maximum number of parallel connections to use when the blob size for azure exceeds 64MB
+azure_max_connections = None
+
 # Whether to use the full email address as the user name when
 # logging into an imap server. If false just the user name
 # part of the email address is used.


Follow ups