← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~allenap/maas/command-line-multiple-args into lp:maas

 

Gavin Panella has proposed merging lp:~allenap/maas/command-line-multiple-args into lp:maas.

Commit message:
Use the built-in email packages to build multipart/form-data messages.

Previously it was not possible to have non-ASCII fields, and edge-cases were not accounted for. The implementation is also tested with Django's multipart parser to give confidence that requests will work end-to-end.

Requested reviews:
  MAAS Maintainers (maas-maintainers)

For more details, see:
https://code.launchpad.net/~allenap/maas/command-line-multiple-args/+merge/125863
-- 
https://code.launchpad.net/~allenap/maas/command-line-multiple-args/+merge/125863
Your team MAAS Maintainers is requested to review the proposed merge of lp:~allenap/maas/command-line-multiple-args into lp:maas.
=== modified file 'src/apiclient/multipart.py'
--- src/apiclient/multipart.py	2012-05-23 15:45:24 +0000
+++ src/apiclient/multipart.py	2012-09-22 20:16:20 +0000
@@ -14,49 +14,104 @@
     'encode_multipart_data',
     ]
 
+from collections import Mapping
+from email.generator import Generator
+from email.mime.application import MIMEApplication
+from email.mime.multipart import MIMEMultipart
+from io import (
+    BytesIO,
+    IOBase,
+    )
+from itertools import chain
 import mimetypes
-import random
-import string
-
-
-def make_random_boundary(length=30):
-    """Create a random string for use in MIME boundary lines."""
-    return b''.join(random.choice(string.letters) for ii in range(length))
-
-
-def get_content_type(filename):
+
+
+def get_content_type(*names):
     """Return the MIME content type for the file with the given name."""
-    return mimetypes.guess_type(filename)[0] or b'application/octet-stream'
-
-
-def encode_field(field_name, data, boundary):
-    """MIME-encode a form field."""
-    field_name = field_name.encode('ascii')
-    return (
-        b'--' + boundary,
-        b'Content-Disposition: form-data; name="%s"' % field_name,
-        b'',
-        bytes(data),
-        )
-
-
-def encode_file(name, fileObj, boundary):
-    """MIME-encode a file upload."""
-    content_type = get_content_type(name)
-    name = name.encode('ascii')
-    return (
-        b'--' + boundary,
-        b'Content-Disposition: form-data; name="%s"; filename="%s"' %
-            (name, name),
-        b'Content-Type: %s' % content_type,
-        b'',
-        fileObj.read(),
-        )
-
-
-def encode_multipart_data(data, files):
+    for name in names:
+        if name is not None:
+            mimetype, encoding = mimetypes.guess_type(name)
+            if mimetype is not None:
+                return mimetype
+    else:
+        return "application/octet-stream"
+
+
+def make_bytes_payload(name, content):
+    payload = MIMEApplication(content)
+    payload.add_header("Content-Disposition", "form-data", name=name)
+    return payload
+
+
+def make_string_payload(name, content):
+    payload = MIMEApplication(content.encode("utf-8"), charset="utf-8")
+    payload.add_header("Content-Disposition", "form-data", name=name)
+    payload.set_type("text/plain")
+    return payload
+
+
+def make_file_payload(name, content):
+    payload = MIMEApplication(content.read())
+    payload.add_header(
+        "Content-Disposition", "form-data", name=name, filename=name)
+    names = name, getattr(content, "name", None)
+    payload.set_type(get_content_type(*names))
+    return payload
+
+
+def make_payload(name, content):
+    if isinstance(content, bytes):
+        return make_bytes_payload(name, content)
+    elif isinstance(content, unicode):
+        return make_string_payload(name, content)
+    elif isinstance(content, IOBase):
+        return make_file_payload(name, content)
+    elif callable(content):
+        with content() as content:
+            return make_payload(name, content)
+    else:
+        raise AssertionError(
+            "%r is unrecognised: %r" % (name, content))
+
+
+def build_multipart_message(data):
+    message = MIMEMultipart("form-data")
+    for name, content in data:
+        payload = make_payload(name, content)
+        message.attach(payload)
+    return message
+
+
+def encode_multipart_message(message):
+    # The message must be multipart.
+    assert message.is_multipart()
+    # The body length cannot yet be known.
+    assert "Content-Length" not in message
+    # So line-endings can be fixed-up later on, component payloads must have
+    # no Content-Length and their Content-Transfer-Encoding must be base64
+    # (and not quoted-printable, which Django doesn't appear to understand).
+    for part in message.get_payload():
+        assert "Content-Length" not in part
+        assert part["Content-Transfer-Encoding"] == "base64"
+    # Flatten the message without headers.
+    buf = BytesIO()
+    generator = Generator(buf, False)  # Don't mangle "^From".
+    generator._write_headers = lambda self: None  # Ignore.
+    generator.flatten(message)
+    # Ensure the body has CRLF-delimited lines. See
+    # http://bugs.python.org/issue1349106.
+    body = b"\r\n".join(buf.getvalue().splitlines())
+    # Only now is it safe to set the content length.
+    message.add_header("Content-Length", "%d" % len(body))
+    return message.items(), body
+
+
+def encode_multipart_data(data=(), files=()):
     """Create a MIME multipart payload from L{data} and L{files}.
 
+    **Note** that this function is deprecated. Use `prepare_multipart_message`
+    and `encode_multipart_message` instead.
+
     @param data: A mapping of names (ASCII strings) to data (byte string).
     @param files: A mapping of names (ASCII strings) to file objects ready to
         be read.
@@ -64,19 +119,10 @@
         and C{headers} is a dict of headers to add to the enclosing request in
         which this payload will travel.
     """
-    boundary = make_random_boundary()
-
-    lines = []
-    for name, content in data.items():
-        lines.extend(encode_field(name, content, boundary))
-    for name, file_obj in files.items():
-        lines.extend(encode_file(name, file_obj, boundary))
-    lines.extend((b'--%s--' % boundary, b''))
-    body = b'\r\n'.join(lines)
-
-    headers = {
-        b'content-type': b'multipart/form-data; boundary=' + boundary,
-        b'content-length': b'%s' % (len(body)),
-        }
-
-    return body, headers
+    if isinstance(data, Mapping):
+        data = data.items()
+    if isinstance(files, Mapping):
+        files = files.items()
+    message = build_multipart_message(chain(data, files))
+    headers, body = encode_multipart_message(message)
+    return body, dict(headers)

=== added file 'src/apiclient/testing/django.py'
--- src/apiclient/testing/django.py	1970-01-01 00:00:00 +0000
+++ src/apiclient/testing/django.py	2012-09-22 20:16:20 +0000
@@ -0,0 +1,48 @@
+# Copyright 2012 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Convenience functions for testing against Django."""
+
+from __future__ import (
+    absolute_import,
+    print_function,
+    unicode_literals,
+    )
+
+__metaclass__ = type
+__all__ = []
+
+from io import BytesIO
+
+from django.core.files.uploadhandler import MemoryFileUploadHandler
+from django.http.multipartparser import MultiPartParser
+
+
+def parse_headers_and_body_with_django(headers, body):
+    """Parse `headers` and `body` with Django's :class:`MultiPartParser`.
+
+    `MultiPartParser` is a curiously ugly and RFC non-compliant concoction.
+
+    Amongst other things, it coerces all field names, field data, and
+    filenames into Unicode strings using the "replace" error strategy, so be
+    warned that your data may be silently mangled.
+
+    It also, in 1.3.1 at least, does not recognise any transfer encodings at
+    *all* because its header parsing code was broken.
+
+    I'm also fairly sure that it'll fall over on headers than span more than
+    one line.
+
+    In short, it's a piece of code that inspires little confidence, yet we
+    must work with it, hence we need to round-trip test multipart handling
+    with it.
+    """
+    handler = MemoryFileUploadHandler()
+    meta = {
+        "HTTP_CONTENT_TYPE": headers["Content-Type"],
+        "HTTP_CONTENT_LENGTH": headers["Content-Length"],
+        }
+    parser = MultiPartParser(
+        META=meta, input_data=BytesIO(body),
+        upload_handlers=[handler])
+    return parser.parse()

=== modified file 'src/apiclient/tests/test_maas_client.py'
--- src/apiclient/tests/test_maas_client.py	2012-09-12 19:56:23 +0000
+++ src/apiclient/tests/test_maas_client.py	2012-09-22 20:16:20 +0000
@@ -12,7 +12,6 @@
 __metaclass__ = type
 __all__ = []
 
-from email.parser import Parser
 from random import randint
 from urlparse import (
     parse_qs,
@@ -25,6 +24,7 @@
     MAASDispatcher,
     MAASOAuth,
     )
+from apiclient.testing.django import parse_headers_and_body_with_django
 from maastesting.factory import factory
 from maastesting.testcase import TestCase
 
@@ -142,9 +142,9 @@
         params = {factory.getRandomString(): factory.getRandomString()}
         url, headers, body = make_client()._formulate_change(
             make_path(), params)
-        body = Parser().parsestr(body).get_payload()
-        self.assertIn('name="%s"' % params.keys()[0], body)
-        self.assertIn('\r\n%s\r\n' % params.values()[0], body)
+        post, _ = parse_headers_and_body_with_django(headers, body)
+        self.assertEqual(
+            {name: [value] for name, value in params.items()}, post)
 
     def test_get_dispatches_to_resource(self):
         path = make_path()
@@ -198,11 +198,9 @@
         client = make_client()
         client.post(make_path(), method, parameter=param)
         request = client.dispatcher.last_call
-        body = Parser().parsestr(request['data']).get_payload()
-        self.assertIn('name="op"', body)
-        self.assertIn('\r\n%s\r\n' % method, body)
-        self.assertIn('name="parameter"', body)
-        self.assertIn('\r\n%s\r\n' % param, body)
+        post, _ = parse_headers_and_body_with_django(
+            request["headers"], request["data"])
+        self.assertEqual({"parameter": [param], "op": [method]}, post)
 
     def test_put_dispatches_to_resource(self):
         path = make_path()

=== modified file 'src/apiclient/tests/test_multipart.py'
--- src/apiclient/tests/test_multipart.py	2012-05-23 15:45:24 +0000
+++ src/apiclient/tests/test_multipart.py	2012-09-22 20:16:20 +0000
@@ -13,74 +13,35 @@
 __all__ = []
 
 from io import BytesIO
-from random import randint
-import re
-from textwrap import dedent
+from os import urandom
 
 from apiclient.multipart import (
-    encode_field,
-    encode_file,
     encode_multipart_data,
     get_content_type,
-    make_random_boundary,
     )
+from apiclient.testing.django import parse_headers_and_body_with_django
+from django.utils.datastructures import MultiValueDict
 from maastesting.factory import factory
 from maastesting.testcase import TestCase
-from testtools.matchers import EndsWith
+from testtools.matchers import (
+    EndsWith,
+    StartsWith,
+    )
+
+
+ahem_django_ahem = (
+    "If the mismatch appears to be because the parsed values "
+    "are base64 encoded, then check you're using a >=1.4 release "
+    "of Django.")
 
 
 class TestMultiPart(TestCase):
 
-    def test_make_random_boundary_produces_bytes(self):
-        self.assertIsInstance(make_random_boundary(), bytes)
-
-    def test_make_random_boundary_produces_different_strings(self):
-        self.assertNotEqual(
-            make_random_boundary(),
-            make_random_boundary())
-
-    def test_make_random_boundary_obeys_length(self):
-        length = randint(5, 100)
-        self.assertEqual(length, len(make_random_boundary(length)))
-
-    def test_make_random_boundary_uses_no_weird_characters(self):
-        boundary = make_random_boundary(1000)
-        self.assertTrue(boundary.isalnum())
-        self.assertEqual([boundary], boundary.split())
-        self.assertNotIn('-', boundary)
-
     def test_get_content_type_guesses_type(self):
         guess = get_content_type('text.txt')
         self.assertEqual('text/plain', guess)
         self.assertIsInstance(guess, bytes)
 
-    def test_get_content_type_defaults_to_raw_bytes(self):
-        guess = get_content_type('mysterious-data')
-        self.assertEqual('application/octet-stream', guess)
-        self.assertIsInstance(guess, bytes)
-
-    def test_encode_field_encodes_form_field_as_sequence_of_byteses(self):
-        name = factory.getRandomString()
-        data = factory.getRandomString().encode('ascii')
-        boundary = make_random_boundary(5)
-        encoded_field = encode_field(name, data, boundary)
-        self.assertIn(b'--' + boundary, encoded_field)
-        self.assertIn(data, encoded_field)
-        text = b'\n'.join(encoded_field)
-        self.assertIn(b'name="%s"' % name, text)
-        self.assertIsInstance(text, bytes)
-
-    def test_encode_file_encodes_file_as_sequence_of_byteses(self):
-        name = factory.getRandomString()
-        data = factory.getRandomString().encode('ascii')
-        boundary = make_random_boundary(5)
-        encoded_file = encode_file(name, BytesIO(data), boundary)
-        self.assertIn(b'--' + boundary, encoded_file)
-        self.assertIn(data, encoded_file)
-        text = b'\n'.join(encoded_file)
-        self.assertIn(b'name="%s"' % name, text)
-        self.assertIsInstance(text, bytes)
-
     def test_encode_multipart_data_produces_bytes(self):
         data = {
             factory.getRandomString():
@@ -97,41 +58,61 @@
         data = {b'foo': factory.getRandomString().encode('ascii')}
         files = {b'bar': BytesIO(factory.getRandomString().encode('ascii'))}
         body, headers = encode_multipart_data(data, files)
-        self.assertThat(body, EndsWith(b'--\r\n'))
+        self.assertThat(body, EndsWith(b'--'))
 
     def test_encode_multipart_data(self):
         # The encode_multipart_data() function should take a list of
         # parameters and files and encode them into a MIME
         # multipart/form-data suitable for posting to the MAAS server.
-        params = {"op": "add", "filename": "foo"}
-        fileObj = BytesIO(b"random data")
-        files = {"file": fileObj}
+        params = {"op": "add", "foo": "bar\u1234"}
+        random_data = urandom(32)
+        files = {"baz": BytesIO(random_data)}
         body, headers = encode_multipart_data(params, files)
-
-        expected_body_regex = b"""\
-            --(?P<boundary>.+)
-            Content-Disposition: form-data; name="filename"
-
-            foo
-            --(?P=boundary)
-            Content-Disposition: form-data; name="op"
-
-            add
-            --(?P=boundary)
-            Content-Disposition: form-data; name="file"; filename="file"
-            Content-Type: application/octet-stream
-
-            random data
-            --(?P=boundary)--
-            """
-        expected_body_regex = dedent(expected_body_regex)
-        expected_body_regex = b"\r\n".join(expected_body_regex.splitlines())
-        expected_body = re.compile(expected_body_regex, re.MULTILINE)
-        self.assertRegexpMatches(body, expected_body)
-
-        boundary = expected_body.match(body).group("boundary")
-        expected_headers = {
-            b"content-length": str(len(body)),
-            b"content-type": b"multipart/form-data; boundary=%s" % boundary,
-            }
-        self.assertEqual(expected_headers, headers)
+        self.assertEqual("%s" % len(body), headers["Content-Length"])
+        self.assertThat(
+            headers["Content-Type"],
+            StartsWith("multipart/form-data; boundary="))
+        # Round-trip through Django's multipart code.
+        post, files = parse_headers_and_body_with_django(headers, body)
+        self.assertEqual(
+            {name: [value] for name, value in params.items()}, post,
+            ahem_django_ahem)
+        self.assertSetEqual({"baz"}, set(files))
+        self.assertEqual(
+            random_data, files["baz"].read(),
+            ahem_django_ahem)
+
+    def test_encode_multipart_data_multiple_params(self):
+        # Sequences of parameters and files can be passed to
+        # encode_multipart_data() so that multiple parameters/files with the
+        # same name can be provided.
+        params_in = [
+            ("one", "ABC"),
+            ("one", "XYZ"),
+            ("two", "DEF"),
+            ("two", "UVW"),
+            ]
+        files_in = [
+            ("f-one", BytesIO(urandom(32))),
+            ("f-two", BytesIO(urandom(32))),
+            ]
+        body, headers = encode_multipart_data(params_in, files_in)
+        self.assertEqual("%s" % len(body), headers["Content-Length"])
+        self.assertThat(
+            headers["Content-Type"],
+            StartsWith("multipart/form-data; boundary="))
+        # Round-trip through Django's multipart code.
+        params_out, files_out = (
+            parse_headers_and_body_with_django(headers, body))
+        params_out_expected = MultiValueDict()
+        for name, value in params_in:
+            params_out_expected.appendlist(name, value)
+        self.assertEqual(
+            params_out_expected, params_out,
+            ahem_django_ahem)
+        self.assertSetEqual({"f-one", "f-two"}, set(files_out))
+        files_expected = {name: buf.getvalue() for name, buf in files_in}
+        files_observed = {name: buf.read() for name, buf in files_out.items()}
+        self.assertEqual(
+            files_expected, files_observed,
+            ahem_django_ahem)

=== modified file 'src/maascli/api.py'
--- src/maascli/api.py	2012-09-18 16:49:16 +0000
+++ src/maascli/api.py	2012-09-22 20:16:20 +0000
@@ -278,8 +278,18 @@
         auth = MAASOAuth(*credentials)
         auth.sign_request(uri, headers)
 
+    @classmethod
+    def print_response(cls, response, content):
+        """Show an HTTP response in a human-friendly way."""
+        # Print the response.
+        print(response.status, response.reason)
+        print()
+        cls.print_headers(response)
+        print()
+        print(content)
+
     @staticmethod
-    def print_response(response, content):
+    def print_headers(headers):
         """Show an HTTP response in a human-friendly way."""
         # Function to change headers like "transfer-encoding" into
         # "Transfer-Encoding".
@@ -287,14 +297,10 @@
             part.capitalize() for part in header.split("-"))
         # Format string to prettify reporting of response headers.
         form = "%%%ds: %%s" % (
-            max(len(header) for header in response) + 2)
+            max(len(header) for header in headers) + 2)
         # Print the response.
-        print(response.status, response.reason)
-        print()
-        for header in sorted(response):
-            print(form % (cap(header), response[header]))
-        print()
-        print(content)
+        for header in sorted(headers):
+            print(form % (cap(header), headers[header]))
 
 
 def register_actions(profile, handler, parser):