launchpad-reviewers team mailing list archive

Thread
Date

[Merge] lp:~allenap/maas/utf8-before-urlencode into lp:maas

To: mp+126587@xxxxxxxxxxxxxxxxxx
From: Gavin Panella <gavin.panella@xxxxxxxxxxxxx>
Date: Thu, 27 Sep 2012 01:44:22 -0000
Reply-to: mp+126587@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

Gavin Panella has proposed merging lp:~allenap/maas/utf8-before-urlencode into lp:maas with lp:~allenap/maas/urlencode-is-duckist as a prerequisite.

Commit message:
Always UTF-8 encode Unicode strings before URL encoding them.

The standard library's urllib.urlencode is not Unicode safe. This new version is, and is round-trip tested with Django's stated query string handling policy.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~allenap/maas/utf8-before-urlencode/+merge/126587
-- 
https://code.launchpad.net/~allenap/maas/utf8-before-urlencode/+merge/126587
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~allenap/maas/utf8-before-urlencode into lp:maas.

=== modified file 'src/maascli/api.py'
--- src/maascli/api.py	2012-09-27 01:43:21 +0000
+++ src/maascli/api.py	2012-09-27 01:43:21 +0000
@@ -18,7 +18,6 @@
 import httplib
 import json
 import sys
-from urllib import urlencode
 from urlparse import (
     urljoin,
     urlparse,
@@ -42,6 +41,7 @@
     handler_command_name,
     parse_docstring,
     safe_name,
+    urlencode,
     )
 
 

=== modified file 'src/maascli/tests/test_utils.py'
--- src/maascli/tests/test_utils.py	2012-09-16 21:32:49 +0000
+++ src/maascli/tests/test_utils.py	2012-09-27 01:43:21 +0000
@@ -12,8 +12,16 @@
 __metaclass__ = type
 __all__ = []
 
+from urllib import unquote
+
+from django.utils.encoding import smart_unicode
 from maascli import utils
 from maastesting.testcase import TestCase
+from testtools.matchers import (
+    Equals,
+    IsInstance,
+    MatchesAll,
+    )
 
 
 class TestDocstringParsing(TestCase):
@@ -187,3 +195,23 @@
         # string.
         self.assertIsInstance(utils.ensure_trailing_slash(u"fred"), unicode)
         self.assertIsInstance(utils.ensure_trailing_slash(b"fred"), bytes)
+
+    def test_urlencode_encodes_utf8_and_quotes(self):
+        # urlencode UTF-8 encodes unicode strings and applies standard query
+        # string quoting rules, and always returns a byte string.
+        data = [("=\u1234", "&\u4321")]
+        query = utils.urlencode(data)
+        self.assertThat(
+            query, MatchesAll(
+                Equals(b"%3D%E1%88%B4=%26%E4%8C%A1"),
+                IsInstance(bytes)))
+
+    def test_urlencode_roundtrip_through_django(self):
+        # Check that urlencode's approach works with Django, as described on
+        # https://docs.djangoproject.com/en/dev/ref/unicode/.
+        data = [("=\u1234", "&\u4321")]
+        query = utils.urlencode(data)
+        name, value = query.split(b"=")
+        name, value = unquote(name), unquote(value)
+        name, value = smart_unicode(name), smart_unicode(value)
+        self.assertEqual(data, [(name, value)])

=== modified file 'src/maascli/utils.py'
--- src/maascli/utils.py	2012-09-16 21:32:49 +0000
+++ src/maascli/utils.py	2012-09-27 01:43:21 +0000
@@ -15,12 +15,14 @@
     "handler_command_name",
     "parse_docstring",
     "safe_name",
+    "urlencode",
     ]
 
 from functools import partial
 from inspect import getdoc
 import re
 from textwrap import dedent
+from urllib import quote_plus
 
 
 re_paragraph_splitter = re.compile(
@@ -86,3 +88,19 @@
     """Ensure that `string` has a trailing forward-slash."""
     slash = b"/" if isinstance(string, bytes) else u"/"
     return (string + slash) if not string.endswith(slash) else string
+
+
+def urlencode(data):
+    """A version of `urllib.urlencode` that isn't insane.
+
+    This only cares that `data` is an iterable of iterables. Each sub-iterable
+    must be of overall length 2, i.e. a name/value pair.
+
+    Unicode strings will be encoded to UTF-8. This is what Django expects; see
+    `smart_text` in the Django documentation.
+    """
+    enc = lambda string: quote_plus(
+        string.encode("utf-8") if isinstance(string, unicode) else string)
+    return b"&".join(
+        b"%s=%s" % (enc(name), enc(value))
+        for name, value in data)