launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #12537
[Merge] lp:~allenap/maas/utf8-before-urlencode into lp:maas
Gavin Panella has proposed merging lp:~allenap/maas/utf8-before-urlencode into lp:maas with lp:~allenap/maas/urlencode-is-duckist as a prerequisite.
Commit message:
Always UTF-8 encode Unicode strings before URL encoding them.
The standard library's urllib.urlencode is not Unicode safe. This new version is, and is round-trip tested with Django's stated query string handling policy.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~allenap/maas/utf8-before-urlencode/+merge/126587
--
https://code.launchpad.net/~allenap/maas/utf8-before-urlencode/+merge/126587
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~allenap/maas/utf8-before-urlencode into lp:maas.
=== modified file 'src/maascli/api.py'
--- src/maascli/api.py 2012-09-27 01:43:21 +0000
+++ src/maascli/api.py 2012-09-27 01:43:21 +0000
@@ -18,7 +18,6 @@
import httplib
import json
import sys
-from urllib import urlencode
from urlparse import (
urljoin,
urlparse,
@@ -42,6 +41,7 @@
handler_command_name,
parse_docstring,
safe_name,
+ urlencode,
)
=== modified file 'src/maascli/tests/test_utils.py'
--- src/maascli/tests/test_utils.py 2012-09-16 21:32:49 +0000
+++ src/maascli/tests/test_utils.py 2012-09-27 01:43:21 +0000
@@ -12,8 +12,16 @@
__metaclass__ = type
__all__ = []
+from urllib import unquote
+
+from django.utils.encoding import smart_unicode
from maascli import utils
from maastesting.testcase import TestCase
+from testtools.matchers import (
+ Equals,
+ IsInstance,
+ MatchesAll,
+ )
class TestDocstringParsing(TestCase):
@@ -187,3 +195,23 @@
# string.
self.assertIsInstance(utils.ensure_trailing_slash(u"fred"), unicode)
self.assertIsInstance(utils.ensure_trailing_slash(b"fred"), bytes)
+
+ def test_urlencode_encodes_utf8_and_quotes(self):
+ # urlencode UTF-8 encodes unicode strings and applies standard query
+ # string quoting rules, and always returns a byte string.
+ data = [("=\u1234", "&\u4321")]
+ query = utils.urlencode(data)
+ self.assertThat(
+ query, MatchesAll(
+ Equals(b"%3D%E1%88%B4=%26%E4%8C%A1"),
+ IsInstance(bytes)))
+
+ def test_urlencode_roundtrip_through_django(self):
+ # Check that urlencode's approach works with Django, as described on
+ # https://docs.djangoproject.com/en/dev/ref/unicode/.
+ data = [("=\u1234", "&\u4321")]
+ query = utils.urlencode(data)
+ name, value = query.split(b"=")
+ name, value = unquote(name), unquote(value)
+ name, value = smart_unicode(name), smart_unicode(value)
+ self.assertEqual(data, [(name, value)])
=== modified file 'src/maascli/utils.py'
--- src/maascli/utils.py 2012-09-16 21:32:49 +0000
+++ src/maascli/utils.py 2012-09-27 01:43:21 +0000
@@ -15,12 +15,14 @@
"handler_command_name",
"parse_docstring",
"safe_name",
+ "urlencode",
]
from functools import partial
from inspect import getdoc
import re
from textwrap import dedent
+from urllib import quote_plus
re_paragraph_splitter = re.compile(
@@ -86,3 +88,19 @@
"""Ensure that `string` has a trailing forward-slash."""
slash = b"/" if isinstance(string, bytes) else u"/"
return (string + slash) if not string.endswith(slash) else string
+
+
+def urlencode(data):
+ """A version of `urllib.urlencode` that isn't insane.
+
+ This only cares that `data` is an iterable of iterables. Each sub-iterable
+ must be of overall length 2, i.e. a name/value pair.
+
+ Unicode strings will be encoded to UTF-8. This is what Django expects; see
+ `smart_text` in the Django documentation.
+ """
+ enc = lambda string: quote_plus(
+ string.encode("utf-8") if isinstance(string, unicode) else string)
+ return b"&".join(
+ b"%s=%s" % (enc(name), enc(value))
+ for name, value in data)