← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:py3-fix-request-input-decoding into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:py3-fix-request-input-decoding into launchpad:master.

Commit message:
Fix handling of request inputs on Python 3

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1937345 in Launchpad itself: "OOPS when attempting to post a comment including non-ASCII character"
  https://bugs.launchpad.net/launchpad/+bug/1937345

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/406315

Cherry-pick https://github.com/zopefoundation/zope.publisher/pull/66 to get better handling of non-ISO-8859-1 URL-encoded form input on Python 3.  The exact nature of our countermeasures for zope.publisher's oddities has to change around a bit (and `get_query_string_params` no longer calls `BrowserRequest._decode`, which is probably a good thing on the whole), but most of the complexity will now simplify away once we're on Python 3 everywhere.

Dependencies MP: https://code.launchpad.net/~cjwatson/lp-source-dependencies/+git/lp-source-dependencies/+merge/406314
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:py3-fix-request-input-decoding into launchpad:master.
diff --git a/lib/lp/services/webapp/servers.py b/lib/lp/services/webapp/servers.py
index 5693f92..69a443c 100644
--- a/lib/lp/services/webapp/servers.py
+++ b/lib/lp/services/webapp/servers.py
@@ -529,19 +529,20 @@ def get_query_string_params(request):
     if query_string is None:
         query_string = ''
 
-    # PEP-3333 specifies that strings must only contain codepoints
-    # representable in ISO-8859-1.
     kwargs = {}
     if not six.PY2:
-        kwargs['encoding'] = 'ISO-8859-1'
+        kwargs['encoding'] = 'UTF-8'
         kwargs['errors'] = 'replace'
     parsed_qs = parse_qs(query_string, keep_blank_values=True, **kwargs)
-    # Use BrowserRequest._decode() for decoding the received parameters.
-    decoded_qs = {}
-    for key, values in six.iteritems(parsed_qs):
-        decoded_qs[key] = [
-            request._decode(value) for value in values]
-    return decoded_qs
+    if six.PY2:
+        decoded_qs = {}
+        for key, values in six.iteritems(parsed_qs):
+            decoded_qs[key] = [
+                (value.decode('UTF-8', 'replace') if isinstance(value, bytes)
+                 else value)
+                for value in values]
+        parsed_qs = decoded_qs
+    return parsed_qs
 
 
 class LaunchpadBrowserRequestMixin:
diff --git a/lib/lp/services/webapp/tests/test_servers.py b/lib/lp/services/webapp/tests/test_servers.py
index ab0f26a..ed66bfa 100644
--- a/lib/lp/services/webapp/tests/test_servers.py
+++ b/lib/lp/services/webapp/tests/test_servers.py
@@ -21,6 +21,7 @@ from lazr.restful.testing.webservice import (
     IGenericEntry,
     WebServiceTestCase,
     )
+import six
 from talisker.context import Context
 from talisker.logs import logging_context
 from zope.component import (
@@ -440,7 +441,15 @@ class TestBasicLaunchpadRequest(TestCase):
     def test_request_with_invalid_query_string_recovers(self):
         # When the query string has invalid utf-8, it is decoded with
         # replacement.
-        env = {'QUERY_STRING': 'field.title=subproc\xe9s '}
+        if six.PY2:
+            env = {'QUERY_STRING': 'field.title=subproc\xe9s '}
+        else:
+            # PEP 3333 requires environment variables to be native strings,
+            # so we can't actually get a bytes object in here on Python 3
+            # (both because the WSGI runner will never put it there, and
+            # because parse_qs would crash if we did).  Test the next best
+            # thing, namely percent-encoded invalid UTF-8.
+            env = {'QUERY_STRING': 'field.title=subproc%E9s '}
         request = LaunchpadBrowserRequest(io.BytesIO(b''), env)
         self.assertEqual(
             [u'subproc\ufffds '], request.query_string_params['field.title'])
diff --git a/requirements/launchpad.txt b/requirements/launchpad.txt
index 7eee4bf..e9a8867 100644
--- a/requirements/launchpad.txt
+++ b/requirements/launchpad.txt
@@ -175,6 +175,7 @@ zope.app.http==4.0.1
 zope.app.publication==4.3.1
 zope.app.publisher==4.2.0
 zope.app.wsgi==4.3.0
+zope.publisher==6.0.2+lp1
 # lp:~launchpad-committers/zope.session:launchpad
 zope.session==4.3.0+lp1
 zope.testbrowser==5.5.1