launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #32294
[Merge] ~ilkeremrekoc/launchpad:upgrade-multipart into launchpad:master
İlker Emre Koç has proposed merging ~ilkeremrekoc/launchpad:upgrade-multipart into launchpad:master.
Commit message:
upgrade multipart with line-break parsing
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~ilkeremrekoc/launchpad/+git/launchpad/+merge/482827
- Upgraded multipart from 0.2.4 to 1.2.1
- Since apport package can only send requests with LF line-breaks, and
the newest version of "multipart" package is incapable of accepting
anything other than CRLFs; for now, we added explicit parsing of LF or
CR to CRLF until Apport package can be SRU'd to a sufficient amount of
the user-base. Added the parsing in a way that only the Apport package
and the browser's url extension of "+storeblob" can reach it.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~ilkeremrekoc/launchpad:upgrade-multipart into launchpad:master.
diff --git a/lib/lp/services/webapp/servers.py b/lib/lp/services/webapp/servers.py
index f01969e..f0f46f0 100644
--- a/lib/lp/services/webapp/servers.py
+++ b/lib/lp/services/webapp/servers.py
@@ -32,6 +32,7 @@ from zope.formlib.itemswidgets import MultiDataHelper
from zope.formlib.widget import SimpleInputWidget
from zope.interface import alsoProvides, implementer
from zope.publisher.browser import BrowserRequest, BrowserResponse, TestRequest
+from zope.publisher.http import HTTPInputStream
from zope.publisher.interfaces import NotFound
from zope.publisher.xmlrpc import XMLRPCRequest, XMLRPCResponse
from zope.security.interfaces import IParticipation, Unauthorized
@@ -707,6 +708,7 @@ class LaunchpadBrowserRequest(
"""
retry_max_count = 5 # How many times we're willing to retry
+ buffer_size = 64 * 1024 # buffer size for the processInputs
def __init__(self, body_instream, environ, response=None):
BasicLaunchpadRequest.__init__(self, body_instream, environ, response)
@@ -765,6 +767,134 @@ class LaunchpadBrowserRequest(
"""See `ISynchronizer`."""
pass
+ def _skipParseProcess(self):
+ """Skips the parsing process inside processInputs by re-creating and
+ re-inserting the input stream"""
+
+ # The cache stream stores all the previous read operation results.
+ # getCacheStream() reads the remaining body as well and adds it to the
+ # cache stream. A complete re-creation is necessary since these stream
+ # classes don't have any "seek()" method we can use to reset.
+ self._body_instream = HTTPInputStream(
+ self._body_instream.getCacheStream(), self._environ
+ )
+
+ # XXX: ilkeremrekoc 2025-03-12
+ # processInputs is a part of Zope's request class, which we override.
+ # This addition is because we are about to upgrade multipart package,
+ # which in its newer versions is strict in its parsing of requests, only
+ # accepting CRLF line-breaks throughout. And since the old versions of
+ # apport package, which is the main bug-filing tool in Ubuntu, uses LF
+ # line-breaks in its requests, we need backwards compatibility until the
+ # older Ubuntu versions' apport packages can implement CRLF SRUs (Stable
+ # Release Update). Once they do, and a sufficient section of users and
+ # Ubuntu versions make the update, we can get rid of this method here.
+ def processInputs(self):
+ """See IPublisherRequest'
+
+ Processes inputs before traversal
+
+ This method overrides `BrowserRequest.processInputs()` to parse LF or
+ CR line-breaks inside requests if they exist into CRLF line-breaks.
+ Calls the parent method afterwards for further processing.
+ """
+
+ # Trigger the parsing only when the arriving request is pointed
+ # towards apport package's entry point.
+ # Note: Aside from apport, the browsers' UI can also reach this
+ # page. This shouldn't affect our current process but should be kept
+ # in mind for any large scale changes in the future.
+ # Note: The third condition is redundant but was added as an extra
+ # security since we really don't wish to enter this block if we are
+ # not in the jurisdiction of the apport package.
+ if (
+ self._environ["PATH_INFO"] == "/+storeblob"
+ and self._environ["REQUEST_METHOD"] == "POST"
+ and self._environ["CONTENT_TYPE"].startswith("multipart/form-data")
+ ):
+
+ # The request's body is put into a cache stream in the
+ # HTTPInputStream constructor, typed dynamically according to the
+ # size of the body. We wish to use the same type since the use of
+ # the cache is almost identical to ours.
+ parsing_body_stream = type(self._body_instream.cacheStream)()
+
+ buffer = self._body_instream.read(self.buffer_size)
+
+ # If the parsing happens, the content length will change with the
+ # new line-breaks so we store the new one.
+ newContentLength = 0
+
+ log_linebreak_type = "" # Store the linebreak type for logging
+
+ while buffer != b"":
+
+ if b"\r\n" in buffer:
+
+ # Since we assume there is a single line-break type in the
+ # request, we can skip the rest of the parsing when the
+ # CRLF is found.
+ self._skipParseProcess()
+
+ logging_context.push(linebreak_type="CRLF")
+ super().processInputs()
+ return
+
+ # The only CRLF in the buffer might have been split between
+ # the next buffer and the current one
+ elif buffer.endswith(b"\r"):
+ nextChar = self._body_instream.read(1)
+ buffer += nextChar
+
+ if nextChar == b"\n":
+ self._skipParseProcess()
+
+ logging_context.push(linebreak_type="CRLF")
+ super().processInputs()
+ return
+
+ if b"\n" in buffer:
+ buffer = buffer.replace(b"\n", b"\r\n")
+ newContentLength += len(buffer)
+
+ log_linebreak_type = "LF"
+ elif b"\r" in buffer:
+ # Technically, we don't need CR functionality in our
+ # codebase, but in this case, not having it would make
+ # things harder to understand as the question of "what
+ # happens" would be left unknowable since the downstream
+ # doesn't raise errors.
+
+ buffer = buffer.replace(b"\r", b"\r\n")
+ newContentLength += len(buffer)
+
+ log_linebreak_type = "CR"
+ else:
+ # If there are neither CRLF, LF or CR in a buffer, we
+ # cannot be sure if the Content-Length will change later,
+ # so, we store the buffer length just in case.
+ newContentLength += len(buffer)
+
+ parsing_body_stream.write(buffer)
+ buffer = self._body_instream.read(self.buffer_size)
+
+ # Note that we can reach this part only if we parsed the whole
+ # request.
+
+ # Updating the content length by anticipating change.
+ self._environ["CONTENT_LENGTH"] = newContentLength
+
+ logging_context.push(linebreak_type=log_linebreak_type)
+
+ # Reset the stream before we re-create the original one.
+ parsing_body_stream.seek(0)
+ self._body_instream = HTTPInputStream(
+ parsing_body_stream, self._environ
+ )
+
+ super().processInputs()
+ return
+
@implementer(IBrowserFormNG)
class BrowserFormNG:
diff --git a/lib/lp/services/webapp/tests/test_servers.py b/lib/lp/services/webapp/tests/test_servers.py
index 491fed7..ba8522e 100644
--- a/lib/lp/services/webapp/tests/test_servers.py
+++ b/lib/lp/services/webapp/tests/test_servers.py
@@ -5,6 +5,7 @@ import io
import unittest
from datetime import datetime, timedelta, timezone
from doctest import ELLIPSIS, NORMALIZE_WHITESPACE, DocTestSuite
+from textwrap import dedent
import transaction
from lazr.restful.interfaces import (
@@ -22,6 +23,7 @@ from talisker.logs import logging_context
from testtools.matchers import ContainsDict, Equals
from zope.component import getGlobalSiteManager, getUtility
from zope.interface import Interface, implementer
+from zope.publisher.http import HTTPInputStream
from zope.security.interfaces import Unauthorized
from zope.security.management import newInteraction
from zope.security.proxy import removeSecurityProxy
@@ -753,6 +755,212 @@ class TestLaunchpadBrowserRequest(TestCase):
)
+# XXX: ilkeremrekoc 2025-03-12
+# This test case is for the overridden processInputs inside
+# LaunchpadBrowserRequest. The reason for the overriding is connected to
+# the changes in multipart package (which now only accepts CRLF endings) and
+# the original state of apport package (which only sent LF endings)
+# (see https://bugs.launchpad.net/ubuntu/+source/apport/+bug/2096327). Thus,
+# we add the overridden method and this test case temporarily until the apport
+# package can be SRU'd in a sufficient percantage of Ubuntu users. Both of
+# these can be removed once the prerequisite is fulfilled.
+class TestLaunchpadBrowserRequestProcessInputs(TestCase):
+ layer = FunctionalLayer
+
+ def setUp(self):
+ super().setUp()
+
+ self.body_bytes = dedent(
+ """\
+ Content-Type: multipart/mixed; \
+ boundary="===============4913381966751462219=="
+ MIME-Version: 1.0
+ \n\
+ --===============4913381966751462219==
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Content-Disposition: form-data; name="FORM_SUBMIT"
+ \n\
+ 1
+ --===============4913381966751462219==
+ Content-Type: application/octet-stream
+ MIME-Version: 1.0
+ Content-Disposition: form-data; name="field.blob"; \
+ filename="x"
+ \n\
+ This is for a test....
+ \n\
+ The second line of test
+ \n\
+ The third
+ \n\
+ --===============4913381966751462219==--
+ \n\
+ """
+ ).encode("ASCII")
+
+ self.environ = {
+ "PATH_INFO": "/+storeblob",
+ "REQUEST_METHOD": "POST",
+ "CONTENT_TYPE": "multipart/form-data; \
+ boundary================4913381966751462219==",
+ }
+
+ def prepareRequest_lineBreakTest(self, body_bytes, environ):
+ """Return a `LaunchpadBrowserRequest` with the given form.
+
+ Also set the accepted charset to 'utf-8'.
+ """
+
+ body_stream = io.BytesIO(body_bytes)
+ input_stream = HTTPInputStream(body_stream, environ)
+
+ request = LaunchpadBrowserRequest(input_stream, environ)
+ request.charsets = ["utf-8"]
+ return request
+
+ def _assessProcessInputsResult(self, request):
+ """
+ Assesses whether the request has correctly parsed the blob field.
+
+ Passing in this case means the request's form is correctly parsed
+ and it only includes CRLF endings.
+ """
+
+ self.assertIn("field.blob", request.form)
+
+ result = request.form["field.blob"].read()
+
+ self.assertIn(b"\r\n", result)
+ self.assertNotRegex(result, b"[^\r]\n")
+
+ def test_processInputs_with_LF(self):
+ """
+ processInputs parses request bodies with LF line-breaks into CRLF.
+ """
+
+ request = self.prepareRequest_lineBreakTest(
+ self.body_bytes, self.environ
+ )
+ request.processInputs()
+
+ self._assessProcessInputsResult(request)
+
+ def test_processInputs_with_CR(self):
+ """
+ processInputs parses request bodies with CR line-breaks into CRLF.
+ """
+
+ body_bytes_with_CR = self.body_bytes.replace(b"\n", b"\r")
+
+ request = self.prepareRequest_lineBreakTest(
+ body_bytes_with_CR, self.environ
+ )
+ request.processInputs()
+
+ self._assessProcessInputsResult(request)
+
+ def test_processInputs_with_CRLF(self):
+ """
+ processInputs passes request bodies with CRLF line-breaks.
+ """
+
+ body_bytes_with_CRLF = self.body_bytes.replace(b"\n", b"\r\n")
+
+ request = self.prepareRequest_lineBreakTest(
+ body_bytes_with_CRLF, self.environ
+ )
+ request.processInputs()
+
+ self._assessProcessInputsResult(request)
+
+ def test_processInputs_with_multiple_buffer_runs(self):
+ """
+ processInputs should work even when the message overflows the buffer
+ """
+
+ request = self.prepareRequest_lineBreakTest(
+ self.body_bytes, self.environ
+ )
+ request.buffer_size = 4
+
+ request.processInputs()
+
+ self._assessProcessInputsResult(request)
+
+ def test_processInputs_with_cut_CRLF(self):
+ """
+ processInputs should work even when a CRLF is cut in the middle
+ """
+
+ body_bytes_with_CRLF = self.body_bytes.replace(b"\n", b"\r\n")
+ CR_index = body_bytes_with_CRLF.index(b"\r")
+
+ request = self.prepareRequest_lineBreakTest(
+ body_bytes_with_CRLF, self.environ
+ )
+ request.buffer_size = len(body_bytes_with_CRLF[: CR_index + 1])
+
+ request.processInputs()
+
+ self._assessProcessInputsResult(request)
+
+ def test_processInputs_different_environ_with_LF(self):
+ """
+ processInputs shouldn't work outside its domain. If LF line-breaks are
+ present outside of the apport package domain, the parsing should fail.
+ """
+
+ different_environ = self.environ.copy()
+ different_environ["PATH_INFO"] = ""
+
+ request = self.prepareRequest_lineBreakTest(
+ self.body_bytes, different_environ
+ )
+
+ request.processInputs()
+
+ self.assertNotIn("field.blob", request.form)
+
+ def test_processInputs_different_environ_with_CR(self):
+ """
+ processInputs shouldn't work outside its domain. If CR line-breaks are
+ present outside of the apport package domain, the parsing should fail.
+ """
+
+ body_bytes_with_CR = self.body_bytes.replace(b"\n", b"\r")
+
+ different_environ = self.environ.copy()
+ different_environ["PATH_INFO"] = ""
+
+ request = self.prepareRequest_lineBreakTest(
+ body_bytes_with_CR, different_environ
+ )
+
+ request.processInputs()
+
+ self.assertNotIn("field.blob", request.form)
+
+ def test_processInputs_different_environ_with_CRLF(self):
+ """
+ processInputs should work with CRLF everytime, regardless of domain.
+ """
+
+ body_bytes_with_CRLF = self.body_bytes.replace(b"\n", b"\r\n")
+
+ different_environ = self.environ.copy()
+ different_environ["PATH_INFO"] = ""
+
+ request = self.prepareRequest_lineBreakTest(
+ body_bytes_with_CRLF, different_environ
+ )
+
+ request.processInputs()
+
+ self._assessProcessInputsResult(request)
+
+
class TestWebServiceRequestToBrowserRequest(WebServiceTestCase):
def test_unicode_path_info(self):
web_service_request = WebServiceTestRequest(
diff --git a/requirements/launchpad.txt b/requirements/launchpad.txt
index bf2650b..ff191d3 100644
--- a/requirements/launchpad.txt
+++ b/requirements/launchpad.txt
@@ -96,7 +96,7 @@ mistune==0.8.3
monotonic==1.5
more-itertools==10.5.0
msgpack==1.0.2
-multipart==0.2.4
+multipart==1.2.1
netaddr==0.7.19
netifaces==0.11.0
oauth==1.0
Follow ups