launchpad-reviewers team mailing list archive

Thread
Date

[Merge] ~ilkeremrekoc/launchpad:revert-multipart into launchpad:master

To: mp+485077@xxxxxxxxxxxxxxxxxx
From: İlker Emre Koç <mp+485077@xxxxxxxxxxxxxxxxxx>
Date: Mon, 28 Apr 2025 06:29:33 -0000
Reply-to: mp+485077@xxxxxxxxxxxxxxxxxx
Sender: noreply@xxxxxxxxxxxxx

İlker Emre Koç has proposed merging ~ilkeremrekoc/launchpad:revert-multipart into launchpad:master.

Commit message:
Revert "upgrade multipart with line-break parsing"

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~ilkeremrekoc/launchpad/+git/launchpad/+merge/485077

I am reverting since. The upgrade failed some buildbot tests.

This reverts commit a3fbbd49e00800ee6d2501ecec93ceaf3b3c5535.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~ilkeremrekoc/launchpad:revert-multipart into launchpad:master.

diff --git a/lib/lp/services/webapp/servers.py b/lib/lp/services/webapp/servers.py
index eb74580..f01969e 100644
--- a/lib/lp/services/webapp/servers.py
+++ b/lib/lp/services/webapp/servers.py
@@ -3,10 +3,8 @@
 
 """Definition of the internet servers that Launchpad uses."""
 
-import tempfile
 import threading
 import xmlrpc.client
-from io import BytesIO
 from urllib.parse import parse_qs
 
 import six
@@ -34,7 +32,6 @@ from zope.formlib.itemswidgets import MultiDataHelper
 from zope.formlib.widget import SimpleInputWidget
 from zope.interface import alsoProvides, implementer
 from zope.publisher.browser import BrowserRequest, BrowserResponse, TestRequest
-from zope.publisher.http import HTTPInputStream
 from zope.publisher.interfaces import NotFound
 from zope.publisher.xmlrpc import XMLRPCRequest, XMLRPCResponse
 from zope.security.interfaces import IParticipation, Unauthorized
@@ -710,7 +707,6 @@ class LaunchpadBrowserRequest(
     """
 
     retry_max_count = 5  # How many times we're willing to retry
-    buffer_size = 64 * 1024  # buffer size for the processInputs
 
     def __init__(self, body_instream, environ, response=None):
         BasicLaunchpadRequest.__init__(self, body_instream, environ, response)
@@ -769,169 +765,6 @@ class LaunchpadBrowserRequest(
         """See `ISynchronizer`."""
         pass
 
-    def _skipParseProcess(self):
-        """Skips the parsing process inside processInputs by re-creating and
-        re-inserting the input stream"""
-
-        # The cache stream stores all the previous read operation results.
-        # getCacheStream() reads the remaining body as well and adds it to the
-        # cache stream. A complete re-creation is necessary since these stream
-        # classes don't have any "seek()" method we can use to reset.
-        new_body_instream = self._body_instream.getCacheStream()
-
-        if isinstance(self._body_instream.stream, HTTPInputStream):
-            self._body_instream.stream.cacheStream.close()
-
-        self._body_instream = HTTPInputStream(new_body_instream, self._environ)
-
-    def _createResetableStream(self, environment):
-        """Creates a stream to be resetted at the end of the parsing process.
-
-        This method is a copy of the __init__ method of HTTPInputStream class
-        in zope.publisher since we use the returned value of this method in
-        the same request instances in the same way while being unable to
-        properly use that method here.
-        """
-
-        size = environment.get("CONTENT_LENGTH")
-        # There can be no size in the environment (None) or the size
-        # can be an empty string, in which case we treat it as absent.
-        if not size:
-            size = environment.get("HTTP_CONTENT_LENGTH")
-
-        if not size or int(size) < 65536:
-            resetable_stream = BytesIO()
-        else:
-            resetable_stream = tempfile.TemporaryFile()
-
-        return resetable_stream
-
-    # XXX: ilkeremrekoc 2025-03-12
-    # processInputs is a part of Zope's request class, which we override.
-    # This addition is because we are about to upgrade multipart package,
-    # which in its newer versions is strict in its parsing of requests, only
-    # accepting CRLF line-breaks throughout. And since the old versions of
-    # apport package, which is the main bug-filing tool in Ubuntu, uses LF
-    # line-breaks in its requests, we need backwards compatibility until the
-    # older Ubuntu versions' apport packages can implement CRLF SRUs (Stable
-    # Release Update). Once they do, and a sufficient section of users and
-    # Ubuntu versions make the update, we can get rid of this method here.
-    def processInputs(self):
-        """See IPublisherRequest'
-
-        Processes inputs before traversal
-
-        This method overrides `BrowserRequest.processInputs()` to parse LF or
-        CR line-breaks inside requests if they exist into CRLF line-breaks.
-        Calls the parent method afterwards for further processing.
-        """
-
-        # Trigger the parsing only when the arriving request is pointed
-        # towards apport package's entry point.
-        # Note: Aside from apport, the browsers' UI can also reach this
-        # page. This shouldn't affect our current process but should be kept
-        # in mind for any large scale changes in the future.
-        # Note: The third condition is redundant but was added as an extra
-        # security since we really don't wish to enter this block if we are
-        # not in the jurisdiction of the apport package.
-        if (
-            self._environ["PATH_INFO"] == "/+storeblob"
-            and self._environ["REQUEST_METHOD"] == "POST"
-            and self._environ["CONTENT_TYPE"].startswith("multipart/form-data")
-        ):
-
-            # The request's body can rise to enourmous sizes since this part
-            # of the code is for filing of bug reports, which require
-            # extensive auto generated information. As a result, we create
-            # the instance dynamically for different request sizes in the
-            # same way the zope.publisher framework's HTTPInputStream's
-            # cacheStream is created.
-            parsing_body_stream = self._createResetableStream(self._environ)
-
-            buffer = self._body_instream.read(self.buffer_size)
-
-            # If the parsing happens, the content length will change with the
-            # new line-breaks so we store the new one.
-            newContentLength = 0
-
-            log_linebreak_type = ""  # Store the linebreak type for logging
-
-            while buffer != b"":
-
-                if b"\r\n" in buffer:
-
-                    # Since we assume there is a single line-break type in the
-                    # request, we can skip the rest of the parsing when the
-                    # CRLF is found.
-                    self._skipParseProcess()
-                    parsing_body_stream.close()
-
-                    logging_context.push(linebreak_type="CRLF")
-                    super().processInputs()
-                    return
-
-                # The only CRLF in the buffer might have been split between
-                # the next buffer and the current one
-                elif buffer.endswith(b"\r"):
-                    nextChar = self._body_instream.read(1)
-                    buffer += nextChar
-
-                    if nextChar == b"\n":
-                        self._skipParseProcess()
-                        parsing_body_stream.close()
-
-                        logging_context.push(linebreak_type="CRLF")
-                        super().processInputs()
-                        return
-
-                if b"\n" in buffer:
-                    buffer = buffer.replace(b"\n", b"\r\n")
-                    newContentLength += len(buffer)
-
-                    log_linebreak_type = "LF"
-                elif b"\r" in buffer:
-                    # Technically, we don't need CR functionality in our
-                    # codebase, but in this case, not having it would make
-                    # things harder to understand as the question of "what
-                    # happens" would be left unknowable since the downstream
-                    # doesn't raise errors.
-
-                    buffer = buffer.replace(b"\r", b"\r\n")
-                    newContentLength += len(buffer)
-
-                    log_linebreak_type = "CR"
-                else:
-                    # If there are neither CRLF, LF or CR in a buffer, we
-                    # cannot be sure if the Content-Length will change later,
-                    # so, we store the buffer length just in case.
-                    newContentLength += len(buffer)
-
-                parsing_body_stream.write(buffer)
-                buffer = self._body_instream.read(self.buffer_size)
-
-            # Note that we can reach this part only if we parsed the whole
-            # request.
-
-            # Updating the content length by anticipating change.
-            self._environ["CONTENT_LENGTH"] = newContentLength
-
-            logging_context.push(linebreak_type=log_linebreak_type)
-
-            # Reset the stream before we re-create the original one.
-            parsing_body_stream.seek(0)
-
-            deleting_stream = self._body_instream
-            self._body_instream = HTTPInputStream(
-                parsing_body_stream, self._environ
-            )
-
-            if isinstance(deleting_stream.stream, HTTPInputStream):
-                deleting_stream.stream.cacheStream.close()
-            deleting_stream.cacheStream.close()
-
-        super().processInputs()
-        return
-
 
 @implementer(IBrowserFormNG)
 class BrowserFormNG:
diff --git a/lib/lp/services/webapp/tests/test_servers.py b/lib/lp/services/webapp/tests/test_servers.py
index bdd07ed..491fed7 100644
--- a/lib/lp/services/webapp/tests/test_servers.py
+++ b/lib/lp/services/webapp/tests/test_servers.py
@@ -5,7 +5,6 @@ import io
 import unittest
 from datetime import datetime, timedelta, timezone
 from doctest import ELLIPSIS, NORMALIZE_WHITESPACE, DocTestSuite
-from textwrap import dedent
 
 import transaction
 from lazr.restful.interfaces import (
@@ -23,7 +22,6 @@ from talisker.logs import logging_context
 from testtools.matchers import ContainsDict, Equals
 from zope.component import getGlobalSiteManager, getUtility
 from zope.interface import Interface, implementer
-from zope.publisher.http import HTTPInputStream
 from zope.security.interfaces import Unauthorized
 from zope.security.management import newInteraction
 from zope.security.proxy import removeSecurityProxy
@@ -755,427 +753,6 @@ class TestLaunchpadBrowserRequest(TestCase):
         )
 
 
-# XXX: ilkeremrekoc 2025-03-12
-# This test case is for the overridden processInputs inside
-# LaunchpadBrowserRequest. The reason for the overriding is connected to
-# the changes in multipart package (which now only accepts CRLF endings) and
-# the original state of apport package (which only sent LF endings)
-# (see https://bugs.launchpad.net/ubuntu/+source/apport/+bug/2096327). Thus,
-# we add the overridden method and this test case temporarily until the apport
-# package can be SRU'd in a sufficient percantage of Ubuntu users. Both of
-# these can be removed once the prerequisite is fulfilled.
-class TestLaunchpadBrowserRequestProcessInputs(TestCase):
-    layer = FunctionalLayer
-
-    def setUp(self):
-        super().setUp()
-
-        self.body_template_str = dedent(
-            """\
-            Content-Type: multipart/mixed; \
-                boundary="===============4913381966751462219=="
-            MIME-Version: 1.0
-            \n\
-            --===============4913381966751462219==
-            Content-Type: text/plain; charset="us-ascii"
-            MIME-Version: 1.0
-            Content-Transfer-Encoding: 7bit
-            Content-Disposition: form-data; name="FORM_SUBMIT"
-            \n\
-            1
-            --===============4913381966751462219==
-            Content-Type: application/octet-stream
-            MIME-Version: 1.0
-            Content-Disposition: form-data; name="field.blob"; \
-                filename="x"
-            \n\
-            %s
-            \n\
-            --===============4913381966751462219==--
-            \n\
-            """
-        )
-
-        self.standard_blob = dedent(
-            """
-            This is for a test....
-            \n\
-            The second line of test
-            \n\
-            The third
-            """
-        )
-
-        self.body_str = self.body_template_str % self.standard_blob
-        self.body_bytes = self.body_str.encode("ASCII")
-
-        self.environ = {
-            "PATH_INFO": "/+storeblob",
-            "REQUEST_METHOD": "POST",
-            "CONTENT_TYPE": "multipart/form-data; \
-                boundary================4913381966751462219==",
-        }
-
-    def prepareRequest_lineBreakTest(self, body_bytes, environ):
-        """Return a `LaunchpadBrowserRequest` with the given form.
-
-        Also set the accepted charset to 'utf-8'.
-        """
-
-        body_stream = io.BytesIO(body_bytes)
-        input_stream = HTTPInputStream(body_stream, environ)
-
-        request = LaunchpadBrowserRequest(input_stream, environ)
-        request.charsets = ["utf-8"]
-        return request
-
-    def _assessProcessInputsResult(self, request):
-        """
-        Assesses whether the request has correctly parsed the blob field.
-
-        Passing in this case means the request's form is correctly parsed
-        and it only includes CRLF endings.
-        """
-
-        self.assertIn("field.blob", request.form)
-
-        result = request.form["field.blob"].read()
-
-        self.assertIn(b"\r\n", result)
-        self.assertNotRegex(result, b"[^\r]\n")
-
-    def test_processInputs_with_LF(self):
-        """
-        processInputs parses request bodies with LF line-breaks into CRLF.
-        """
-
-        request = self.prepareRequest_lineBreakTest(
-            self.body_bytes, self.environ
-        )
-        request.processInputs()
-
-        self._assessProcessInputsResult(request)
-
-    def test_processInputs_with_CR(self):
-        """
-        processInputs parses request bodies with CR line-breaks into CRLF.
-        """
-
-        body_bytes_with_CR = self.body_bytes.replace(b"\n", b"\r")
-
-        request = self.prepareRequest_lineBreakTest(
-            body_bytes_with_CR, self.environ
-        )
-        request.processInputs()
-
-        self._assessProcessInputsResult(request)
-
-    def test_processInputs_with_CRLF(self):
-        """
-        processInputs passes request bodies with CRLF line-breaks.
-        """
-
-        body_bytes_with_CRLF = self.body_bytes.replace(b"\n", b"\r\n")
-
-        request = self.prepareRequest_lineBreakTest(
-            body_bytes_with_CRLF, self.environ
-        )
-        request.processInputs()
-
-        self._assessProcessInputsResult(request)
-
-    def test_processInputs_with_multiple_buffer_runs(self):
-        """
-        processInputs should work even when the message overflows the buffer
-        """
-
-        request = self.prepareRequest_lineBreakTest(
-            self.body_bytes, self.environ
-        )
-        request.buffer_size = 4
-
-        request.processInputs()
-
-        self._assessProcessInputsResult(request)
-
-    def test_processInputs_with_cut_CRLF(self):
-        """
-        processInputs should work even when a CRLF is cut in the middle
-        """
-
-        body_bytes_with_CRLF = self.body_bytes.replace(b"\n", b"\r\n")
-        CR_index = body_bytes_with_CRLF.index(b"\r")
-
-        request = self.prepareRequest_lineBreakTest(
-            body_bytes_with_CRLF, self.environ
-        )
-        request.buffer_size = len(body_bytes_with_CRLF[: CR_index + 1])
-
-        request.processInputs()
-
-        self._assessProcessInputsResult(request)
-
-    def test_processInputs_different_environ_with_LF(self):
-        """
-        processInputs shouldn't work outside its domain. If LF line-breaks are
-        present outside of the apport package domain, the parsing should fail.
-        """
-
-        different_environ = self.environ.copy()
-        different_environ["PATH_INFO"] = ""
-
-        request = self.prepareRequest_lineBreakTest(
-            self.body_bytes, different_environ
-        )
-
-        request.processInputs()
-
-        self.assertNotIn("field.blob", request.form)
-
-    def test_processInputs_different_environ_with_CR(self):
-        """
-        processInputs shouldn't work outside its domain. If CR line-breaks are
-        present outside of the apport package domain, the parsing should fail.
-        """
-
-        body_bytes_with_CR = self.body_bytes.replace(b"\n", b"\r")
-
-        different_environ = self.environ.copy()
-        different_environ["PATH_INFO"] = ""
-
-        request = self.prepareRequest_lineBreakTest(
-            body_bytes_with_CR, different_environ
-        )
-
-        request.processInputs()
-
-        self.assertNotIn("field.blob", request.form)
-
-    def test_processInputs_different_environ_with_CRLF(self):
-        """
-        processInputs should work with CRLF everytime, regardless of domain.
-        """
-
-        body_bytes_with_CRLF = self.body_bytes.replace(b"\n", b"\r\n")
-
-        different_environ = self.environ.copy()
-        different_environ["PATH_INFO"] = ""
-
-        request = self.prepareRequest_lineBreakTest(
-            body_bytes_with_CRLF, different_environ
-        )
-
-        request.processInputs()
-
-        self._assessProcessInputsResult(request)
-
-    def test_processInputs_content_length(self):
-        """
-        processInputs should change the content length in proportion to the
-        number of LFs.
-        """
-
-        original_body_length = len(self.body_str)
-        lf_count = self.body_str.count("\n")
-
-        environ = self.environ.copy()
-        environ["CONTENT_LENGTH"] = original_body_length
-
-        body_bytes = self.body_str.encode("ASCII")
-
-        request = self.prepareRequest_lineBreakTest(body_bytes, environ)
-
-        request.processInputs()
-
-        # Note that processInputs change the values within "_environ"
-        # internally.
-        self.assertEqual(
-            original_body_length + lf_count, request._environ["CONTENT_LENGTH"]
-        )
-
-
-class TestLaunchpadBrowserRequestProcessInputsLarge(TestCase):
-    # Test case for the large inputs slotted into ProcessInputs method
-
-    layer = FunctionalLayer
-
-    def setUp(self):
-        super().setUp()
-
-        self.body_template_str = dedent(
-            """\
-            Content-Type: multipart/mixed; \
-                boundary="===============4913381966751462219=="
-            MIME-Version: 1.0
-            \n\
-            --===============4913381966751462219==
-            Content-Type: text/plain; charset="us-ascii"
-            MIME-Version: 1.0
-            Content-Transfer-Encoding: 7bit
-            Content-Disposition: form-data; name="FORM_SUBMIT"
-            \n\
-            1
-            --===============4913381966751462219==
-            Content-Type: application/octet-stream
-            MIME-Version: 1.0
-            Content-Disposition: form-data; name="field.blob"; \
-                filename="x"
-            \n\
-            %s
-            \n\
-            --===============4913381966751462219==--
-            \n\
-            """
-        )
-
-        self.standard_blob = dedent(
-            """
-            This is for a test....
-            \n\
-            The second line of test
-            \n\
-            The third
-            """
-        )
-
-        self.body_str = self.body_template_str % self.standard_blob
-        self.body_bytes = self.body_str.encode("ASCII")
-
-        self.environ = {
-            "PATH_INFO": "/+storeblob",
-            "REQUEST_METHOD": "POST",
-            "CONTENT_TYPE": "multipart/form-data; \
-                boundary================4913381966751462219==",
-        }
-
-    def prepareRequest_lineBreakTest(self, body_bytes, environ):
-        """Return a `LaunchpadBrowserRequest` with the given form.
-
-        Also set the accepted charset to 'utf-8'.
-        """
-
-        body_stream = io.BytesIO(body_bytes)
-        input_stream = HTTPInputStream(body_stream, environ)
-
-        request = LaunchpadBrowserRequest(input_stream, environ)
-        request.charsets = ["utf-8"]
-        return request
-
-    def _assessProcessInputsResult(self, request):
-        """
-        Assesses whether the request has correctly parsed the blob field.
-
-        Passing in this case means the request's form is correctly parsed
-        and it only includes CRLF endings.
-        """
-
-        self.assertIn("field.blob", request.form)
-
-        result = request.form["field.blob"].read()
-
-        self.assertIn(b"\r\n", result)
-        self.assertNotRegex(result, b"[^\r]\n")
-
-    def test_processInputs_above_buffer_size_limit(self):
-        """
-        processInputs should work even when the initial message overflows the
-        buffer.
-        """
-
-        template_size = len(self.body_template_str) - 2  # Subtracting "%s"
-        buffer_size = LaunchpadBrowserRequest.buffer_size
-
-        space_to_fill = buffer_size - template_size
-        blob_size = len(self.standard_blob)
-
-        # Make the whole request above the buffer size limit
-        multiply_blob = (space_to_fill // blob_size) + 1
-        large_blob = self.standard_blob * multiply_blob
-
-        large_body_str = self.body_template_str % large_blob
-        large_body_bytes = large_body_str.encode("ASCII")
-
-        lf_count = large_body_str.count("\n")
-
-        environ = self.environ.copy()
-        environ["CONTENT_LENGTH"] = len(large_body_str)
-
-        request = self.prepareRequest_lineBreakTest(large_body_bytes, environ)
-
-        old_content_length = request._environ["CONTENT_LENGTH"]
-
-        request.processInputs()
-
-        self.assertEqual(
-            old_content_length + lf_count, request._environ["CONTENT_LENGTH"]
-        )
-        self._assessProcessInputsResult(request)
-
-    def test_processInputs_just_below_buffer_size_limit(self):
-        """
-        processInputs should work even when the message gets overflowed after
-        initiation.
-        """
-
-        template_size = len(self.body_template_str) - 2  # Subtracting "%s"
-        buffer_size = LaunchpadBrowserRequest.buffer_size
-
-        space_to_fill = buffer_size - template_size
-        blob_size = len(self.standard_blob)
-
-        # Make the whole request above the buffer size limit
-        multiply_blob = space_to_fill // blob_size
-        large_blob = self.standard_blob * multiply_blob
-
-        large_body_str = self.body_template_str % large_blob
-        large_body_bytes = large_body_str.encode("ASCII")
-
-        lf_count = large_body_str.count("\n")
-
-        environ = self.environ.copy()
-        environ["CONTENT_LENGTH"] = len(large_body_str)
-
-        request = self.prepareRequest_lineBreakTest(large_body_bytes, environ)
-
-        old_content_length = request._environ["CONTENT_LENGTH"]
-
-        request.processInputs()
-
-        self.assertEqual(
-            old_content_length + lf_count, request._environ["CONTENT_LENGTH"]
-        )
-        self._assessProcessInputsResult(request)
-
-    def test_processInputs_blob_only_LF(self):
-        """
-        processInputs should work even when the whole blob within the message
-        is gibberish, or made up of entirely LFs and is extremely large.
-        """
-
-        buffer_size = LaunchpadBrowserRequest.buffer_size
-
-        lf_blob = "\n" * buffer_size * 2
-
-        lf_body_str = self.body_template_str % lf_blob
-        lf_body_bytes = lf_body_str.encode("ASCII")
-
-        lf_count = lf_body_str.count("\n")
-
-        environ = self.environ.copy()
-        environ["CONTENT_LENGTH"] = len(lf_body_str)
-
-        request = self.prepareRequest_lineBreakTest(lf_body_bytes, environ)
-
-        old_content_length = request._environ["CONTENT_LENGTH"]
-
-        request.processInputs()
-
-        self.assertEqual(
-            old_content_length + lf_count, request._environ["CONTENT_LENGTH"]
-        )
-        self._assessProcessInputsResult(request)
-
-
 class TestWebServiceRequestToBrowserRequest(WebServiceTestCase):
     def test_unicode_path_info(self):
         web_service_request = WebServiceTestRequest(
diff --git a/requirements/launchpad.txt b/requirements/launchpad.txt
index dfb4119..022a99e 100644
--- a/requirements/launchpad.txt
+++ b/requirements/launchpad.txt
@@ -98,7 +98,7 @@ mistune==0.8.3
 monotonic==1.5
 more-itertools==10.5.0
 msgpack==1.0.2
-multipart==1.2.1
+multipart==0.2.4
 netaddr==0.7.19
 netifaces==0.11.0
 oauth==1.0