launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #32421
[Merge] ~ilkeremrekoc/launchpad:revert-multipart into launchpad:master
İlker Emre Koç has proposed merging ~ilkeremrekoc/launchpad:revert-multipart into launchpad:master.
Commit message:
Revert "upgrade multipart with line-break parsing"
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~ilkeremrekoc/launchpad/+git/launchpad/+merge/485077
I am reverting since. The upgrade failed some buildbot tests.
This reverts commit a3fbbd49e00800ee6d2501ecec93ceaf3b3c5535.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~ilkeremrekoc/launchpad:revert-multipart into launchpad:master.
diff --git a/lib/lp/services/webapp/servers.py b/lib/lp/services/webapp/servers.py
index eb74580..f01969e 100644
--- a/lib/lp/services/webapp/servers.py
+++ b/lib/lp/services/webapp/servers.py
@@ -3,10 +3,8 @@
"""Definition of the internet servers that Launchpad uses."""
-import tempfile
import threading
import xmlrpc.client
-from io import BytesIO
from urllib.parse import parse_qs
import six
@@ -34,7 +32,6 @@ from zope.formlib.itemswidgets import MultiDataHelper
from zope.formlib.widget import SimpleInputWidget
from zope.interface import alsoProvides, implementer
from zope.publisher.browser import BrowserRequest, BrowserResponse, TestRequest
-from zope.publisher.http import HTTPInputStream
from zope.publisher.interfaces import NotFound
from zope.publisher.xmlrpc import XMLRPCRequest, XMLRPCResponse
from zope.security.interfaces import IParticipation, Unauthorized
@@ -710,7 +707,6 @@ class LaunchpadBrowserRequest(
"""
retry_max_count = 5 # How many times we're willing to retry
- buffer_size = 64 * 1024 # buffer size for the processInputs
def __init__(self, body_instream, environ, response=None):
BasicLaunchpadRequest.__init__(self, body_instream, environ, response)
@@ -769,169 +765,6 @@ class LaunchpadBrowserRequest(
"""See `ISynchronizer`."""
pass
- def _skipParseProcess(self):
- """Skips the parsing process inside processInputs by re-creating and
- re-inserting the input stream"""
-
- # The cache stream stores all the previous read operation results.
- # getCacheStream() reads the remaining body as well and adds it to the
- # cache stream. A complete re-creation is necessary since these stream
- # classes don't have any "seek()" method we can use to reset.
- new_body_instream = self._body_instream.getCacheStream()
-
- if isinstance(self._body_instream.stream, HTTPInputStream):
- self._body_instream.stream.cacheStream.close()
-
- self._body_instream = HTTPInputStream(new_body_instream, self._environ)
-
- def _createResetableStream(self, environment):
- """Creates a stream to be resetted at the end of the parsing process.
-
- This method is a copy of the __init__ method of HTTPInputStream class
- in zope.publisher since we use the returned value of this method in
- the same request instances in the same way while being unable to
- properly use that method here.
- """
-
- size = environment.get("CONTENT_LENGTH")
- # There can be no size in the environment (None) or the size
- # can be an empty string, in which case we treat it as absent.
- if not size:
- size = environment.get("HTTP_CONTENT_LENGTH")
-
- if not size or int(size) < 65536:
- resetable_stream = BytesIO()
- else:
- resetable_stream = tempfile.TemporaryFile()
-
- return resetable_stream
-
- # XXX: ilkeremrekoc 2025-03-12
- # processInputs is a part of Zope's request class, which we override.
- # This addition is because we are about to upgrade multipart package,
- # which in its newer versions is strict in its parsing of requests, only
- # accepting CRLF line-breaks throughout. And since the old versions of
- # apport package, which is the main bug-filing tool in Ubuntu, uses LF
- # line-breaks in its requests, we need backwards compatibility until the
- # older Ubuntu versions' apport packages can implement CRLF SRUs (Stable
- # Release Update). Once they do, and a sufficient section of users and
- # Ubuntu versions make the update, we can get rid of this method here.
- def processInputs(self):
- """See IPublisherRequest'
-
- Processes inputs before traversal
-
- This method overrides `BrowserRequest.processInputs()` to parse LF or
- CR line-breaks inside requests if they exist into CRLF line-breaks.
- Calls the parent method afterwards for further processing.
- """
-
- # Trigger the parsing only when the arriving request is pointed
- # towards apport package's entry point.
- # Note: Aside from apport, the browsers' UI can also reach this
- # page. This shouldn't affect our current process but should be kept
- # in mind for any large scale changes in the future.
- # Note: The third condition is redundant but was added as an extra
- # security since we really don't wish to enter this block if we are
- # not in the jurisdiction of the apport package.
- if (
- self._environ["PATH_INFO"] == "/+storeblob"
- and self._environ["REQUEST_METHOD"] == "POST"
- and self._environ["CONTENT_TYPE"].startswith("multipart/form-data")
- ):
-
- # The request's body can rise to enourmous sizes since this part
- # of the code is for filing of bug reports, which require
- # extensive auto generated information. As a result, we create
- # the instance dynamically for different request sizes in the
- # same way the zope.publisher framework's HTTPInputStream's
- # cacheStream is created.
- parsing_body_stream = self._createResetableStream(self._environ)
-
- buffer = self._body_instream.read(self.buffer_size)
-
- # If the parsing happens, the content length will change with the
- # new line-breaks so we store the new one.
- newContentLength = 0
-
- log_linebreak_type = "" # Store the linebreak type for logging
-
- while buffer != b"":
-
- if b"\r\n" in buffer:
-
- # Since we assume there is a single line-break type in the
- # request, we can skip the rest of the parsing when the
- # CRLF is found.
- self._skipParseProcess()
- parsing_body_stream.close()
-
- logging_context.push(linebreak_type="CRLF")
- super().processInputs()
- return
-
- # The only CRLF in the buffer might have been split between
- # the next buffer and the current one
- elif buffer.endswith(b"\r"):
- nextChar = self._body_instream.read(1)
- buffer += nextChar
-
- if nextChar == b"\n":
- self._skipParseProcess()
- parsing_body_stream.close()
-
- logging_context.push(linebreak_type="CRLF")
- super().processInputs()
- return
-
- if b"\n" in buffer:
- buffer = buffer.replace(b"\n", b"\r\n")
- newContentLength += len(buffer)
-
- log_linebreak_type = "LF"
- elif b"\r" in buffer:
- # Technically, we don't need CR functionality in our
- # codebase, but in this case, not having it would make
- # things harder to understand as the question of "what
- # happens" would be left unknowable since the downstream
- # doesn't raise errors.
-
- buffer = buffer.replace(b"\r", b"\r\n")
- newContentLength += len(buffer)
-
- log_linebreak_type = "CR"
- else:
- # If there are neither CRLF, LF or CR in a buffer, we
- # cannot be sure if the Content-Length will change later,
- # so, we store the buffer length just in case.
- newContentLength += len(buffer)
-
- parsing_body_stream.write(buffer)
- buffer = self._body_instream.read(self.buffer_size)
-
- # Note that we can reach this part only if we parsed the whole
- # request.
-
- # Updating the content length by anticipating change.
- self._environ["CONTENT_LENGTH"] = newContentLength
-
- logging_context.push(linebreak_type=log_linebreak_type)
-
- # Reset the stream before we re-create the original one.
- parsing_body_stream.seek(0)
-
- deleting_stream = self._body_instream
- self._body_instream = HTTPInputStream(
- parsing_body_stream, self._environ
- )
-
- if isinstance(deleting_stream.stream, HTTPInputStream):
- deleting_stream.stream.cacheStream.close()
- deleting_stream.cacheStream.close()
-
- super().processInputs()
- return
-
@implementer(IBrowserFormNG)
class BrowserFormNG:
diff --git a/lib/lp/services/webapp/tests/test_servers.py b/lib/lp/services/webapp/tests/test_servers.py
index bdd07ed..491fed7 100644
--- a/lib/lp/services/webapp/tests/test_servers.py
+++ b/lib/lp/services/webapp/tests/test_servers.py
@@ -5,7 +5,6 @@ import io
import unittest
from datetime import datetime, timedelta, timezone
from doctest import ELLIPSIS, NORMALIZE_WHITESPACE, DocTestSuite
-from textwrap import dedent
import transaction
from lazr.restful.interfaces import (
@@ -23,7 +22,6 @@ from talisker.logs import logging_context
from testtools.matchers import ContainsDict, Equals
from zope.component import getGlobalSiteManager, getUtility
from zope.interface import Interface, implementer
-from zope.publisher.http import HTTPInputStream
from zope.security.interfaces import Unauthorized
from zope.security.management import newInteraction
from zope.security.proxy import removeSecurityProxy
@@ -755,427 +753,6 @@ class TestLaunchpadBrowserRequest(TestCase):
)
-# XXX: ilkeremrekoc 2025-03-12
-# This test case is for the overridden processInputs inside
-# LaunchpadBrowserRequest. The reason for the overriding is connected to
-# the changes in multipart package (which now only accepts CRLF endings) and
-# the original state of apport package (which only sent LF endings)
-# (see https://bugs.launchpad.net/ubuntu/+source/apport/+bug/2096327). Thus,
-# we add the overridden method and this test case temporarily until the apport
-# package can be SRU'd in a sufficient percantage of Ubuntu users. Both of
-# these can be removed once the prerequisite is fulfilled.
-class TestLaunchpadBrowserRequestProcessInputs(TestCase):
- layer = FunctionalLayer
-
- def setUp(self):
- super().setUp()
-
- self.body_template_str = dedent(
- """\
- Content-Type: multipart/mixed; \
- boundary="===============4913381966751462219=="
- MIME-Version: 1.0
- \n\
- --===============4913381966751462219==
- Content-Type: text/plain; charset="us-ascii"
- MIME-Version: 1.0
- Content-Transfer-Encoding: 7bit
- Content-Disposition: form-data; name="FORM_SUBMIT"
- \n\
- 1
- --===============4913381966751462219==
- Content-Type: application/octet-stream
- MIME-Version: 1.0
- Content-Disposition: form-data; name="field.blob"; \
- filename="x"
- \n\
- %s
- \n\
- --===============4913381966751462219==--
- \n\
- """
- )
-
- self.standard_blob = dedent(
- """
- This is for a test....
- \n\
- The second line of test
- \n\
- The third
- """
- )
-
- self.body_str = self.body_template_str % self.standard_blob
- self.body_bytes = self.body_str.encode("ASCII")
-
- self.environ = {
- "PATH_INFO": "/+storeblob",
- "REQUEST_METHOD": "POST",
- "CONTENT_TYPE": "multipart/form-data; \
- boundary================4913381966751462219==",
- }
-
- def prepareRequest_lineBreakTest(self, body_bytes, environ):
- """Return a `LaunchpadBrowserRequest` with the given form.
-
- Also set the accepted charset to 'utf-8'.
- """
-
- body_stream = io.BytesIO(body_bytes)
- input_stream = HTTPInputStream(body_stream, environ)
-
- request = LaunchpadBrowserRequest(input_stream, environ)
- request.charsets = ["utf-8"]
- return request
-
- def _assessProcessInputsResult(self, request):
- """
- Assesses whether the request has correctly parsed the blob field.
-
- Passing in this case means the request's form is correctly parsed
- and it only includes CRLF endings.
- """
-
- self.assertIn("field.blob", request.form)
-
- result = request.form["field.blob"].read()
-
- self.assertIn(b"\r\n", result)
- self.assertNotRegex(result, b"[^\r]\n")
-
- def test_processInputs_with_LF(self):
- """
- processInputs parses request bodies with LF line-breaks into CRLF.
- """
-
- request = self.prepareRequest_lineBreakTest(
- self.body_bytes, self.environ
- )
- request.processInputs()
-
- self._assessProcessInputsResult(request)
-
- def test_processInputs_with_CR(self):
- """
- processInputs parses request bodies with CR line-breaks into CRLF.
- """
-
- body_bytes_with_CR = self.body_bytes.replace(b"\n", b"\r")
-
- request = self.prepareRequest_lineBreakTest(
- body_bytes_with_CR, self.environ
- )
- request.processInputs()
-
- self._assessProcessInputsResult(request)
-
- def test_processInputs_with_CRLF(self):
- """
- processInputs passes request bodies with CRLF line-breaks.
- """
-
- body_bytes_with_CRLF = self.body_bytes.replace(b"\n", b"\r\n")
-
- request = self.prepareRequest_lineBreakTest(
- body_bytes_with_CRLF, self.environ
- )
- request.processInputs()
-
- self._assessProcessInputsResult(request)
-
- def test_processInputs_with_multiple_buffer_runs(self):
- """
- processInputs should work even when the message overflows the buffer
- """
-
- request = self.prepareRequest_lineBreakTest(
- self.body_bytes, self.environ
- )
- request.buffer_size = 4
-
- request.processInputs()
-
- self._assessProcessInputsResult(request)
-
- def test_processInputs_with_cut_CRLF(self):
- """
- processInputs should work even when a CRLF is cut in the middle
- """
-
- body_bytes_with_CRLF = self.body_bytes.replace(b"\n", b"\r\n")
- CR_index = body_bytes_with_CRLF.index(b"\r")
-
- request = self.prepareRequest_lineBreakTest(
- body_bytes_with_CRLF, self.environ
- )
- request.buffer_size = len(body_bytes_with_CRLF[: CR_index + 1])
-
- request.processInputs()
-
- self._assessProcessInputsResult(request)
-
- def test_processInputs_different_environ_with_LF(self):
- """
- processInputs shouldn't work outside its domain. If LF line-breaks are
- present outside of the apport package domain, the parsing should fail.
- """
-
- different_environ = self.environ.copy()
- different_environ["PATH_INFO"] = ""
-
- request = self.prepareRequest_lineBreakTest(
- self.body_bytes, different_environ
- )
-
- request.processInputs()
-
- self.assertNotIn("field.blob", request.form)
-
- def test_processInputs_different_environ_with_CR(self):
- """
- processInputs shouldn't work outside its domain. If CR line-breaks are
- present outside of the apport package domain, the parsing should fail.
- """
-
- body_bytes_with_CR = self.body_bytes.replace(b"\n", b"\r")
-
- different_environ = self.environ.copy()
- different_environ["PATH_INFO"] = ""
-
- request = self.prepareRequest_lineBreakTest(
- body_bytes_with_CR, different_environ
- )
-
- request.processInputs()
-
- self.assertNotIn("field.blob", request.form)
-
- def test_processInputs_different_environ_with_CRLF(self):
- """
- processInputs should work with CRLF everytime, regardless of domain.
- """
-
- body_bytes_with_CRLF = self.body_bytes.replace(b"\n", b"\r\n")
-
- different_environ = self.environ.copy()
- different_environ["PATH_INFO"] = ""
-
- request = self.prepareRequest_lineBreakTest(
- body_bytes_with_CRLF, different_environ
- )
-
- request.processInputs()
-
- self._assessProcessInputsResult(request)
-
- def test_processInputs_content_length(self):
- """
- processInputs should change the content length in proportion to the
- number of LFs.
- """
-
- original_body_length = len(self.body_str)
- lf_count = self.body_str.count("\n")
-
- environ = self.environ.copy()
- environ["CONTENT_LENGTH"] = original_body_length
-
- body_bytes = self.body_str.encode("ASCII")
-
- request = self.prepareRequest_lineBreakTest(body_bytes, environ)
-
- request.processInputs()
-
- # Note that processInputs change the values within "_environ"
- # internally.
- self.assertEqual(
- original_body_length + lf_count, request._environ["CONTENT_LENGTH"]
- )
-
-
-class TestLaunchpadBrowserRequestProcessInputsLarge(TestCase):
- # Test case for the large inputs slotted into ProcessInputs method
-
- layer = FunctionalLayer
-
- def setUp(self):
- super().setUp()
-
- self.body_template_str = dedent(
- """\
- Content-Type: multipart/mixed; \
- boundary="===============4913381966751462219=="
- MIME-Version: 1.0
- \n\
- --===============4913381966751462219==
- Content-Type: text/plain; charset="us-ascii"
- MIME-Version: 1.0
- Content-Transfer-Encoding: 7bit
- Content-Disposition: form-data; name="FORM_SUBMIT"
- \n\
- 1
- --===============4913381966751462219==
- Content-Type: application/octet-stream
- MIME-Version: 1.0
- Content-Disposition: form-data; name="field.blob"; \
- filename="x"
- \n\
- %s
- \n\
- --===============4913381966751462219==--
- \n\
- """
- )
-
- self.standard_blob = dedent(
- """
- This is for a test....
- \n\
- The second line of test
- \n\
- The third
- """
- )
-
- self.body_str = self.body_template_str % self.standard_blob
- self.body_bytes = self.body_str.encode("ASCII")
-
- self.environ = {
- "PATH_INFO": "/+storeblob",
- "REQUEST_METHOD": "POST",
- "CONTENT_TYPE": "multipart/form-data; \
- boundary================4913381966751462219==",
- }
-
- def prepareRequest_lineBreakTest(self, body_bytes, environ):
- """Return a `LaunchpadBrowserRequest` with the given form.
-
- Also set the accepted charset to 'utf-8'.
- """
-
- body_stream = io.BytesIO(body_bytes)
- input_stream = HTTPInputStream(body_stream, environ)
-
- request = LaunchpadBrowserRequest(input_stream, environ)
- request.charsets = ["utf-8"]
- return request
-
- def _assessProcessInputsResult(self, request):
- """
- Assesses whether the request has correctly parsed the blob field.
-
- Passing in this case means the request's form is correctly parsed
- and it only includes CRLF endings.
- """
-
- self.assertIn("field.blob", request.form)
-
- result = request.form["field.blob"].read()
-
- self.assertIn(b"\r\n", result)
- self.assertNotRegex(result, b"[^\r]\n")
-
- def test_processInputs_above_buffer_size_limit(self):
- """
- processInputs should work even when the initial message overflows the
- buffer.
- """
-
- template_size = len(self.body_template_str) - 2 # Subtracting "%s"
- buffer_size = LaunchpadBrowserRequest.buffer_size
-
- space_to_fill = buffer_size - template_size
- blob_size = len(self.standard_blob)
-
- # Make the whole request above the buffer size limit
- multiply_blob = (space_to_fill // blob_size) + 1
- large_blob = self.standard_blob * multiply_blob
-
- large_body_str = self.body_template_str % large_blob
- large_body_bytes = large_body_str.encode("ASCII")
-
- lf_count = large_body_str.count("\n")
-
- environ = self.environ.copy()
- environ["CONTENT_LENGTH"] = len(large_body_str)
-
- request = self.prepareRequest_lineBreakTest(large_body_bytes, environ)
-
- old_content_length = request._environ["CONTENT_LENGTH"]
-
- request.processInputs()
-
- self.assertEqual(
- old_content_length + lf_count, request._environ["CONTENT_LENGTH"]
- )
- self._assessProcessInputsResult(request)
-
- def test_processInputs_just_below_buffer_size_limit(self):
- """
- processInputs should work even when the message gets overflowed after
- initiation.
- """
-
- template_size = len(self.body_template_str) - 2 # Subtracting "%s"
- buffer_size = LaunchpadBrowserRequest.buffer_size
-
- space_to_fill = buffer_size - template_size
- blob_size = len(self.standard_blob)
-
- # Make the whole request above the buffer size limit
- multiply_blob = space_to_fill // blob_size
- large_blob = self.standard_blob * multiply_blob
-
- large_body_str = self.body_template_str % large_blob
- large_body_bytes = large_body_str.encode("ASCII")
-
- lf_count = large_body_str.count("\n")
-
- environ = self.environ.copy()
- environ["CONTENT_LENGTH"] = len(large_body_str)
-
- request = self.prepareRequest_lineBreakTest(large_body_bytes, environ)
-
- old_content_length = request._environ["CONTENT_LENGTH"]
-
- request.processInputs()
-
- self.assertEqual(
- old_content_length + lf_count, request._environ["CONTENT_LENGTH"]
- )
- self._assessProcessInputsResult(request)
-
- def test_processInputs_blob_only_LF(self):
- """
- processInputs should work even when the whole blob within the message
- is gibberish, or made up of entirely LFs and is extremely large.
- """
-
- buffer_size = LaunchpadBrowserRequest.buffer_size
-
- lf_blob = "\n" * buffer_size * 2
-
- lf_body_str = self.body_template_str % lf_blob
- lf_body_bytes = lf_body_str.encode("ASCII")
-
- lf_count = lf_body_str.count("\n")
-
- environ = self.environ.copy()
- environ["CONTENT_LENGTH"] = len(lf_body_str)
-
- request = self.prepareRequest_lineBreakTest(lf_body_bytes, environ)
-
- old_content_length = request._environ["CONTENT_LENGTH"]
-
- request.processInputs()
-
- self.assertEqual(
- old_content_length + lf_count, request._environ["CONTENT_LENGTH"]
- )
- self._assessProcessInputsResult(request)
-
-
class TestWebServiceRequestToBrowserRequest(WebServiceTestCase):
def test_unicode_path_info(self):
web_service_request = WebServiceTestRequest(
diff --git a/requirements/launchpad.txt b/requirements/launchpad.txt
index dfb4119..022a99e 100644
--- a/requirements/launchpad.txt
+++ b/requirements/launchpad.txt
@@ -98,7 +98,7 @@ mistune==0.8.3
monotonic==1.5
more-itertools==10.5.0
msgpack==1.0.2
-multipart==1.2.1
+multipart==0.2.4
netaddr==0.7.19
netifaces==0.11.0
oauth==1.0