launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #32262
Re: [Merge] ~ilkeremrekoc/launchpad:accept-crlf-bugreports into launchpad:master
Replied
Diff comments:
> diff --git a/lib/lp/bugs/utilities/filebugdataparser.py b/lib/lp/bugs/utilities/filebugdataparser.py
> index 4806dcb..6b37f14 100644
> --- a/lib/lp/bugs/utilities/filebugdataparser.py
> +++ b/lib/lp/bugs/utilities/filebugdataparser.py
> @@ -32,6 +32,105 @@ class FileBugDataParser:
> self.attachments = []
> self.BUFFER_SIZE = 8192
>
> + # XXX: ilkeremrekoc 2025-02-26: The whole point of trying to find the "LF"
> + # or "CRLF" is because the Apport package (which is used to send
> + # bug-reports to Launchpad) uses LF line-breaks since its inception.
> + # This is against the RFC standards, which standardized the "CRLF" as the
> + # line-break for all HTTP requests. And because the main parser for zope
> + # (which is the "multipart" package) started enforcing this standard
> + # strictly in its newer versions, and considering we must upgrade our
> + # dependencies, we had to make sure FileBugDataParser accepts the "CRLF"
> + # as well. But we cannot accept only "CRLF" for now because the apport
> + # package for every older version of Ubuntu would still use the "LF"
> + # making it impossible to send bug-reports from older LTS versions.
> + # So, until a sufficient number or all the apport packages in future,
> + # current and past Ubuntu versions are patched to only send "CRLF" we
> + # must use this method to ensure both "LF" and "CRLF" is accepted.
> + # Once the apport package patches are done, we can revert this change,
> + # get rid of this method and its tests and simply change
> + # the "LF" expectance in the previous version into "CRLF" expectance.
> + def _findLineBreakType(self) -> bytes:
> + """Find the line-break/end_string the message is using and return it.
> +
> + Assumptions:
> + - The method would be run at the start of any parsing run as this
> + method doesn't open the blob file but resets the blob-stream's
> + cursor.
> +
> + - The line-break can only be LF ("\n") or CRLF ("\r\n") with
> + no CR ("\r") functionality.
> +
> + - The message must have at least one line-break character when
> + parsing. An empty message or one without any line-breaks doesn't
> + count (which should be impossible anyway but I digress).
> +
> + - The whole message must be made up of a single line-break type.
> + If more than one type is present, something will break after this
> + method.
> +
> + Reads through the message until it finds an LF character ("\n") before
> + closing and reopening the file-alias/blob to reset its cursor (as the
> + file-alias/blob doesn't have a "seek" functionality). Then returns
> + whichever line-break type is used in the message.
> +
> + ...
> + :raises AssertionError: If the method cannot find any LF linebreak on
> + the message stream, it raises.
> + ...
> + :return: Either byte typed CRLF (b"\r\n") or byte typed LF (b"\n")
> + :rtype: bytes
> + """
> +
> + # The LF line break is assumed to be a part of the message as it is
> + # a part of both LF and CRLF.
> + lf_linebreak = b"\n"
> +
> + # A temporary buffer we don't need to save outside the scope as it is
> + # only for finding the first line-break.
> + temp_buffer = b""
> +
> + while lf_linebreak not in temp_buffer:
> + data = self.blob_file.read(self.BUFFER_SIZE)
> +
> + # Append the data to the temp_buffer. This is to ensure any
> + # CR ("\r") that is part of a CRLF isn't deleted in a preceding
> + # buffer accidentally.
> + temp_buffer += data
> +
> + if len(data) < self.BUFFER_SIZE:
> + # End of file.
> +
> + if lf_linebreak not in temp_buffer:
> + # If the linebreak isn't present, then the message must
> + # be broken since the method must read from the start
> +
> + if (
> + b"\r" in temp_buffer
> + ): # If the linebreak inside the whole message is only CR
> + raise AssertionError(
> + "The wrong linebreak is used. CR isn't accepted."
> + )
> +
> + raise AssertionError(
> + "There are no linebreaks in the blob."
> + )
> + break
> +
> + # This part is for ".seek(0)" functionality that LibraryFileAlias
> + # lack, requiring the calls to close() -> open() back to back
> + # to reset the stream's read cursor to the start of the file.
> + self.blob_file.close()
> + self.blob_file.open()
> +
> + lf_index = temp_buffer.index(lf_linebreak)
> +
> + # A slice is needed even if for a single character as bytes type acts
> + # differently in slices and in singe character accesses.
Yup, good practice anyway to re-order your commits logically at the end, rather than in the order you made them.
> + if temp_buffer[lf_index - 1 : lf_index] == b"\r":
> + return b"\r\n"
> +
> + return b"\n"
> +
> def _consumeBytes(self, end_string):
> """Read bytes from the message up to the end_string.
>
> diff --git a/lib/lp/bugs/utilities/tests/test_filebugdataparser.py b/lib/lp/bugs/utilities/tests/test_filebugdataparser.py
> index 28d4be9..84b8e0b 100644
> --- a/lib/lp/bugs/utilities/tests/test_filebugdataparser.py
> +++ b/lib/lp/bugs/utilities/tests/test_filebugdataparser.py
> @@ -44,9 +67,65 @@ class TestFileBugDataParser(TestCase):
> self.assertEqual(b"", parser._consumeBytes(b"0"))
> self.assertEqual(b"", parser._consumeBytes(b"0"))
>
> + def test__findLineBreakType(self):
> + # _findLineBreakType reads the whole message until either an LF or
> + # a CRLF is found, returns that type in bytes string format and exits.
> +
> + msg = dedent(
> + """\
> + Header: value
> + Space-Folded-Header: this header
> + is folded with a space.
> + Tab-Folded-Header: this header
> + \tis folded with a tab.
> + Another-header: another-value
> +
> + Not-A-Header: not-a-value
> + """
> + )
> +
> + # Test the above message with LF endings
> + msg_with_lf = msg.encode("ASCII")
> + lf_parser = FileBugDataParser(MockBytesIO(msg_with_lf))
> + lf_linebreak = lf_parser._findLineBreakType()
> +
> + self.assertEqual(lf_linebreak, b"\n")
ack
> +
> + # Test the above message after replacing LF with CRLF
> + msg_with_crlf = msg.replace("\n", "\r\n").encode("ASCII")
> + crlf_parser = FileBugDataParser(MockBytesIO(msg_with_crlf))
> + crlf_linebreak = crlf_parser._findLineBreakType()
> +
> + self.assertEqual(crlf_linebreak, b"\r\n")
> +
> + # Test the above message after deleting the line-breaks
> + # Which should break as there must be at least some line-breaks
> + # in the message
We should also think about future users though. I don't think it's technically a problem to have no line breaks at all right? Why explicitly prevent this when it's not a necessary condition?
> + msg_without_linebreak = msg.replace("\n", "").encode("ASCII")
> + without_linebreak_parser = FileBugDataParser(
> + MockBytesIO(msg_without_linebreak)
> + )
> +
> + self.assertRaisesWithContent(
> + AssertionError,
> + "There are no linebreaks in the blob.",
> + without_linebreak_parser._findLineBreakType,
> + )
> +
> + # Test the above message after replacing LF with CR
> + # Which should break as we do not accept CR line-breaks
> + msg_with_cr = msg.replace("\n", "\r").encode("ASCII")
> + cr_parser = FileBugDataParser(MockBytesIO(msg_with_cr))
> +
> + self.assertRaisesWithContent(
> + AssertionError,
> + "The wrong linebreak is used. CR isn't accepted.",
> + cr_parser._findLineBreakType,
> + )
> +
> def test_readLine(self):
> # readLine reads a single line of the file.
> - parser = FileBugDataParser(io.BytesIO(b"123\n456\n789"))
> + parser = FileBugDataParser(MockBytesIO(b"123\n456\n789"))
> self.assertEqual(b"123\n", parser.readLine())
> self.assertEqual(b"456\n", parser.readLine())
> self.assertEqual(b"789", parser.readLine())
--
https://code.launchpad.net/~ilkeremrekoc/launchpad/+git/launchpad/+merge/481785
Your team Launchpad code reviewers is requested to review the proposed merge of ~ilkeremrekoc/launchpad:accept-crlf-bugreports into launchpad:master.
References