launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #26488
[Merge] ~cjwatson/launchpad:py3-escape-nonascii-uniquely into launchpad:master
Colin Watson has proposed merging ~cjwatson/launchpad:py3-escape-nonascii-uniquely into launchpad:master.
Commit message:
Make escape_nonascii_uniquely require bytes
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/398918
escape_nonascii_uniquely is only used by MailingListAPIView.holdMessage, which operates on bytes.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:py3-escape-nonascii-uniquely into launchpad:master.
diff --git a/lib/lp/registry/xmlrpc/mailinglist.py b/lib/lp/registry/xmlrpc/mailinglist.py
index 6467dd5..d22dfc2 100644
--- a/lib/lp/registry/xmlrpc/mailinglist.py
+++ b/lib/lp/registry/xmlrpc/mailinglist.py
@@ -239,8 +239,8 @@ class MailingListAPIView(LaunchpadXMLRPCView):
def holdMessage(self, team_name, bytes):
"""See `IMailingListAPIView`."""
- # For testing purposes, accept both strings and Binary instances. In
- # production, bytes will always be a Binary so that unencoded
+ # For testing purposes, accept both byte strings and Binary instances.
+ # In production, bytes will always be a Binary so that unencoded
# non-ascii characters in the message can be safely passed across
# XMLRPC. For most tests though it's much more convenient to just
# pass 8-bit strings.
@@ -250,7 +250,7 @@ class MailingListAPIView(LaunchpadXMLRPCView):
# Although it is illegal for an email header to have unencoded
# non-ascii characters, it is better to let the list owner
# process the message than to cause an oops.
- header_body_separator = re.compile('\r\n\r\n|\r\r|\n\n')
+ header_body_separator = re.compile(br'\r\n\r\n|\r\r|\n\n')
match = header_body_separator.search(bytes)
header = bytes[:match.start()]
header = escape_nonascii_uniquely(header)
diff --git a/lib/lp/services/encoding.py b/lib/lp/services/encoding.py
index 29a0d3f..3c1728b 100644
--- a/lib/lp/services/encoding.py
+++ b/lib/lp/services/encoding.py
@@ -171,25 +171,27 @@ def escape_nonascii_uniquely(bogus_string):
all the nonascii characters have been replaced with the same ascii
character.
- >>> print(len('\xa9'), len('\\xa9'))
+ >>> print(len(b'\xa9'), len(b'\\xa9'))
1 4
- >>> print(escape_nonascii_uniquely('hello \xa9'))
+ >>> print(six.ensure_str(escape_nonascii_uniquely(b'hello \xa9')))
hello \xa9
This string only has ascii characters, so escape_nonascii_uniquely()
actually has no effect.
- >>> print(escape_nonascii_uniquely('hello \\xa9'))
+ >>> print(six.ensure_str(escape_nonascii_uniquely(b'hello \\xa9')))
hello \xa9
+
+ :type bogus_string: bytes
"""
- nonascii_regex = re.compile(r'[\200-\377]')
+ nonascii_regex = re.compile(br'[\200-\377]')
# By encoding the invalid ascii with a backslash, x, and then the
# hex value, it makes it easy to decode it by pasting into a python
# interpreter. quopri() is not used, since that could caused the
# decoding of an email to fail.
def quote(match):
- return '\\x%x' % ord(match.group(0))
+ return b'\\x%x' % ord(match.group(0))
return nonascii_regex.sub(quote, bogus_string)