← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:py3-mail-bytes into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:py3-mail-bytes into launchpad:master.

Commit message:
Start fixing up email handling for Python 3

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/391321

This is likely to be incomplete, as the email package has changed a lot in Python 3; but this still appears to work in Python 2, and it cleans up some things that impede getting the test suite working at all in Python 3.

The main effect is to start trying to explicitly treat email messages as bytes.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:py3-mail-bytes into launchpad:master.
diff --git a/lib/lp/services/mail/helpers.py b/lib/lp/services/mail/helpers.py
index 80f68d3..21ced5b 100644
--- a/lib/lp/services/mail/helpers.py
+++ b/lib/lp/services/mail/helpers.py
@@ -3,7 +3,7 @@
 
 __metaclass__ = type
 
-from cStringIO import StringIO as cStringIO
+from io import BytesIO
 import os.path
 import re
 import time
@@ -249,7 +249,7 @@ def save_mail_to_librarian(raw_mail, restricted=False):
     # be guessable for example.
     file_name = str(uuid1()) + '.txt'
     file_alias = getUtility(ILibraryFileAliasSet).create(
-        file_name, len(raw_mail), cStringIO(raw_mail), 'message/rfc822',
+        file_name, len(raw_mail), BytesIO(raw_mail), 'message/rfc822',
         restricted=restricted)
     return file_alias
 
diff --git a/lib/lp/services/mail/sendmail.py b/lib/lp/services/mail/sendmail.py
index 86d5ed1..fea6424 100644
--- a/lib/lp/services/mail/sendmail.py
+++ b/lib/lp/services/mail/sendmail.py
@@ -141,8 +141,8 @@ def format_address(name, address):
         >>> format_address(u'F\xf4\xf4 Bar', 'foo.bar@xxxxxxxxxxxxx')
         '=?utf-8?b?RsO0w7QgQmFy?= <foo.bar@xxxxxxxxxxxxx>'
 
-        >>> format_address('Foo [Baz] Bar', 'foo.bar@xxxxxxxxxxxxx')
-        '"Foo \\[Baz\\] Bar" <foo.bar@xxxxxxxxxxxxx>'
+        >>> format_address('Foo "Baz" Bar', 'foo.bar@xxxxxxxxxxxxx')
+        '"Foo \\"Baz\\" Bar" <foo.bar@xxxxxxxxxxxxx>'
 
     Really long names doesn't get folded, since we're not constructing
     an email header here.
@@ -257,7 +257,7 @@ class MailController(object):
         # previously specified, then do nothing.
         if 'Content-Transfer-Encoding' in part:
             return
-        orig_payload = part.get_payload()
+        orig_payload = part.get_payload(decode=True)
         if not exact and is_ascii_only(orig_payload):
             return
         # Payloads which are completely ascii need no encoding.
@@ -444,11 +444,14 @@ def sendmail(message, to_addrs=None, bulk=True):
     # helps security, but still exposes us to a replay attack; we consider the
     # risk low.
     del message['X-Launchpad-Hash']
-    hash = hashlib.sha1(config.mailman.shared_secret)
-    hash.update(str(message['message-id']))
+    hash = hashlib.sha1(config.mailman.shared_secret.encode('UTF-8'))
+    hash.update(six.ensure_binary(message['message-id']))
     message['X-Launchpad-Hash'] = hash.hexdigest()
 
-    raw_message = message.as_string()
+    if sys.version_info[:2] >= (3, 4):
+        raw_message = message.as_bytes()
+    else:
+        raw_message = message.as_string()
     message_detail = message['Subject']
     if not isinstance(message_detail, six.string_types):
         # Might be a Header object; can be squashed.
@@ -513,8 +516,11 @@ def raw_sendmail(from_addr, to_addrs, raw_message, message_detail):
     """
     assert not isinstance(to_addrs, six.string_types), \
         'to_addrs must be a sequence'
-    assert isinstance(raw_message, str), 'Not a plain string'
-    assert raw_message.decode('ascii'), 'Not ASCII - badly encoded message'
+    assert isinstance(raw_message, str), 'Not a native string'
+    if isinstance(raw_message, bytes):  # Python 2
+        assert raw_message.decode('ascii'), 'Not ASCII - badly encoded message'
+    else:  # Python 3
+        assert raw_message.encode('ascii'), 'Not ASCII - badly encoded message'
     mailer = getUtility(IMailDelivery, 'Mail')
     request = get_current_browser_request()
     timeline = get_request_timeline(request)
diff --git a/lib/lp/services/mail/tests/test_sendmail.py b/lib/lp/services/mail/tests/test_sendmail.py
index 8206ad7..508925a 100644
--- a/lib/lp/services/mail/tests/test_sendmail.py
+++ b/lib/lp/services/mail/tests/test_sendmail.py
@@ -75,7 +75,7 @@ class TestMailController(TestCase):
         self.assertEqual(
             'attachment', attachment['Content-Disposition'])
         self.assertEqual(
-            'content1', attachment.get_payload(decode=True))
+            b'content1', attachment.get_payload(decode=True))
         ctrl.addAttachment(
             'content2', 'text/plain', inline=True, filename='name1')
         attachment = ctrl.attachments[1]
@@ -84,7 +84,7 @@ class TestMailController(TestCase):
         self.assertEqual(
             'inline; filename="name1"', attachment['Content-Disposition'])
         self.assertEqual(
-            'content2', attachment.get_payload(decode=True))
+            b'content2', attachment.get_payload(decode=True))
         ctrl.addAttachment(
             'content2', 'text/plain', inline=True, filename='name1')
 
@@ -99,7 +99,7 @@ class TestMailController(TestCase):
         self.assertEqual('=?utf-8?b?4YSAdG9AZXhhbXBsZS5jb20=?=',
             message['To'])
         self.assertEqual('subject', message['Subject'])
-        self.assertEqual('body', message.get_payload(decode=True))
+        self.assertEqual(b'body', message.get_payload(decode=True))
 
     def test_MakeMessage_long_address(self):
         # Long email addresses are not wrapped if very long.  These are due to
@@ -124,7 +124,7 @@ class TestMailController(TestCase):
         self.assertEqual('from@xxxxxxxxxxx', message['From'])
         self.assertEqual('to@xxxxxxxxxxx', message['To'])
         self.assertEqual('subject', message['Subject'])
-        self.assertEqual('body', message.get_payload(decode=True))
+        self.assertEqual(b'body', message.get_payload(decode=True))
 
     def test_MakeMessage_unicode_body(self):
         # A message without an attachment with a unicode body gets sent as
@@ -136,7 +136,7 @@ class TestMailController(TestCase):
         # Make sure that the message can be flattened to a string as sendmail
         # does without raising a UnicodeEncodeError.
         message.as_string()
-        self.assertEqual('Bj\xc3\xb6rn', message.get_payload(decode=True))
+        self.assertEqual(b'Bj\xc3\xb6rn', message.get_payload(decode=True))
 
     def test_MakeMessage_unicode_body_with_attachment(self):
         # A message with an attachment with a unicode body gets sent as
@@ -150,7 +150,7 @@ class TestMailController(TestCase):
         # does without raising a UnicodeEncodeError.
         message.as_string()
         body, attachment = message.get_payload()
-        self.assertEqual('Bj\xc3\xb6rn', body.get_payload(decode=True))
+        self.assertEqual(b'Bj\xc3\xb6rn', body.get_payload(decode=True))
         self.assertTrue(is_ascii_only(message.as_string()))
 
     def test_MakeMessage_with_binary_attachment(self):
@@ -170,7 +170,8 @@ class TestMailController(TestCase):
         message = ctrl.makeMessage()
         body, attachment = message.get_payload()
         self.assertEqual(
-            attachment.get_payload(), attachment.get_payload(decode=True))
+            attachment.get_payload().encode('UTF-8'),
+            attachment.get_payload(decode=True))
 
     def test_MakeMessage_with_attachment(self):
         """A message with an attachment should be multipart."""
@@ -182,8 +183,8 @@ class TestMailController(TestCase):
         self.assertEqual('to@xxxxxxxxxxx', message['To'])
         self.assertEqual('subject', message['Subject'])
         body, attachment = message.get_payload()
-        self.assertEqual('body', body.get_payload(decode=True))
-        self.assertEqual('attach', attachment.get_payload(decode=True))
+        self.assertEqual(b'body', body.get_payload(decode=True))
+        self.assertEqual(b'attach', attachment.get_payload(decode=True))
         self.assertEqual(
             'application/octet-stream', attachment['Content-Type'])
         self.assertEqual('attachment', attachment['Content-Disposition'])
@@ -196,7 +197,7 @@ class TestMailController(TestCase):
             'attach', 'text/plain', inline=True, filename='README')
         message = ctrl.makeMessage()
         attachment = message.get_payload()[1]
-        self.assertEqual('attach', attachment.get_payload(decode=True))
+        self.assertEqual(b'attach', attachment.get_payload(decode=True))
         self.assertEqual(
             'text/plain', attachment['Content-Type'])
         self.assertEqual(
@@ -208,12 +209,13 @@ class TestMailController(TestCase):
         part = Message()
         part.set_payload(text)
         MailController.encodeOptimally(part, exact=False)
-        self.assertEqual(part.get_payload(), part.get_payload(decode=True))
+        self.assertEqual(
+            part.get_payload().encode('UTF-8'), part.get_payload(decode=True))
         self.assertIs(None, part['Content-Transfer-Encoding'])
 
     def test_encodeOptimally_with_7_bit_binary(self):
         """Mostly-ascii attachments should be encoded as quoted-printable."""
-        text = 'I went to the cafe today.\n\r'
+        text = b'I went to the cafe today.\n\r'
         part = Message()
         part.set_payload(text)
         MailController.encodeOptimally(part)
@@ -235,7 +237,7 @@ class TestMailController(TestCase):
 
     def test_encodeOptimally_with_binary(self):
         """Significantly non-ascii attachments should be base64-encoded."""
-        bytes = '\x00\xff\x44\x55\xaa\x99'
+        bytes = b'\x00\xff\x44\x55\xaa\x99'
         part = Message()
         part.set_payload(bytes)
         MailController.encodeOptimally(part)
diff --git a/lib/lp/services/messages/model/message.py b/lib/lp/services/messages/model/message.py
index 98c1f2a..94f91da 100644
--- a/lib/lp/services/messages/model/message.py
+++ b/lib/lp/services/messages/model/message.py
@@ -10,7 +10,6 @@ __all__ = [
     'UserToUserEmail',
     ]
 
-from cStringIO import StringIO as cStringIO
 from datetime import datetime
 import email
 from email.header import (
@@ -23,6 +22,7 @@ from email.utils import (
     parseaddr,
     parsedate_tz,
     )
+from io import BytesIO
 import logging
 from operator import attrgetter
 import os.path
@@ -460,7 +460,7 @@ class MessageSet:
                 if len(content) > 0:
                     blob = getUtility(ILibraryFileAliasSet).create(
                         name=filename, size=len(content),
-                        file=cStringIO(content), contentType=content_type,
+                        file=BytesIO(content), contentType=content_type,
                         restricted=restricted)
                     MessageChunk(message=message, sequence=sequence, blob=blob)
                     sequence += 1