cloud-init-dev team mailing list archive
-
cloud-init-dev team
-
Mailing list archive
-
Message #04992
[Merge] ~chad.smith/cloud-init:fix/1768600-utf8-in-user-data into cloud-init:master
Chad Smith has proposed merging ~chad.smith/cloud-init:fix/1768600-utf8-in-user-data into cloud-init:master.
Commit message:
Be more safe on string/bytes when writing multipart user-data to disk.
When creating the multipart mime message that is written as
user-data.txt.i, cloud-init was losing data when converting some
content to a string.
LP: #1768600
Requested reviews:
cloud-init commiters (cloud-init-dev)
Related bugs:
Bug #1768600 in cloud-init: "UTF-8 support in User Data (text/x-shellscript) is broken"
https://bugs.launchpad.net/cloud-init/+bug/1768600
For more details, see:
https://code.launchpad.net/~chad.smith/cloud-init/+git/cloud-init/+merge/347782
--
Your team cloud-init commiters is requested to review the proposed merge of ~chad.smith/cloud-init:fix/1768600-utf8-in-user-data into cloud-init:master.
diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py
index 8f6aba1..ed83d2d 100644
--- a/cloudinit/user_data.py
+++ b/cloudinit/user_data.py
@@ -337,8 +337,10 @@ def is_skippable(part):
# Converts a raw string into a mime message
def convert_string(raw_data, content_type=NOT_MULTIPART_TYPE):
+ """convert a string (more likely bytes) or a message into
+ a mime message."""
if not raw_data:
- raw_data = ''
+ raw_data = b''
def create_binmsg(data, content_type):
maintype, subtype = content_type.split("/", 1)
@@ -346,15 +348,17 @@ def convert_string(raw_data, content_type=NOT_MULTIPART_TYPE):
msg.set_payload(data)
return msg
- try:
- data = util.decode_binary(util.decomp_gzip(raw_data))
- if "mime-version:" in data[0:4096].lower():
- msg = util.message_from_string(data)
- else:
- msg = create_binmsg(data, content_type)
- except UnicodeDecodeError:
- msg = create_binmsg(raw_data, content_type)
+ if isinstance(raw_data, six.text_type):
+ bdata = raw_data.encode('utf-8')
+ else:
+ bdata = raw_data
+ bdata = util.decomp_gzip(bdata, decode=False)
+ if b"mime-version:" in bdata[0:4096].lower():
+ msg = util.message_from_string(bdata.decode('utf-8'))
+ else:
+ msg = create_binmsg(bdata, content_type)
return msg
+
# vi: ts=4 expandtab
diff --git a/tests/unittests/test_data.py b/tests/unittests/test_data.py
index 91d35cb..bee31c1 100644
--- a/tests/unittests/test_data.py
+++ b/tests/unittests/test_data.py
@@ -606,8 +606,10 @@ class TestUDProcess(helpers.ResourceUsingTestCase):
class TestConvertString(helpers.TestCase):
+
def test_handles_binary_non_utf8_decodable(self):
- blob = b'\x32\x99'
+ """Printable unicode Ä (not utf8-decodable) is safely converted."""
+ blob = b'#!/bin/bash\necho \xc3\x84\n'
msg = ud.convert_string(blob)
self.assertEqual(blob, msg.get_payload(decode=True))
@@ -621,6 +623,14 @@ class TestConvertString(helpers.TestCase):
msg = ud.convert_string(text)
self.assertEqual(text, msg.get_payload(decode=False))
+ def test_handle_mime_parts(self):
+ """Mime parts are properly returned as a mime message."""
+ message = MIMEBase("text", "plain")
+ message.set_payload("Just text")
+ msg = ud.convert_string(str(message))
+ self.assertEqual("Just text", msg.get_payload(decode=False))
+
+
class TestFetchBaseConfig(helpers.TestCase):
def test_only_builtin_gets_builtin(self):
Follow ups