← Back to team overview

cloud-init-dev team mailing list archive

[Merge] ~chad.smith/cloud-init:fix/1768600-utf8-in-user-data into cloud-init:master

 

Chad Smith has proposed merging ~chad.smith/cloud-init:fix/1768600-utf8-in-user-data into cloud-init:master.

Commit message:
Be more safe on string/bytes when writing multipart user-data to disk.

When creating the multipart mime message that is written as
user-data.txt.i, cloud-init was losing data when converting some
things to a string.

LP: #1768600

Requested reviews:
  cloud-init commiters (cloud-init-dev)
Related bugs:
  Bug #1768600 in cloud-init: "UTF-8 support in User Data (text/x-shellscript) is broken"
  https://bugs.launchpad.net/cloud-init/+bug/1768600

For more details, see:
https://code.launchpad.net/~chad.smith/cloud-init/+git/cloud-init/+merge/347782
-- 
Your team cloud-init commiters is requested to review the proposed merge of ~chad.smith/cloud-init:fix/1768600-utf8-in-user-data into cloud-init:master.
diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py
index 8f6aba1..ed83d2d 100644
--- a/cloudinit/user_data.py
+++ b/cloudinit/user_data.py
@@ -337,8 +337,10 @@ def is_skippable(part):
 
 # Coverts a raw string into a mime message
 def convert_string(raw_data, content_type=NOT_MULTIPART_TYPE):
+    """convert a string (more likely bytes) or a message into
+    a mime message."""
     if not raw_data:
-        raw_data = ''
+        raw_data = b''
 
     def create_binmsg(data, content_type):
         maintype, subtype = content_type.split("/", 1)
@@ -346,15 +348,17 @@ def convert_string(raw_data, content_type=NOT_MULTIPART_TYPE):
         msg.set_payload(data)
         return msg
 
-    try:
-        data = util.decode_binary(util.decomp_gzip(raw_data))
-        if "mime-version:" in data[0:4096].lower():
-            msg = util.message_from_string(data)
-        else:
-            msg = create_binmsg(data, content_type)
-    except UnicodeDecodeError:
-        msg = create_binmsg(raw_data, content_type)
+    if isinstance(raw_data, six.text_type):
+        bdata = raw_data.encode('utf-8')
+    else:
+        bdata = raw_data
+    bdata = util.decomp_gzip(bdata, decode=False)
+    if b"mime-version:" in bdata[0:4096].lower():
+        msg = util.message_from_string(bdata.decode('utf-8'))
+    else:
+        msg = create_binmsg(bdata, content_type)
 
     return msg
 
+
 # vi: ts=4 expandtab
diff --git a/tests/unittests/test_data.py b/tests/unittests/test_data.py
index 91d35cb..bee31c1 100644
--- a/tests/unittests/test_data.py
+++ b/tests/unittests/test_data.py
@@ -606,8 +606,10 @@ class TestUDProcess(helpers.ResourceUsingTestCase):
 
 
 class TestConvertString(helpers.TestCase):
+
     def test_handles_binary_non_utf8_decodable(self):
-        blob = b'\x32\x99'
+        """Printable unicode Ä (not utf8-decodable) is safely converted."""
+        blob = b'#!/bin/bash\necho \xc3\x84\n'
         msg = ud.convert_string(blob)
         self.assertEqual(blob, msg.get_payload(decode=True))
 
@@ -621,6 +623,14 @@ class TestConvertString(helpers.TestCase):
         msg = ud.convert_string(text)
         self.assertEqual(text, msg.get_payload(decode=False))
 
+    def test_handle_mime_parts(self):
+        """Mime parts are properly returned as a mime message."""
+        message = MIMEBase("text", "plain")
+        message.set_payload("Just text")
+        msg = ud.convert_string(str(message))
+        self.assertEqual("Just text", msg.get_payload(decode=False))
+
+
 
 class TestFetchBaseConfig(helpers.TestCase):
     def test_only_builtin_gets_builtin(self):

Follow ups