← Back to team overview

cloud-init-dev team mailing list archive

[Merge] ~chad.smith/cloud-init:bug/180134-openstack-random-seed-encoding into cloud-init:master

 

Chad Smith has proposed merging ~chad.smith/cloud-init:bug/180134-openstack-random-seed-encoding into cloud-init:master.

Commit message:
util: json.dumps on python 2.7 will handle UnicodeDecodeError on binary

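Since python 2.7 doesn't handle UnicodeDecodeErrors with the default
handler, catch the error in json_dumps and, on python 2.7, preserialize
binary values as 'ci-b64:'-prefixed base64 strings before retrying.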

LP: #1801364


Requested reviews:
  Server Team CI bot (server-team-bot): continuous-integration
  cloud-init Commiters (cloud-init-dev)
Related bugs:
  Bug #1801364 in cloud-init: "persisting OpenStack metadata fails"
  https://bugs.launchpad.net/cloud-init/+bug/1801364

For more details, see:
https://code.launchpad.net/~chad.smith/cloud-init/+git/cloud-init/+merge/373291
-- 
Your team cloud-init Commiters is requested to review the proposed merge of ~chad.smith/cloud-init:bug/180134-openstack-random-seed-encoding into cloud-init:master.
diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py
index 6378e98..d93210a 100644
--- a/cloudinit/sources/tests/test_init.py
+++ b/cloudinit/sources/tests/test_init.py
@@ -456,21 +456,6 @@ class TestDataSource(CiTestCase):
             {'key1': 'val1', 'key2': {'key2.1': '\x123'}},
             instance_json['ds']['meta_data'])
 
-    @skipIf(not six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8")
-    def test_non_utf8_encoding_logs_warning(self):
-        """When non-utf-8 values exist in py2 instance-data is not written."""
-        tmp = self.tmp_dir()
-        datasource = DataSourceTestSubclassNet(
-            self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
-            custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}})
-        self.assertTrue(datasource.get_data())
-        json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
-        self.assertFalse(os.path.exists(json_file))
-        self.assertIn(
-            "WARNING: Error persisting instance-data.json: 'utf8' codec can't"
-            " decode byte 0xaa in position 2: invalid start byte",
-            self.logs.getvalue())
-
     def test_get_hostname_subclass_support(self):
         """Validate get_hostname signature on all subclasses of DataSource."""
         # Use inspect.getfullargspec when we drop py2.6 and py2.7
diff --git a/cloudinit/tests/test_util.py b/cloudinit/tests/test_util.py
index e3d2dba..7302f4a 100644
--- a/cloudinit/tests/test_util.py
+++ b/cloudinit/tests/test_util.py
@@ -2,7 +2,9 @@
 
 """Tests for cloudinit.util"""
 
+import base64
 import logging
+import json
 import platform
 
 import cloudinit.util as util
@@ -528,6 +530,25 @@ class TestGetLinuxDistro(CiTestCase):
         self.assertEqual(('foo', '1.1', 'aarch64'), dist)
 
 
+class TestJsonDumps(CiTestCase):
+    def test_is_str(self):
+        """json_dumps should return a string."""
+        self.assertTrue(isinstance(util.json_dumps({'abc': '123'}), str))
+
+    def test_utf8(self):
+        smiley = '\\ud83d\\ude03'
+        self.assertEqual(
+            {'smiley': smiley},
+            json.loads(util.json_dumps({'smiley': smiley})))
+
+    def test_non_utf8(self):
+        blob = b'\xba\x03Qx-#y\xea'
+        self.assertEqual(
+            {'blob': 'ci-b64:' + base64.b64encode(blob).decode('utf-8')},
+            json.loads(util.json_dumps({'blob': blob})))
+
+
+
 @mock.patch('os.path.exists')
 class TestIsLXD(CiTestCase):
 
diff --git a/cloudinit/util.py b/cloudinit/util.py
index aa23b3f..49433dc 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -1599,10 +1599,36 @@ def json_serialize_default(_obj):
         return 'Warning: redacted unserializable type {0}'.format(type(_obj))
 
 
+def json_preserialize_binary(data):
+    """Preserialize any discovered binary values to avoid json.dumps issues.
+
+    Used only on python 2.7 where default type handling is not honored for
+    failure to encode binary data. LP: #1801364.
+    TODO(Drop this function when py2.7 support is dropped from cloud-init)
+    """
+    data = obj_copy.deepcopy(data)
+    for key, value in data.items():
+        if isinstance(value, (dict)):
+            data[key] = json_preserialize_binary(value)
+        if isinstance(value, bytes):
+            data[key] = 'ci-b64:{0}'.format(b64e(value))
+    return data
+
+
 def json_dumps(data):
     """Return data in nicely formatted json."""
-    return json.dumps(data, indent=1, sort_keys=True,
-                      separators=(',', ': '), default=json_serialize_default)
+    try:
+        return json.dumps(
+            data, indent=1, sort_keys=True, separators=(',', ': '),
+            default=json_serialize_default)
+    except UnicodeDecodeError as e:
+        from cloudinit.sources import process_instance_metadata
+        if sys.version_info[:2] == (2, 7):
+            data = json_preserialize_binary(data)
+            data = process_instance_metadata(data)
+            return json.dumps(
+                data, indent=1, sort_keys=True, separators=(',', ': '),
+                default=json_serialize_default)
 
 
 def yaml_dumps(obj, explicit_start=True, explicit_end=True, noalias=False):