← Back to team overview

cloud-init-dev team mailing list archive

[Merge] ~chad.smith/cloud-init:cleanup/schema-annotate-invalid-yaml into cloud-init:master

 

Chad Smith has proposed merging ~chad.smith/cloud-init:cleanup/schema-annotate-invalid-yaml into cloud-init:master.

Commit message:
yaml_load/schema: On invalid yaml, add errored line and column nums

Yaml tracebacks are generally hard to read for average users. Add a bit of
logic to util.yaml_load and schema validation to look for
YAMLError.context_marker or problem_marker line and column counts.

No longer log the full expception traceback from the yaml_load error,
instead just LOG.warning for the specific error and point to the offending
line and column where the problem exists.


Requested reviews:
  cloud-init commiters (cloud-init-dev)

For more details, see:
https://code.launchpad.net/~chad.smith/cloud-init/+git/cloud-init/+merge/346415
-- 
Your team cloud-init commiters is requested to review the proposed merge of ~chad.smith/cloud-init:cleanup/schema-annotate-invalid-yaml into cloud-init:master.
diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py
index 3bad8e2..f7501ec 100644
--- a/cloudinit/config/schema.py
+++ b/cloudinit/config/schema.py
@@ -93,20 +93,33 @@ def validate_cloudconfig_schema(config, schema, strict=False):
 def annotated_cloudconfig_file(cloudconfig, original_content, schema_errors):
     """Return contents of the cloud-config file annotated with schema errors.
 
-    @param cloudconfig: YAML-loaded object from the original_content.
+    @param cloudconfig: YAML-loaded dict from the original_content or empty
+        dict if unparseable.
     @param original_content: The contents of a cloud-config file
     @param schema_errors: List of tuples from a JSONSchemaValidationError. The
         tuples consist of (schemapath, error_message).
     """
     if not schema_errors:
         return original_content
-    schemapaths = _schemapath_for_cloudconfig(cloudconfig, original_content)
+    schemapaths = {}
+    if cloudconfig:
+        schemapaths = _schemapath_for_cloudconfig(
+            cloudconfig, original_content)
     errors_by_line = defaultdict(list)
     error_count = 1
     error_footer = []
     annotated_content = []
     for path, msg in schema_errors:
-        errors_by_line[schemapaths[path]].append(msg)
+        match = re.match(r'format-l(?P<line>\d+)\.c(?P<col>\d+).*', path)
+        if match:
+            line, col = match.groups()
+            errors_by_line[int(line)].append(msg)
+        else:
+            col = None
+            errors_by_line[schemapaths[path]].append(msg)
+        if col is not None:
+            msg = 'Line {line} column {col}: {msg}'.format(
+                line=line, col=col, msg=msg)
         error_footer.append('# E{0}: {1}'.format(error_count, msg))
         error_count += 1
     lines = original_content.decode().split('\n')
@@ -142,18 +155,31 @@ def validate_cloudconfig_file(config_path, schema, annotate=False):
     content = load_file(config_path, decode=False)
     if not content.startswith(CLOUD_CONFIG_HEADER):
         errors = (
-            ('header', 'File {0} needs to begin with "{1}"'.format(
+            ('format-l1.c1', 'File {0} needs to begin with "{1}"'.format(
                 config_path, CLOUD_CONFIG_HEADER.decode())),)
-        raise SchemaValidationError(errors)
-
+        error = SchemaValidationError(errors)
+        if annotate:
+            print(annotated_cloudconfig_file({}, content, error.schema_errors))
+        raise error
     try:
         cloudconfig = yaml.safe_load(content)
-    except yaml.parser.ParserError as e:
-        errors = (
-            ('format', 'File {0} is not valid yaml. {1}'.format(
-                config_path, str(e))),)
-        raise SchemaValidationError(errors)
-
+    except (yaml.YAMLError) as e:
+        line = column = 1
+        mark = None
+        if hasattr(e, 'context_mark') and getattr(e, 'context_mark'):
+            mark = getattr(e, 'context_mark')
+        elif hasattr(e, 'problem_mark') and getattr(e, 'problem_mark'):
+            mark = getattr(e, 'problem_mark')
+        if mark:
+            line = mark.line + 1
+            column = mark.column + 1
+        errors = (('format-l{line}.c{col}'.format(line=line, col=column),
+                   'File {0} is not valid yaml. {1}'.format(
+                       config_path, str(e))),)
+        error = SchemaValidationError(errors)
+        if annotate:
+            print(annotated_cloudconfig_file({}, content, error.schema_errors))
+        raise error
     try:
         validate_cloudconfig_schema(
             cloudconfig, schema, strict=True)
@@ -176,7 +202,7 @@ def _schemapath_for_cloudconfig(config, original_content):
     list_index = 0
     RE_YAML_INDENT = r'^(\s*)'
     scopes = []
-    for line_number, line in enumerate(content_lines):
+    for line_number, line in enumerate(content_lines, 1):
         indent_depth = len(re.match(RE_YAML_INDENT, line).groups()[0])
         line = line.strip()
         if not line or line.startswith('#'):
@@ -208,8 +234,8 @@ def _schemapath_for_cloudconfig(config, original_content):
                 scopes.append((indent_depth + 2, key + '.0'))
                 for inner_list_index in range(0, len(yaml.safe_load(value))):
                     list_key = key + '.' + str(inner_list_index)
-                    schema_line_numbers[list_key] = line_number + 1
-        schema_line_numbers[key] = line_number + 1
+                    schema_line_numbers[list_key] = line_number
+        schema_line_numbers[key] = line_number
     return schema_line_numbers
 
 
diff --git a/cloudinit/util.py b/cloudinit/util.py
index edfedc7..97cd794 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -874,8 +874,20 @@ def load_yaml(blob, default=None, allowed=(dict,)):
                              " but got %s instead") %
                             (allowed, type_utils.obj_name(converted)))
         loaded = converted
-    except (yaml.YAMLError, TypeError, ValueError):
-        logexc(LOG, "Failed loading yaml blob")
+    except (yaml.YAMLError, TypeError, ValueError) as e:
+        msg = 'Failed loading yaml blob'
+        mark = None
+        if hasattr(e, 'context_mark') and getattr(e, 'context_mark'):
+            mark = getattr(e, 'context_mark')
+        elif hasattr(e, 'problem_mark') and getattr(e, 'problem_mark'):
+            mark = getattr(e, 'problem_mark')
+        if mark:
+            msg += (
+                '. Invalid format at line {line} column {col}: "{err}"'.format(
+                    line=mark.line + 1, col=mark.column + 1, err=e))
+        else:
+            msg += '. {err}'.format(err=e)
+        LOG.warning(msg)
     return loaded
 
 
diff --git a/tests/unittests/test_handler/test_schema.py b/tests/unittests/test_handler/test_schema.py
index ac41f12..7a685b1 100644
--- a/tests/unittests/test_handler/test_schema.py
+++ b/tests/unittests/test_handler/test_schema.py
@@ -134,17 +134,30 @@ class ValidateCloudConfigFileTest(CiTestCase):
         with self.assertRaises(SchemaValidationError) as context_mgr:
             validate_cloudconfig_file(self.config_file, {})
         self.assertEqual(
-            'Cloud config schema errors: header: File {0} needs to begin with '
-            '"{1}"'.format(self.config_file, CLOUD_CONFIG_HEADER.decode()),
+            'Cloud config schema errors: format-l1.c1: File {0} needs to begin'
+            ' with "{1}"'.format(
+                self.config_file, CLOUD_CONFIG_HEADER.decode()),
             str(context_mgr.exception))
 
-    def test_validateconfig_file_error_on_non_yaml_format(self):
-        """On non-yaml format, validate_cloudconfig_file errors."""
+    def test_validateconfig_file_error_on_non_yaml_scanner_error(self):
+        """On non-yaml scan issues, validate_cloudconfig_file errors."""
+        # Generate a scanner error by providing text on a single line with
+        # improper indent.
+        write_file(self.config_file, '#cloud-config\nasdf:\nasdf')
+        with self.assertRaises(SchemaValidationError) as context_mgr:
+            validate_cloudconfig_file(self.config_file, {})
+        self.assertIn(
+            'schema errors: format-l3.c1: File {0} is not valid yaml.'.format(
+                self.config_file),
+            str(context_mgr.exception))
+
+    def test_validateconfig_file_error_on_non_yaml_parser_error(self):
+        """On non-yaml parser issues, validate_cloudconfig_file errors."""
         write_file(self.config_file, '#cloud-config\n{}}')
         with self.assertRaises(SchemaValidationError) as context_mgr:
             validate_cloudconfig_file(self.config_file, {})
         self.assertIn(
-            'schema errors: format: File {0} is not valid yaml.'.format(
+            'schema errors: format-l2.c3: File {0} is not valid yaml.'.format(
                 self.config_file),
             str(context_mgr.exception))
 
diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py
index 84941c7..417f3e6 100644
--- a/tests/unittests/test_util.py
+++ b/tests/unittests/test_util.py
@@ -265,26 +265,47 @@ class TestGetCmdline(helpers.TestCase):
         self.assertEqual("abcd 123", ret)
 
 
-class TestLoadYaml(helpers.TestCase):
+class TestLoadYaml(helpers.CiTestCase):
     mydefault = "7b03a8ebace993d806255121073fed52"
+    with_logs = True
 
     def test_simple(self):
         mydata = {'1': "one", '2': "two"}
         self.assertEqual(util.load_yaml(yaml.dump(mydata)), mydata)
 
     def test_nonallowed_returns_default(self):
+        '''Any unallowed types result in returning default; log the issue.'''
         # for now, anything not in the allowed list just returns the default.
         myyaml = yaml.dump({'1': "one"})
         self.assertEqual(util.load_yaml(blob=myyaml,
                                         default=self.mydefault,
                                         allowed=(str,)),
                          self.mydefault)
+        self.assertIn(
+            "Yaml load allows (<type 'str'>,) root types, but got dict",
+            self.logs.getvalue())
 
-    def test_bogus_returns_default(self):
+    def test_bogus_scan_error_returns_default(self):
+        '''On Yaml scan error, load_yaml returns the default and logs issue.'''
         badyaml = "1\n 2:"
         self.assertEqual(util.load_yaml(blob=badyaml,
                                         default=self.mydefault),
                          self.mydefault)
+        self.assertIn(
+            'Failed loading yaml blob. Invalid format at line 2 column 3:'
+            ' "mapping values are not allowed here',
+            self.logs.getvalue())
+
+    def test_bogus_parse_error_returns_default(self):
+        '''On Yaml parse error, load_yaml returns default and logs issue.'''
+        badyaml = "{}}"
+        self.assertEqual(util.load_yaml(blob=badyaml,
+                                        default=self.mydefault),
+                         self.mydefault)
+        self.assertIn(
+            'Failed loading yaml blob. Invalid format at line 1 column 3:'
+            " \"expected \'<document start>\', but found \'}\'",
+            self.logs.getvalue())
 
     def test_unsafe_types(self):
         # should not load complex types

Follow ups