cloud-init-dev team mailing list archive
-
cloud-init-dev team
-
Mailing list archive
-
Message #02324
[Merge] ~chad.smith/cloud-init:cc-ntp-schema-validation into cloud-init:master
Chad Smith has proposed merging ~chad.smith/cloud-init:cc-ntp-schema-validation into cloud-init:master.
Commit message:
cc_ntp: Add schema definition and passive schema validation.
cloud-config files are very flexible and permissive. This branch adds a supported schema definition to cc_ntp so that we can properly inform users of potential misconfiguration in their cloud-config yaml.
This first branch adds a jsonsschema definition to the cc_ntp module and validation functions in cloudinit/config/schema which will log warnings about invalid configuration values in the ntp section.
A cmdline tools/cloudconfig-schema is added which can be used in our dev environments to quickly attempt to exercise the ntp schema. Eventually this cloudconfig-schema tool will evolve into a cmdline tool we can provide to users or a validation service which will allow folks to test their cloud-config files without having to attempt deployment to find errors or inconsistencies.
LP: #1692916
Requested reviews:
Server Team CI bot (server-team-bot): continuous-integration
cloud-init commiters (cloud-init-dev)
Related bugs:
Bug #1692916 in cloud-init: "Cloudinit modules should provide schema validation to better alert consumers to unsupported config "
https://bugs.launchpad.net/cloud-init/+bug/1692916
For more details, see:
https://code.launchpad.net/~chad.smith/cloud-init/+git/cloud-init/+merge/324640
cc_ntp: Add schema definition and passive schema validation.
cloud-config files are very flexible and permissive. This branch adds a supported schema definition to cc_ntp so that we can properly inform users of potential misconfiguration in their cloud-config yaml.
This first branch adds a jsonsschema definition to the cc_ntp module and validation functions in cloudinit/config/schema which will log warnings about invalid configuration values in the ntp section.
A cmdline tools/cloudconfig-schema is added which can be used in our dev environments to quickly attempt to exercise the ntp schema. Eventually this cloudconfig-schema tool will evolve into a cmdline tool we can provide to users or a validation service which will allow folks to test their cloud-config files without having to attempt deployment to find errors or inconsistencies.
LP: #1692916
To test:
# download the attached schemas.tar
tar xf schemas.tar
./tools/cloudconfig-schema --doc # to dump rst from the schema definition
./tools/cloudconfig-schema --doc | rst2man | man -l - # manpage
for file in `ls schemas`; do echo '--------------------------- BEGIN ' $file; cat schemas/$file; echo -e '--------------------------- OUTPUT: '; ./tools/cloudconfig-schema --config-file schemas/$file; echo -e '--------------------------- END ' $file '\n\n'; done;
#run unit tests
tox -- --tests tests/unittests/test_handlers/test_handler_ntp.py
--
Your team cloud-init commiters is requested to review the proposed merge of ~chad.smith/cloud-init:cc-ntp-schema-validation into cloud-init:master.
diff --git a/cloudinit/config/cc_ntp.py b/cloudinit/config/cc_ntp.py
index 5cc5453..573cd28 100644
--- a/cloudinit/config/cc_ntp.py
+++ b/cloudinit/config/cc_ntp.py
@@ -36,6 +36,7 @@ servers or pools are provided, 4 pools will be used in the format
- 192.168.23.2
"""
+from cloudinit.config.schema import validate_cloudconfig_schema
from cloudinit import log as logging
from cloudinit.settings import PER_INSTANCE
from cloudinit import templater
@@ -52,21 +53,78 @@ NR_POOL_SERVERS = 4
distros = ['centos', 'debian', 'fedora', 'opensuse', 'ubuntu']
+# The schema definition for each cloud-config module is a strict contract for
+# describing supported configuration parameters for each cloud-config section.
+# It allows cloud-config to validate and alert users to invalid or ignored
+# configuration options before actually attempting to deploy with said
+# configuration.
+
+schema = {
+ 'id': 'cc_ntp',
+ 'name': 'NTP',
+ 'title': 'enable and configure ntp',
+ 'description': (
+ 'Handle ntp configuration. If ntp is not installed on the system and '
+ 'ntp configuration is specified, ntp will be installed. If there is a '
+ 'default ntp config file in the image or one is present in the '
+ 'distro\'s ntp package, it will be copied to ``/etc/ntp.conf.dist`` '
+ 'before any changes are made. A list of ntp pools and ntp servers can '
+ 'be provided under the ``ntp`` config key. If no ntp servers or pools '
+ 'are provided, 4 pools will be used in the format '
+ '``{0-3}.{distro}.pool.ntp.org``.'),
+ 'distros': distros,
+ 'frequency': PER_INSTANCE,
+ 'type': 'object',
+ 'properties': {
+ 'ntp': {
+ 'type': ['object', 'null'],
+ 'properties': {
+ 'pools': {
+ 'type': 'array',
+ 'items': {
+ 'type': 'string',
+ 'format': 'hostname'
+ },
+ 'uniqueItems': True,
+ 'description': (
+ 'List of ntp pools. If both pools and servers are '
+ 'empty, 4 default pool servers will be provided of '
+ 'the format \{0-3\}.\{distro\}.pool.ntp.org.')
+ },
+ 'servers': {
+ 'type': 'array',
+ 'items': {
+ 'type': 'string',
+ 'format': 'hostname'
+ },
+ 'uniqueItems': True,
+ 'description': (
+ 'List of ntp servers. If both pools and servers are '
+ 'empty, 4 default pool servers will be provided with '
+ 'the format \{0-3\}.\{distro\}.pool.ntp.org.')
+ }
+ },
+ 'required': [],
+ 'additionalProperties': False
+ }
+ }
+}
+
def handle(name, cfg, cloud, log, _args):
"""Enable and configure ntp."""
-
if 'ntp' not in cfg:
LOG.debug(
"Skipping module named %s, not present or disabled by cfg", name)
return
-
ntp_cfg = cfg.get('ntp', {})
+ # TODO drop this when validate_cloudconfig_schema is strict=True
if not isinstance(ntp_cfg, (dict)):
raise RuntimeError(("'ntp' key existed in config,"
" but not a dictionary type,"
" is a %s %instead"), type_utils.obj_name(ntp_cfg))
+ validate_cloudconfig_schema(cfg, schema)
rename_ntp_conf()
# ensure when ntp is installed it has a configuration file
# to use instead of starting up with packaged defaults
diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py
new file mode 100644
index 0000000..9a732fd
--- /dev/null
+++ b/cloudinit/config/schema.py
@@ -0,0 +1,141 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+"""schema.py: Set of module functions for processing cloud-config schema."""
+
+from cloudinit.util import read_file_or_url
+import logging
+from jsonschema import Draft4Validator, FormatChecker
+import os
+import yaml
+
+
+CLOUD_CONFIG_HEADER = b'#cloud-config'
+SCHEMA_DOC_TMPL = """
+{name}
+---
+**Summary:** {title}
+
+{description}
+
+**Internal name:** ``{id}``
+
+**Module frequency:** {frequency}
+
+**Supported distros:** {distros}
+
+**Config keys**::
+
+{property_doc}
+"""
+SCHEMA_PROPERTY_TMPL = "{prop_name} ({type}): {description}"
+
+
+class SchemaValidationError(Exception):
+ """Raised when validating a cloud-config file against a schema."""
+
+ def __init__(self, schema_errors=()):
+ """Init the exception an n-tuple of schema errors.
+
+ @param schema_errors: An n-tuple of the format:
+ ((flat.config.key, msg),)
+ """
+ self.schema_errors = schema_errors
+ error_messages = [
+ '{}: {}'.format(config_key, message)
+ for config_key, message in schema_errors]
+ message = "Cloud config schema errors: {}".format(
+ ', '.join(error_messages))
+ super(SchemaValidationError, self).__init__(message)
+
+
+def validate_cloudconfig_schema(config, schema, strict=False):
+ """Validate provided config meets the schema definition.
+
+ @param config: Dict of cloud configuration settings validated against
+ schema.
+ @param schema: jsonschema dict describing the supported schema definition
+ for the cloud config module (config.cc_*).
+ @param strict: Boolen, when True raise SchemaValidationErrors instead of
+ logging warnings.
+
+ @raises: SchemaValidationError when provided config does not validate
+ against the provided schema.
+ """
+ validator = Draft4Validator(schema, format_checker=FormatChecker())
+ errors = ()
+ for error in sorted(validator.iter_errors(config), key=lambda e: e.path):
+ path = '.'.join([str(p) for p in error.path])
+ errors += ((path, error.message),)
+ if errors:
+ if strict:
+ raise SchemaValidationError(errors)
+ else:
+ messages = ['{}: {}'.format(k, msg) for k, msg in errors]
+ logging.warning('Invalid schema:\n%s', '\n'.join(messages))
+
+
+def validate_cloudconfig_file(config_path, schema):
+ """Validate cloudconfig file adheres to a specific jsonschema.
+
+ @param config_path: Path to the yaml cloud-config file to parse.
+ @param schema: Dict describing a valid jsonschema to validate against.
+
+ @raises SchemaValidationError containing any of schema_errors encountered.
+ @raises RuntimeError when config_path does not exist.
+ """
+ if not os.path.exists(config_path):
+ raise RuntimeError('Configfile {} does not exist'.format(config_path))
+ content = read_file_or_url('file://{}'.format(config_path)).contents
+ if not content.startswith(CLOUD_CONFIG_HEADER):
+ errors = (
+ ('header', 'File {} needs to begin with "{}"'.format(
+ config_path, CLOUD_CONFIG_HEADER.decode())),)
+ raise SchemaValidationError(errors)
+
+ try:
+ cloudconfig = yaml.safe_load(content)
+ except yaml.parser.ParserError as e:
+ errors = (
+ ('format', 'File {} is not valid yaml. {}'.format(
+ config_path, str(e))),)
+ raise SchemaValidationError(errors)
+ validate_cloudconfig_schema(
+ cloudconfig, schema, strict=True)
+
+
+def _get_property_doc(schema, prefix=' '):
+ """Return restructured text describing the supported schema properties."""
+ indent = prefix + ' '
+ properties = []
+ for prop_key, prop_config in schema.get('properties', {}).items():
+ # Define prop_name and dscription for SCHEMA_PROPERTY_TMPL
+ prop_config['prop_name'] = prefix + prop_key
+ if 'description' not in prop_config:
+ prop_config['description'] = ''
+ properties.append(SCHEMA_PROPERTY_TMPL.format(**prop_config))
+ if 'properties' in prop_config:
+ properties.append(_get_property_doc(prop_config, prefix=indent))
+ return '\n'.join(properties)
+
+
+def get_schema_doc(schema):
+ """Return reStructured text rendering the provided jsonschema.
+
+ @param schema: Dict of jsonschema to render.
+ @raise KeyError: If schema lacks an expected key.
+ """
+ schema['property_doc'] = _get_property_doc(schema)
+ return SCHEMA_DOC_TMPL.format(**schema)
+
+
+def get_schema(section_key=None):
+ """Return a dict of jsonschema defined in any cc_* module.
+
+ @param: section_key: Optionally limit schema to a specific top-level key.
+ """
+ # TODO use util.find_modules in subsequent branch
+ from cloudinit.config.cc_ntp import schema
+ return schema
+
+
+# vi: ts=4 expandtab
diff --git a/tests/unittests/test_handler/test_handler_ntp.py b/tests/unittests/test_handler/test_handler_ntp.py
index bc4277b..994d5e3 100644
--- a/tests/unittests/test_handler/test_handler_ntp.py
+++ b/tests/unittests/test_handler/test_handler_ntp.py
@@ -212,4 +212,113 @@ class TestNtp(FilesystemMockingTestCase):
'Skipping module named cc_ntp, not present or disabled by cfg\n',
self.logs.getvalue())
+ def test_ntp_handler_schema_validation_allows_empty_ntp_config(self):
+ """Ntp schema validation allows for an empty ntp: configuration."""
+ invalid_config = {'ntp': {}}
+ distro = 'ubuntu'
+ cc = self._get_cloud(distro)
+ ntp_conf = os.path.join(self.new_root, 'ntp.conf')
+ with open('{0}.tmpl'.format(ntp_conf), 'wb') as stream:
+ stream.write(NTP_TEMPLATE)
+ with mock.patch('cloudinit.config.cc_ntp.NTP_CONF', ntp_conf):
+ cc_ntp.handle('cc_ntp', invalid_config, cc, None, [])
+ self.assertNotIn('Invalid schema:', self.logs.getvalue())
+ with open(ntp_conf) as stream:
+ content = stream.read()
+ default_pools = [
+ "{0}.{1}.pool.ntp.org".format(x, distro)
+ for x in range(0, cc_ntp.NR_POOL_SERVERS)]
+ self.assertEqual(
+ "servers []\npools {0}\n".format(default_pools),
+ content)
+
+ def test_ntp_handler_schema_validation_warns_non_string_item_type(self):
+ """Ntp schema validation warns of non-strings in pools or servers.
+
+ Schema validation is not strict, so ntp config is still be rendered.
+ """
+ invalid_config = {'ntp': {'pools': [123], 'servers': ['valid', None]}}
+ cc = self._get_cloud('ubuntu')
+ ntp_conf = os.path.join(self.new_root, 'ntp.conf')
+ with open('{0}.tmpl'.format(ntp_conf), 'wb') as stream:
+ stream.write(NTP_TEMPLATE)
+ with mock.patch('cloudinit.config.cc_ntp.NTP_CONF', ntp_conf):
+ cc_ntp.handle('cc_ntp', invalid_config, cc, None, [])
+ self.assertIn(
+ "Invalid schema:\nntp.pools.0: 123 is not of type 'string'\n"
+ "ntp.servers.1: None is not of type 'string'",
+ self.logs.getvalue())
+ with open(ntp_conf) as stream:
+ content = stream.read()
+ self.assertEqual("servers ['valid', None]\npools [123]\n", content)
+
+ def test_ntp_handler_schema_validation_warns_of_non_array_type(self):
+ """Ntp schema validation warns of non-array pools or servers types.
+
+ Schema validation is not strict, so ntp config is still be rendered.
+ """
+ invalid_config = {'ntp': {'pools': 123, 'servers': 'non-array'}}
+ cc = self._get_cloud('ubuntu')
+ ntp_conf = os.path.join(self.new_root, 'ntp.conf')
+ with open('{0}.tmpl'.format(ntp_conf), 'wb') as stream:
+ stream.write(NTP_TEMPLATE)
+ with mock.patch('cloudinit.config.cc_ntp.NTP_CONF', ntp_conf):
+ cc_ntp.handle('cc_ntp', invalid_config, cc, None, [])
+ self.assertIn(
+ "Invalid schema:\nntp.pools: 123 is not of type 'array'\n"
+ "ntp.servers: 'non-array' is not of type 'array'",
+ self.logs.getvalue())
+ with open(ntp_conf) as stream:
+ content = stream.read()
+ self.assertEqual("servers non-array\npools 123\n", content)
+
+ def test_ntp_handler_schema_validation_warns_invalid_key_present(self):
+ """Ntp schema validation warns of invalid keys present in ntp config.
+
+ Schema validation is not strict, so ntp config is still be rendered.
+ """
+ invalid_config = {
+ 'ntp': {'invalidkey': 1, 'pools': ['0.mycompany.pool.ntp.org']}}
+ cc = self._get_cloud('ubuntu')
+ ntp_conf = os.path.join(self.new_root, 'ntp.conf')
+ with open('{0}.tmpl'.format(ntp_conf), 'wb') as stream:
+ stream.write(NTP_TEMPLATE)
+ with mock.patch('cloudinit.config.cc_ntp.NTP_CONF', ntp_conf):
+ cc_ntp.handle('cc_ntp', invalid_config, cc, None, [])
+ self.assertIn(
+ "Invalid schema:\nntp: Additional properties are not allowed "
+ "('invalidkey' was unexpected)",
+ self.logs.getvalue())
+ with open(ntp_conf) as stream:
+ content = stream.read()
+ self.assertEqual(
+ "servers []\npools ['0.mycompany.pool.ntp.org']\n",
+ content)
+
+ def test_ntp_handler_schema_validation_warns_of_duplicates(self):
+ """Ntp schema validation warns of duplicates in servers or pools.
+
+ Schema validation is not strict, so ntp config is still be rendered.
+ """
+ invalid_config = {
+ 'ntp': {'pools': ['0.mypool.org', '0.mypool.org'],
+ 'servers': ['10.0.0.1', '10.0.0.1']}}
+ cc = self._get_cloud('ubuntu')
+ ntp_conf = os.path.join(self.new_root, 'ntp.conf')
+ with open('{0}.tmpl'.format(ntp_conf), 'wb') as stream:
+ stream.write(NTP_TEMPLATE)
+ with mock.patch('cloudinit.config.cc_ntp.NTP_CONF', ntp_conf):
+ cc_ntp.handle('cc_ntp', invalid_config, cc, None, [])
+ self.assertIn(
+ "Invalid schema:\nntp.pools: ['0.mypool.org', '0.mypool.org'] has "
+ "non-unique elements\nntp.servers: ['10.0.0.1', '10.0.0.1'] has "
+ "non-unique elements",
+ self.logs.getvalue())
+ with open(ntp_conf) as stream:
+ content = stream.read()
+ self.assertEqual(
+ "servers ['10.0.0.1', '10.0.0.1']\n"
+ "pools ['0.mypool.org', '0.mypool.org']\n",
+ content)
+
# vi: ts=4 expandtab
diff --git a/tests/unittests/test_handler/test_schema.py b/tests/unittests/test_handler/test_schema.py
new file mode 100644
index 0000000..7569326
--- /dev/null
+++ b/tests/unittests/test_handler/test_schema.py
@@ -0,0 +1,141 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+from cloudinit.config.schema import (
+ CLOUD_CONFIG_HEADER, SchemaValidationError, get_schema_doc,
+ validate_cloudconfig_file, validate_cloudconfig_schema)
+from cloudinit.util import write_file
+
+from ..helpers import CiTestCase
+
+from copy import copy
+
+
+class SchemaValidationErrorTest(CiTestCase):
+ """Test validate_cloudconfig_schema"""
+
+ def test_schema_validation_error_expects_schema_errors(self):
+ """SchemaValidationError is initialized from schema_errors."""
+ errors = (('key.path', 'unexpected key "junk"'),
+ ('key2.path', '"-123" is not a valid "hostname" format'))
+ exception = SchemaValidationError(schema_errors=errors)
+ self.assertIsInstance(exception, Exception)
+ self.assertEqual(exception.schema_errors, errors)
+ self.assertEqual(
+ 'Cloud config schema errors: key.path: unexpected key "junk", '
+ 'key2.path: "-123" is not a valid "hostname" format',
+ str(exception))
+
+
+class ValidateCloudConfigSchemaTest(CiTestCase):
+ """Tests for validate_cloudconfig_schema."""
+
+ with_logs = True
+
+ def test_validateconfig_schema_non_strict_emits_warnings(self):
+ """When strict is False validate_cloudconfig_schema emits warnings."""
+ schema = {'properties': {'p1': {'type': 'string'}}}
+ validate_cloudconfig_schema({'p1': -1}, schema, strict=False)
+ self.assertIn(
+ "Invalid schema:\np1: -1 is not of type 'string'\n",
+ self.logs.getvalue())
+
+ def test_validateconfig_schema_strict_raises_errors(self):
+ """When strict is True validate_cloudconfig_schema raises errors."""
+ schema = {'properties': {'p1': {'type': 'string'}}}
+ with self.assertRaises(SchemaValidationError) as context_mgr:
+ validate_cloudconfig_schema({'p1': -1}, schema, strict=True)
+ self.assertEqual(
+ "Cloud config schema errors: p1: -1 is not of type 'string'",
+ str(context_mgr.exception))
+
+ def test_validateconfig_schema_honors_formats(self):
+ """When strict is True validate_cloudconfig_schema raises errors."""
+ schema = {
+ 'properties': {'p1': {'type': 'string', 'format': 'hostname'}}}
+ with self.assertRaises(SchemaValidationError) as context_mgr:
+ validate_cloudconfig_schema({'p1': '-1'}, schema, strict=True)
+ self.assertEqual(
+ "Cloud config schema errors: p1: '-1' is not a 'hostname'",
+ str(context_mgr.exception))
+
+
+class ValidateCloudConfigFileTest(CiTestCase):
+ """Tests for validate_cloudconfig_file."""
+
+ def setUp(self):
+ super(ValidateCloudConfigFileTest, self).setUp()
+ self.config_file = self.tmp_path('cloudcfg.yaml')
+
+ def test_validateconfig_file_error_on_absent_file(self):
+ """On absent config_path, validate_cloudconfig_file errors."""
+ with self.assertRaises(RuntimeError) as context_mgr:
+ validate_cloudconfig_file('/not/here', {})
+ self.assertEqual(
+ 'Configfile /not/here does not exist',
+ str(context_mgr.exception))
+
+ def test_validateconfig_file_error_on_invalid_header(self):
+ """On invalid header, validate_cloudconfig_file errors.
+
+ A SchemaValidationError is raised when the file doesn't begin with
+ CLOUD_CONFIG_HEADER.
+ """
+ write_file(self.config_file, '#junk')
+ with self.assertRaises(SchemaValidationError) as context_mgr:
+ validate_cloudconfig_file(self.config_file, {})
+ self.assertEqual(
+ 'Cloud config schema errors: header: File {0} needs to begin with '
+ '"{1}"'.format(self.config_file, CLOUD_CONFIG_HEADER.decode()),
+ str(context_mgr.exception))
+
+ def test_validateconfig_file_error_on_non_yaml_format(self):
+ """On non-yaml format, validate_cloudconfig_file errors.
+
+ A SchemaValidationError is raised when the file doesn't begin with
+ CLOUD_CONFIG_HEADER.
+ """
+ write_file(self.config_file, '#cloud-config\n{}}')
+ with self.assertRaises(SchemaValidationError) as context_mgr:
+ validate_cloudconfig_file(self.config_file, {})
+ self.assertIn(
+ 'schema errors: format: File {0} is not valid yaml.'.format(
+ self.config_file),
+ str(context_mgr.exception))
+
+ def test_validateconfig_file_sctricty_validates_schema(self):
+ """validate_cloudconfig_file raises errors on invalid schema."""
+ schema = {
+ 'properties': {'p1': {'type': 'string', 'format': 'hostname'}}}
+ write_file(self.config_file, '#cloud-config\np1: "-1"')
+ with self.assertRaises(SchemaValidationError) as context_mgr:
+ validate_cloudconfig_file(self.config_file, schema)
+ self.assertEqual(
+ "Cloud config schema errors: p1: '-1' is not a 'hostname'",
+ str(context_mgr.exception))
+
+
+class GetSchemaDocTest(CiTestCase):
+ """Tests for get_schema_doc."""
+
+ def setUp(self):
+ super(GetSchemaDocTest, self).setUp()
+ self.required_schema = {
+ 'title': 'title', 'description': 'description', 'id': 'id',
+ 'name': 'name', 'frequency': 'frequency', 'distros': 'distros'}
+
+ def test_get_schema_doc_returns_restructured_text(self):
+ """get_schema_doc returns restructured text for a cloudinit schema."""
+ self.assertEqual(
+ '\nname\n---\n**Summary:** title\n\ndescription\n\n'
+ '**Internal name:** ``id``\n\n**Module frequency:** frequency\n\n'
+ '**Supported distros:** distros\n\n**Config keys**::\n\n\n',
+ get_schema_doc(self.required_schema))
+
+ def test_get_schema_doc_raises_key_errors(self):
+ """get_schema_doc raises KeyErrors on missing keys."""
+ for key in self.required_schema:
+ invalid_schema = copy(self.required_schema)
+ invalid_schema.pop(key)
+ with self.assertRaises(KeyError) as context_mgr:
+ get_schema_doc(invalid_schema)
+ self.assertEqual("'{0}'".format(key), str(context_mgr.exception))