← Back to team overview

cloud-init-dev team mailing list archive

Re: [Merge] ~chad.smith/cloud-init:cc-ntp-schema-validation into cloud-init:master

 

I like it very  much.

Some comments inline. comments.
looking at diff i see that tools/cloudconfig-schema has some whitespace-end-of-line issues.

tools/cloudconfig-schema needs chmod +x.
tools/cloudconfig-schema: def error() should got to sys.stderr.write(message + "\n") ?
might as well write errors to stderr.


Overall this is wonderful. Thank you.

The jsonschema thing is the only sticking point.  we can add a dependency to trunk
and trunk's package build.  But I dont yet want to add a runtime dependency on it.
So somehow we have to address that.



Diff comments:

> diff --git a/cloudinit/config/cc_ntp.py b/cloudinit/config/cc_ntp.py
> index 5cc5453..573cd28 100644
> --- a/cloudinit/config/cc_ntp.py
> +++ b/cloudinit/config/cc_ntp.py
> @@ -52,21 +53,78 @@ NR_POOL_SERVERS = 4
>  distros = ['centos', 'debian', 'fedora', 'opensuse', 'ubuntu']
>  
>  
> +# The schema definition for each cloud-config module is a strict contract for
> +# describing supported configuration parameters for each cloud-config section.
> +# It allows cloud-config to validate and alert users to invalid or ignored
> +# configuration options before actually attempting to deploy with said
> +# configuration.
> +
> +schema = {
> +    'id': 'cc_ntp',
> +    'name': 'NTP',
> +    'title': 'enable and configure ntp',
> +    'description': (
> +        'Handle ntp configuration. If ntp is not installed on the system and '
> +        'ntp configuration is specified, ntp will be installed. If there is a '
> +        'default ntp config file in the image or one is present in the '
> +        'distro\'s ntp package, it will be copied to ``/etc/ntp.conf.dist`` '
> +        'before any changes are made. A list of ntp pools and ntp servers can '
> +        'be provided under the ``ntp`` config key. If no ntp servers or pools '
> +        'are provided, 4 pools will be used in the format '
> +        '``{0-3}.{distro}.pool.ntp.org``.'),

some places we use textwrap.dedent("""\
    then you can avoid
    having lots of ' at beginning and end of line. or escaping '""")

> +    'distros': distros,
> +    'frequency': PER_INSTANCE,
> +    'type': 'object',
> +    'properties': {
> +        'ntp': {
> +            'type': ['object', 'null'],
> +            'properties': {
> +                'pools': {
> +                    'type': 'array',
> +                    'items': {
> +                        'type': 'string',
> +                        'format': 'hostname'
> +                    },
> +                    'uniqueItems': True,
> +                    'description': (
> +                        'List of ntp pools. If both pools and servers are '
> +                        'empty, 4 default pool servers will be provided of '
> +                        'the format \{0-3\}.\{distro\}.pool.ntp.org.')
> +                },
> +                'servers': {
> +                    'type': 'array',
> +                    'items': {
> +                        'type': 'string',
> +                        'format': 'hostname'
> +                    },
> +                    'uniqueItems': True,
> +                    'description': (
> +                        'List of ntp servers. If both pools and servers are '
> +                        'empty, 4 default pool servers will be provided with '
> +                        'the format \{0-3\}.\{distro\}.pool.ntp.org.')
> +                }
> +            },
> +            'required': [],
> +            'additionalProperties': False
> +        }
> +    }
> +}
> +
>  def handle(name, cfg, cloud, log, _args):
>      """Enable and configure ntp."""
> -
>      if 'ntp' not in cfg:
>          LOG.debug(
>              "Skipping module named %s, not present or disabled by cfg", name)
>          return
> -
>      ntp_cfg = cfg.get('ntp', {})
>  
> +    # TODO drop this when validate_cloudconfig_schema is strict=True
>      if not isinstance(ntp_cfg, (dict)):
>          raise RuntimeError(("'ntp' key existed in config,"
>                              " but not a dictionary type,"
>                              " is a %s %instead"), type_utils.obj_name(ntp_cfg))
>  
> +    validate_cloudconfig_schema(cfg, schema)
>      rename_ntp_conf()
>      # ensure when ntp is installed it has a configuration file
>      # to use instead of starting up with packaged defaults
> diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py
> new file mode 100644
> index 0000000..9a732fd
> --- /dev/null
> +++ b/cloudinit/config/schema.py
> @@ -0,0 +1,141 @@
> +# This file is part of cloud-init. See LICENSE file for license information.
> +
> +"""schema.py: Set of module functions for processing cloud-config schema."""
> +
> +from cloudinit.util import read_file_or_url
> +import logging
> +from jsonschema import Draft4Validator, FormatChecker

## smoser
  We need to try/except this, as xenial wont have jsonschema
  and at least initially we wouldnt want to add a Depends there.
  The same is probably true for other installed targets.

  That said, you'll also need to update the requirements.txt to get jsonschema

> +import os
> +import yaml
> +
> +
> +CLOUD_CONFIG_HEADER = b'#cloud-config'
> +SCHEMA_DOC_TMPL = """
> +{name}
> +---
> +**Summary:** {title}
> +
> +{description}
> +
> +**Internal name:** ``{id}``
> +
> +**Module frequency:** {frequency}
> +
> +**Supported distros:** {distros}
> +
> +**Config keys**::
> +
> +{property_doc}
> +"""
> +SCHEMA_PROPERTY_TMPL = "{prop_name} ({type}): {description}"
> +
> +
> +class SchemaValidationError(Exception):
> +    """Raised when validating a cloud-config file against a schema."""

## smoser
    ValueError? TypeError?  just wondering why you picked Exception
    Wonder what other things would raise in similar situations.

> +
> +    def __init__(self, schema_errors=()):
> +        """Init the exception an n-tuple of schema errors.
> +
> +        @param schema_errors: An n-tuple of the format:
> +            ((flat.config.key, msg),)
> +        """
> +        self.schema_errors = schema_errors
> +        error_messages = [
> +            '{}: {}'.format(config_key, message)

## smoser
    python2.6 {0}: {1} (there are other places too. only calling out this one)

> +            for config_key, message in schema_errors]
> +        message = "Cloud config schema errors: {}".format(
> +            ', '.join(error_messages))
> +        super(SchemaValidationError, self).__init__(message)
> +
> +
> +def validate_cloudconfig_schema(config, schema, strict=False):
> +    """Validate provided config meets the schema definition.
> +
> +    @param config: Dict of cloud configuration settings validated against
> +        schema.
> +    @param schema: jsonschema dict describing the supported schema definition
> +       for the cloud config module (config.cc_*).
> +    @param strict: Boolen, when True raise SchemaValidationErrors instead of

## smoser
    Boolean (not Boolen).
    I need to start being able to type stuff like this.  Thank you for
    doing the nice docstring headers.

> +       logging warnings.
> +
> +    @raises: SchemaValidationError when provided config does not validate
> +        against the provided schema.
> +    """
> +    validator = Draft4Validator(schema, format_checker=FormatChecker())
> +    errors = ()
> +    for error in sorted(validator.iter_errors(config), key=lambda e: e.path):
> +        path = '.'.join([str(p) for p in error.path])
> +        errors += ((path, error.message),)
> +    if errors:
> +        if strict:
> +            raise SchemaValidationError(errors)
> +        else:
> +            messages = ['{}: {}'.format(k, msg) for k, msg in errors]
> +            logging.warning('Invalid schema:\n%s', '\n'.join(messages))

## smoser: invalid 'schema' ? or invalid config.

> +
> +
> +def validate_cloudconfig_file(config_path, schema):
> +    """Validate cloudconfig file adheres to a specific jsonschema.
> +
> +    @param config_path: Path to the yaml cloud-config file to parse.
> +    @param schema: Dict describing a valid jsonschema to validate against.
> +
> +    @raises SchemaValidationError containing any of schema_errors encountered.
> +    @raises RuntimeError when config_path does not exist.
> +    """
> +    if not os.path.exists(config_path):
> +        raise RuntimeError('Configfile {} does not exist'.format(config_path))
> +    content = read_file_or_url('file://{}'.format(config_path)).contents
> +    if not content.startswith(CLOUD_CONFIG_HEADER):
> +        errors = (
> +            ('header', 'File {} needs to begin with "{}"'.format(
> +                config_path, CLOUD_CONFIG_HEADER.decode())),)
> +        raise SchemaValidationError(errors)
> +
> +    try:
> +        cloudconfig = yaml.safe_load(content)
> +    except yaml.parser.ParserError as e:
> +        errors = (
> +            ('format', 'File {} is not valid yaml. {}'.format(
> +                config_path, str(e))),)
> +        raise SchemaValidationError(errors)
> +    validate_cloudconfig_schema(
> +        cloudconfig, schema, strict=True)
> +
> +
> +def _get_property_doc(schema, prefix='    '):
> +    """Return restructured text describing the supported schema properties."""
> +    indent = prefix + '    '
> +    properties = []
> +    for prop_key, prop_config in schema.get('properties', {}).items():
> +        # Define prop_name and dscription for SCHEMA_PROPERTY_TMPL
> +        prop_config['prop_name'] = prefix + prop_key
> +        if 'description' not in prop_config:
> +            prop_config['description'] = ''
> +        properties.append(SCHEMA_PROPERTY_TMPL.format(**prop_config))
> +        if 'properties' in prop_config:
> +            properties.append(_get_property_doc(prop_config, prefix=indent))
> +    return '\n'.join(properties)
> +
> +
> +def get_schema_doc(schema):
> +    """Return reStructured text rendering the provided jsonschema.
> +
> +    @param schema: Dict of jsonschema to render.
> +    @raise KeyError: If schema lacks an expected key.
> +    """
> +    schema['property_doc'] = _get_property_doc(schema)
> +    return SCHEMA_DOC_TMPL.format(**schema)
> +
> +
> +def get_schema(section_key=None):
> +    """Return a dict of jsonschema defined in any cc_* module.
> +
> +    @param: section_key: Optionally limit schema to a specific top-level key.
> +    """
> +    # TODO use util.find_modules in subsequent branch
> +    from cloudinit.config.cc_ntp import schema
> +    return schema
> +
> +
> +# vi: ts=4 expandtab
> diff --git a/tests/unittests/test_handler/test_schema.py b/tests/unittests/test_handler/test_schema.py
> new file mode 100644
> index 0000000..7569326
> --- /dev/null
> +++ b/tests/unittests/test_handler/test_schema.py
> @@ -0,0 +1,141 @@
> +# This file is part of cloud-init. See LICENSE file for license information.
> +
> +from cloudinit.config.schema import (
> +    CLOUD_CONFIG_HEADER, SchemaValidationError, get_schema_doc,
> +    validate_cloudconfig_file, validate_cloudconfig_schema)
> +from cloudinit.util import write_file
> +
> +from ..helpers import CiTestCase
> +
> +from copy import copy
> +
> +
> +class SchemaValidationErrorTest(CiTestCase):
> +    """Test validate_cloudconfig_schema"""
> +
> +    def test_schema_validation_error_expects_schema_errors(self):
> +        """SchemaValidationError is initialized from schema_errors."""
> +        errors = (('key.path', 'unexpected key "junk"'),
> +                  ('key2.path', '"-123" is not a valid "hostname" format'))
> +        exception = SchemaValidationError(schema_errors=errors)
> +        self.assertIsInstance(exception, Exception)
> +        self.assertEqual(exception.schema_errors, errors)
> +        self.assertEqual(
> +            'Cloud config schema errors: key.path: unexpected key "junk", '
> +            'key2.path: "-123" is not a valid "hostname" format',
> +            str(exception))
> +
> +
> +class ValidateCloudConfigSchemaTest(CiTestCase):
> +    """Tests for validate_cloudconfig_schema."""
> +
> +    with_logs = True
> +
> +    def test_validateconfig_schema_non_strict_emits_warnings(self):
> +        """When strict is False validate_cloudconfig_schema emits warnings."""
> +        schema = {'properties': {'p1': {'type': 'string'}}}
> +        validate_cloudconfig_schema({'p1': -1}, schema, strict=False)
> +        self.assertIn(
> +            "Invalid schema:\np1: -1 is not of type 'string'\n",
> +            self.logs.getvalue())
> +
> +    def test_validateconfig_schema_strict_raises_errors(self):
> +        """When strict is True validate_cloudconfig_schema raises errors."""
> +        schema = {'properties': {'p1': {'type': 'string'}}}
> +        with self.assertRaises(SchemaValidationError) as context_mgr:
> +            validate_cloudconfig_schema({'p1': -1}, schema, strict=True)
> +        self.assertEqual(
> +            "Cloud config schema errors: p1: -1 is not of type 'string'",
> +            str(context_mgr.exception))
> +
> +    def test_validateconfig_schema_honors_formats(self):
> +        """When strict is True validate_cloudconfig_schema raises errors."""
> +        schema = {
> +            'properties': {'p1': {'type': 'string', 'format': 'hostname'}}}
> +        with self.assertRaises(SchemaValidationError) as context_mgr:
> +            validate_cloudconfig_schema({'p1': '-1'}, schema, strict=True)

## smoser: make sure the assertRaises works in cent6 (i think it should).
       also, check the expected type of the exception? (SchemaValidationError)

> +        self.assertEqual(
> +            "Cloud config schema errors: p1: '-1' is not a 'hostname'",
> +            str(context_mgr.exception))
> +
> +
> +class ValidateCloudConfigFileTest(CiTestCase):
> +    """Tests for validate_cloudconfig_file."""
> +
> +    def setUp(self):
> +        super(ValidateCloudConfigFileTest, self).setUp()
> +        self.config_file = self.tmp_path('cloudcfg.yaml')
> +
> +    def test_validateconfig_file_error_on_absent_file(self):
> +        """On absent config_path, validate_cloudconfig_file errors."""
> +        with self.assertRaises(RuntimeError) as context_mgr:
> +            validate_cloudconfig_file('/not/here', {})
> +        self.assertEqual(
> +            'Configfile /not/here does not exist',
> +            str(context_mgr.exception))
> +
> +    def test_validateconfig_file_error_on_invalid_header(self):
> +        """On invalid header, validate_cloudconfig_file errors.
> +
> +        A SchemaValidationError is raised when the file doesn't begin with
> +        CLOUD_CONFIG_HEADER.
> +        """
> +        write_file(self.config_file, '#junk')
> +        with self.assertRaises(SchemaValidationError) as context_mgr:
> +            validate_cloudconfig_file(self.config_file, {})
> +        self.assertEqual(
> +            'Cloud config schema errors: header: File {0} needs to begin with '
> +            '"{1}"'.format(self.config_file, CLOUD_CONFIG_HEADER.decode()),
> +            str(context_mgr.exception))
> +
> +    def test_validateconfig_file_error_on_non_yaml_format(self):
> +        """On non-yaml format, validate_cloudconfig_file errors.
> +
> +        A SchemaValidationError is raised when the file doesn't begin with
> +        CLOUD_CONFIG_HEADER.

## smoser docstring copy from previous test case. ("doesn't begin with CLOUD_CONFIG_HEADER")

> +        """
> +        write_file(self.config_file, '#cloud-config\n{}}')
> +        with self.assertRaises(SchemaValidationError) as context_mgr:
> +            validate_cloudconfig_file(self.config_file, {})
> +        self.assertIn(
> +            'schema errors: format: File {0} is not valid yaml.'.format(
> +                self.config_file),
> +            str(context_mgr.exception))
> +
> +    def test_validateconfig_file_sctricty_validates_schema(self):
> +        """validate_cloudconfig_file raises errors on invalid schema."""
> +        schema = {
> +            'properties': {'p1': {'type': 'string', 'format': 'hostname'}}}
> +        write_file(self.config_file, '#cloud-config\np1: "-1"')
> +        with self.assertRaises(SchemaValidationError) as context_mgr:
> +            validate_cloudconfig_file(self.config_file, schema)
> +        self.assertEqual(
> +            "Cloud config schema errors: p1: '-1' is not a 'hostname'",
> +            str(context_mgr.exception))
> +
> +
> +class GetSchemaDocTest(CiTestCase):
> +    """Tests for get_schema_doc."""
> +
> +    def setUp(self):
> +        super(GetSchemaDocTest, self).setUp()
> +        self.required_schema = {
> +            'title': 'title', 'description': 'description', 'id': 'id',
> +            'name': 'name', 'frequency': 'frequency', 'distros': 'distros'}
> +
> +    def test_get_schema_doc_returns_restructured_text(self):
> +        """get_schema_doc returns restructured text for  a cloudinit schema."""

two spaces: for<space><space>a

> +        self.assertEqual(
> +            '\nname\n---\n**Summary:** title\n\ndescription\n\n'
> +            '**Internal name:** ``id``\n\n**Module frequency:** frequency\n\n'
> +            '**Supported distros:** distros\n\n**Config keys**::\n\n\n',
> +            get_schema_doc(self.required_schema))
> +
> +    def test_get_schema_doc_raises_key_errors(self):
> +        """get_schema_doc raises KeyErrors on missing keys."""
> +        for key in self.required_schema:
> +            invalid_schema = copy(self.required_schema)
> +            invalid_schema.pop(key)
> +            with self.assertRaises(KeyError) as context_mgr:
> +                get_schema_doc(invalid_schema)
> +            self.assertEqual("'{0}'".format(key), str(context_mgr.exception))


-- 
https://code.launchpad.net/~chad.smith/cloud-init/+git/cloud-init/+merge/324640
Your team cloud-init commiters is requested to review the proposed merge of ~chad.smith/cloud-init:cc-ntp-schema-validation into cloud-init:master.