← Back to team overview

cloud-init-dev team mailing list archive

Re: [Merge] ~chad.smith/cloud-init:aws-local-dhcp into cloud-init:master

 

This looks really good.  Just some questions and clarifications in inline comments.

Diff comments:

> diff --git a/cloudinit/net/__init__.py b/cloudinit/net/__init__.py
> index 46cb9c8..d38ea8b 100644
> --- a/cloudinit/net/__init__.py
> +++ b/cloudinit/net/__init__.py
> @@ -233,15 +231,24 @@ def generate_fallback_config(blacklist_drivers=None, config_driver=None):
>      if DEFAULT_PRIMARY_INTERFACE in names:
>          names.remove(DEFAULT_PRIMARY_INTERFACE)
>          names.insert(0, DEFAULT_PRIMARY_INTERFACE)
> -    target_name = None
> -    target_mac = None
> +
> +    # pick the first that has a address

Hrm; this seems risky; in that it may return a different answer between subsequent calls if more than one interface is up.
It seems somewhat wasteful to build a list of names with 'address' value so we can sort it but I don't like the variability in
the return value.

>      for name in names:
> -        mac = read_sys_net_safe(name, 'address')
> -        if mac:
> -            target_name = name
> -            target_mac = mac
> -            break
> -    if target_mac and target_name:
> +        if read_sys_net_safe(name, 'address'):
> +            return name
> +    return None
> +
> +
> +def generate_fallback_config(blacklist_drivers=None, config_driver=None):
> +    """Determine which attached net dev is most likely to have a connection and
> +       generate network state to run dhcp on that interface"""
> +
> +    if not config_driver:
> +        config_driver = False
> +
> +    target_name = find_fallback_nic(blacklist_drivers=blacklist_drivers)
> +    if target_name:
> +        target_mac = read_sys_net_safe(target_name, 'address')
>          nconf = {'config': [], 'version': 1}
>          cfg = {'type': 'physical', 'name': target_name,
>                 'mac_address': target_mac, 'subnets': [{'type': 'dhcp'}]}
> diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py
> new file mode 100644
> index 0000000..4d59bd0
> --- /dev/null
> +++ b/cloudinit/net/dhcp.py
> @@ -0,0 +1,118 @@
> +# Copyright (C) 2017 Canonical Ltd.
> +#
> +# Author: Chad Smith <chad.smith@xxxxxxxxxxxxx>
> +#
> +# This file is part of cloud-init. See LICENSE file for license information.
> +
> +import logging
> +import os
> +import re
> +
> +from cloudinit.net import find_fallback_nic, get_devicelist
> +from cloudinit import util
> +
> +LOG = logging.getLogger(__name__)
> +
> +
> +class InvalidDHCPLeaseFileError(Exception):
> +    """Raised when parsing an empty or invalid dhcp.leases file.
> +
> +    Current uses are DataSourceAzure and DataSourceEc2 during ephemeral
> +    boot to scrape metadata.
> +    """
> +    pass
> +
> +
> +def maybe_dhcp_clean_discovery(nic=None):
> +    """Create a temporary working directory and find the nic if needed.

The function name and docstring don't seem clear to me.  What's the 'clean' part?

It doesn't always create a tmpdir, only if we have a nic and it's not up.

I think the "clean" is an implementation detail we don't need to expose in the function name.

maybe_perform_dhcp_discovery()  is't alot better name-wise (I'm happy to help bikeshed), but I think
we can say something like:

Determine if we need to perform a DHCP DISCOVERY operation on ``nic`` if it's not up.
Utilize a tempdir to prevent dhclient from disturbing the host filesystem.

Something like that.

> +
> +    If the device is already connected, do nothing.
> +
> +    @param nic: Name of the network interface we want to run dhclient on.
> +    @return: A dict of dhcp options from the dhclient discovery, otherwise an
> +        empty dict.
> +    """
> +    if nic is None:
> +        nic = find_fallback_nic()
> +        if nic is None:
> +            LOG.debug(
> +                'Skip dhcp_clean_discovery: Unable to find network interface.')
> +            return {}
> +    elif nic not in get_devicelist():
> +        LOG.debug(
> +            'Skip dhcp_clean_discovery: Invalid interface name %s.', nic)

Since this is a debug message, then maybe something like nic '%s' not found in get_devicelist() or system network device list.

> +        return {}
> +    with util.tempdir(prefix='cloud-init-dhcp-') as tmpdir:
> +        return dhcp_clean_discovery(nic, tmpdir)
> +
> +
> +def parse_dhcp_lease_file(lease_file):
> +    """Parse the given dhcp lease file for the most recent lease.
> +
> +    Return a dict of dhcp options as key value pairs for the most recent lease
> +    block.
> +
> +    @raises: InvalidDHCPLeaseFileError on empty of unparseable leasefile
> +        content.
> +    """
> +    lease_regex = re.compile(r"lease {(?P<lease>[^}]*)}\n")
> +    dhcp_leases = []
> +    with open(lease_file) as stream:
> +        lease_content = stream.read()

util.load_file()

> +    if len(lease_content) == 0:
> +        raise InvalidDHCPLeaseFileError(
> +            'Cannot parse empty dhcp lease file {0}'.format(lease_file))
> +    for lease in lease_regex.findall(lease_content):
> +        lease_options = []
> +        for line in lease.split(';'):
> +            # Strip newlines, double-quotes and option prefix
> +            line = line.strip().replace('"', '').replace('option ', '')
> +            if not line:
> +                continue
> +            lease_options.append(line.split(' ', 1))
> +        dhcp_leases.append(dict(lease_options))
> +    if not dhcp_leases:
> +        raise InvalidDHCPLeaseFileError(
> +            'Cannot parse dhcp lease file {0}. No leases found'.format(
> +                lease_file))
> +    return dhcp_leases
> +
> +
> +def dhcp_clean_discovery(interface, cleandir):
> +    """Start up dhclient on the provided inteface without side-effects.

maybe expand side-effects here even though the comments below have all of the real issues.  Something like, avoiding script execution and filesystem modifications.

> +
> +    @param interface: Name of the network inteface on which to dhclient.
> +    @param cleandir: The directory from which to run dhclient as well as store
> +        dhcp leases.
> +
> +    @return: A dict of dhcp options parsed from the dhcp.leases file or empty
> +        dict.
> +    """
> +    dhclient_script = util.which('dhclient')
> +    if not dhclient_script:
> +        LOG.debug('Skip dhclient configuration: No dhclient found.')
> +        return {}
> +    LOG.debug('Performing a clean dhcp discovery on %s', interface)
> +
> +    # XXX We copy dhclient out of /sbin/dhclient to avoid dealing with strict
> +    # app armor profiles which disallow running dhclient -sf <our-script-file>.

if this is app_armor only, should we guard/gate this code; does it need to run on selinux systems?

> +    # We want to avoid running /sbin/dhclient-script because of side-effects in
> +    # /etc/resolv.conf any any other vendor specific scripts in
> +    # /etc/dhcp/dhclient*hooks.d.
> +    dhclient_cmd = os.path.join(cleandir, 'dhclient')
> +    util.copy(dhclient_script, dhclient_cmd)
> +    pid_file = os.path.join(cleandir, 'dhclient.pid')
> +    lease_file = os.path.join(cleandir, 'dhcp.leases')
> +
> +    # dhclient needs the interface up to send initial discovery packets.

Maybe specify ISC dhclient; it's possible other dhclients do something different.

> +    # Generally dhclient relies on dhclient-script PREINIT action to bring the
> +    # link up before attempting discovery. Since we are using -sf /bin/true,
> +    # we need to do that "link up" ourselves first.
> +    util.subp(['ip', 'link', 'set', 'dev', interface, 'up'], capture=True)
> +    dhclient_cmd = [dhclient_cmd, '-1', '-v', '-lf', lease_file, '-pf',
> +                    pid_file, interface, '-sf', '/bin/true']

Is there a fallback besides /bin/true ?  Should we check for /bin/true before running?  /bin/sh -c true could work as well?

> +    util.subp(dhclient_cmd, capture=True)
> +    return parse_dhcp_lease_file(lease_file)
> +
> +
> +# vi: ts=4 expandtab
> diff --git a/cloudinit/net/tests/test_dhcp.py b/cloudinit/net/tests/test_dhcp.py
> new file mode 100644
> index 0000000..eee2dd9
> --- /dev/null
> +++ b/cloudinit/net/tests/test_dhcp.py
> @@ -0,0 +1,142 @@
> +# This file is part of cloud-init. See LICENSE file for license information.
> +
> +import mock
> +import os
> +from textwrap import dedent
> +
> +from cloudinit.net.dhcp import (
> +    InvalidDHCPLeaseFileError, maybe_dhcp_clean_discovery,
> +    parse_dhcp_lease_file, dhcp_clean_discovery)
> +from cloudinit.util import ensure_file, write_file
> +from tests.unittests.helpers import CiTestCase
> +
> +
> +class TestParseDHCPLeasesFile(CiTestCase):
> +
> +    def test_parse_empty_lease_file_errors(self):
> +        """parse_dhcp_lease_file erros when file content is empty."""

s/erros/errors/g

> +        empty_file = self.tmp_path('leases')
> +        ensure_file(empty_file)
> +        with self.assertRaises(InvalidDHCPLeaseFileError) as context_manager:
> +            parse_dhcp_lease_file(empty_file)
> +        error = context_manager.exception
> +        self.assertIn('Cannot parse empty dhcp lease file', str(error))
> +
> +    def test_parse_malformed_lease_file_content_errors(self):
> +        """parse_dhcp_lease_file erros when file content isn't dhcp leases."""
> +        non_lease_file = self.tmp_path('leases')
> +        write_file(non_lease_file, 'hi mom.')
> +        with self.assertRaises(InvalidDHCPLeaseFileError) as context_manager:
> +            parse_dhcp_lease_file(non_lease_file)
> +        error = context_manager.exception
> +        self.assertIn('Cannot parse dhcp lease file', str(error))
> +
> +    def test_parse_multiple_leases(self):
> +        """parse_dhcp_lease_file returns a list of all leases within."""
> +        lease_file = self.tmp_path('leases')
> +        content = dedent("""
> +            lease {
> +              interface "wlp3s0";
> +              fixed-address 192.168.2.74;
> +              option subnet-mask 255.255.255.0;
> +              option routers 192.168.2.1;
> +              renew 4 2017/07/27 18:02:30;
> +              expire 5 2017/07/28 07:08:15;
> +            }
> +            lease {
> +              interface "wlp3s0";
> +              fixed-address 192.168.2.74;
> +              option subnet-mask 255.255.255.0;
> +              option routers 192.168.2.1;
> +            }
> +        """)
> +        expected = [
> +            {'interface': 'wlp3s0', 'fixed-address': '192.168.2.74',
> +             'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1',
> +             'renew': '4 2017/07/27 18:02:30',
> +             'expire': '5 2017/07/28 07:08:15'},
> +            {'interface': 'wlp3s0', 'fixed-address': '192.168.2.74',
> +             'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}]
> +        write_file(lease_file, content)
> +        self.assertItemsEqual(expected, parse_dhcp_lease_file(lease_file))
> +
> +
> +class TestDHCPDiscoveryClean(CiTestCase):
> +    with_logs = True
> +
> +    @mock.patch('cloudinit.net.dhcp.find_fallback_nic')
> +    def test_no_fallback_nic_found(self, m_fallback_nic):
> +        """Log and do nothing when nic is absent and no fallback is found."""
> +        m_fallback_nic.return_value = None  # No fallback nic found
> +        self.assertEqual({}, maybe_dhcp_clean_discovery())
> +        self.assertIn(
> +            'Skip dhcp_clean_discovery: Unable to find network interface',
> +            self.logs.getvalue())
> +
> +    def test_provided_nic_does_not_exist(self):
> +        """When the provided nic doesn't exist, log a message and no-op."""
> +        self.assertEqual({}, maybe_dhcp_clean_discovery('idontexist'))
> +        self.assertIn(
> +            'Skip dhcp_clean_discovery: Invalid interface name idontexist',
> +            self.logs.getvalue())
> +
> +    @mock.patch('cloudinit.net.dhcp.util.which')
> +    def test_absent_dhclient_command(self, m_which):
> +        """When dhclient doesn't exist in the OS, log the issue and no-op."""
> +        m_which.return_value = None  # dhclient isn't found
> +        tmpdir = self.tmp_dir()
> +        self.assertEqual({}, dhcp_clean_discovery('idontexist', tmpdir))
> +        self.assertIn(
> +            'Skip dhclient configuration: No dhclient found.',
> +            self.logs.getvalue())
> +
> +    @mock.patch('cloudinit.net.dhcp.dhcp_clean_discovery')
> +    @mock.patch('cloudinit.net.dhcp.find_fallback_nic')
> +    def test_dhclient_run_with_tmpdir(self, m_fallback, m_dhcp_discovery):
> +        """maybe_dhcp_clean_discovery passes tmpdir to dhcp_clean_discovery."""
> +        m_fallback.return_value = 'eth9'
> +        m_dhcp_discovery.return_value = {'address': '192.168.2.2'}
> +        self.assertEqual(
> +            {'address': '192.168.2.2'}, maybe_dhcp_clean_discovery())
> +        m_dhcp_discovery.assert_called_once()
> +        call = m_dhcp_discovery.call_args_list[0]
> +        self.assertEqual('eth9', call[0][0])
> +        self.assertIn('/tmp/cloud-init-dhcp-', call[0][1])
> +
> +    @mock.patch('cloudinit.net.dhcp.util.subp')
> +    @mock.patch('cloudinit.net.dhcp.util.which')
> +    def test_dhcp_clean_discovery_run_in_sandbox(self, m_which, m_subp):
> +        """dhcp_clean_discovery brings up the interface and runs dhclient.
> +
> +        It also returns the parsed dhcp.leases file generated in the sandbox.
> +        """
> +        tmpdir = self.tmp_dir()
> +        dhclient_script = os.path.join(tmpdir, 'dhclient.orig')
> +        script_content = '#!/bin/bash\necho fake-dhclient'
> +        write_file(dhclient_script, script_content, mode=0o755)
> +        lease_content = dedent("""
> +            lease {
> +              interface "eth9";
> +              fixed-address 192.168.2.74;
> +              option subnet-mask 255.255.255.0;
> +              option routers 192.168.2.1;
> +            }
> +        """)
> +        lease_file = os.path.join(tmpdir, 'dhcp.leases')
> +        write_file(lease_file, lease_content)
> +        m_which.return_value = dhclient_script  # dhclient command is present
> +        self.assertItemsEqual(
> +            [{'interface': 'eth9', 'fixed-address': '192.168.2.74',
> +              'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}],
> +            dhcp_clean_discovery('eth9', tmpdir))
> +        # dhclient script got copied
> +        with open(os.path.join(tmpdir, 'dhclient')) as stream:
> +            self.assertEqual(script_content, stream.read())
> +        # Interface was brought up before dhclient called from sandbox
> +        m_subp.assert_has_calls([
> +            mock.call(
> +                ['ip', 'link', 'set', 'dev', 'eth9', 'up'], capture=True),
> +            mock.call(
> +                [os.path.join(tmpdir, 'dhclient'), '-1', '-v', '-lf',
> +                 lease_file, '-pf', os.path.join(tmpdir, 'dhclient.pid'),
> +                 'eth9', '-sf', '/bin/true'], capture=True)])
> diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py
> index 4ec9592..ae0fe26 100644
> --- a/cloudinit/sources/DataSourceEc2.py
> +++ b/cloudinit/sources/DataSourceEc2.py
> @@ -21,7 +23,7 @@ from cloudinit import warnings
>  LOG = logging.getLogger(__name__)
>  
>  # Which version we are requesting of the ec2 metadata apis
> -DEF_MD_VERSION = '2009-04-04'
> +DEF_MD_VERSION = '2016-09-02'

What's the implications of changing this value?  Would we leave the old-one around?

>  
>  STRICT_ID_PATH = ("datasource", "Ec2", "strict_id")
>  STRICT_ID_DEFAULT = "warn"
> @@ -73,21 +77,25 @@ class DataSourceEc2(sources.DataSource):
>          elif self.cloud_platform == Platforms.NO_EC2_METADATA:
>              return False
>  
> -        try:
> -            if not self.wait_for_metadata_service():
> +        if self.get_network_metadata:  # Setup networking in init-local stage.
> +            if util.is_FreeBSD():
> +                LOG.debug("FreeBSD doesn't support running dhclient with -sf")

But is there an apparmor or selinux issue?

>                  return False
> -            start_time = time.time()
> -            self.userdata_raw = \
> -                ec2.get_instance_userdata(self.api_ver, self.metadata_address)
> -            self.metadata = ec2.get_instance_metadata(self.api_ver,
> -                                                      self.metadata_address)
> -            LOG.debug("Crawl of metadata service took %.3f seconds",
> -                      time.time() - start_time)
> -            return True
> -        except Exception:
> -            util.logexc(LOG, "Failed reading from metadata address %s",
> -                        self.metadata_address)
> -            return False
> +            dhcp_leases = dhcp.maybe_dhcp_clean_discovery()
> +            if not dhcp_leases:
> +                # DataSourceEc2Local failed in init-local stage. DataSourceEc2
> +                # will still run in init-network stage.
> +                return False
> +            dhcp_opts = dhcp_leases[-1]
> +            net_params = {'interface': dhcp_opts.get('interface'),
> +                          'ip': dhcp_opts.get('fixed-address'),
> +                          'prefix_or_mask': dhcp_opts.get('subnet-mask'),
> +                          'broadcast': dhcp_opts.get('broadcast-address'),
> +                          'router': dhcp_opts.get('routers')}
> +            with net.EphemeralIPv4Network(**net_params):
> +                return self._crawl_metadata()
> +        else:
> +            return self._crawl_metadata()
>  
>      @property
>      def launch_index(self):
> diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py
> index 12230ae..4bb04d2 100644
> --- a/tests/unittests/test_datasource/test_ec2.py
> +++ b/tests/unittests/test_datasource/test_ec2.py
> @@ -123,8 +157,9 @@ class TestEc2(test_helpers.HttprettyTestCase):
>  
>      def setUp(self):
>          super(TestEc2, self).setUp()
> -        self.metadata_addr = ec2.DataSourceEc2.metadata_urls[0]
> -        self.api_ver = '2009-04-04'
> +        self.datasource = ec2.DataSourceEc2
> +        self.metadata_addr = self.datasource.metadata_urls[0]
> +        self.api_ver = '2016-09-02'

Shouldn't this be ec2.DataSourceEc2.DEF_MD_VERSION ?

>  
>      @property
>      def metadata_url(self):


-- 
https://code.launchpad.net/~chad.smith/cloud-init/+git/cloud-init/+merge/328241
Your team cloud-init commiters is requested to review the proposed merge of ~chad.smith/cloud-init:aws-local-dhcp into cloud-init:master.