cloud-init-dev team mailing list archive
-
cloud-init-dev team
-
Mailing list archive
-
Message #03036
Re: [Merge] ~chad.smith/cloud-init:aws-local-dhcp into cloud-init:master
This looks really good. Just some questions and clarifications in inline comments.
Diff comments:
> diff --git a/cloudinit/net/__init__.py b/cloudinit/net/__init__.py
> index 46cb9c8..d38ea8b 100644
> --- a/cloudinit/net/__init__.py
> +++ b/cloudinit/net/__init__.py
> @@ -233,15 +231,24 @@ def generate_fallback_config(blacklist_drivers=None, config_driver=None):
> if DEFAULT_PRIMARY_INTERFACE in names:
> names.remove(DEFAULT_PRIMARY_INTERFACE)
> names.insert(0, DEFAULT_PRIMARY_INTERFACE)
> - target_name = None
> - target_mac = None
> +
> + # pick the first that has a address
Hrm; this seems risky; in that it may return a different answer between subsequent calls if more than one interface is up.
It seems somewhat wasteful to build a list of names with 'address' value so we can sort it but I don't like the variability in
the return value.
> for name in names:
> - mac = read_sys_net_safe(name, 'address')
> - if mac:
> - target_name = name
> - target_mac = mac
> - break
> - if target_mac and target_name:
> + if read_sys_net_safe(name, 'address'):
> + return name
> + return None
> +
> +
> +def generate_fallback_config(blacklist_drivers=None, config_driver=None):
> + """Determine which attached net dev is most likely to have a connection and
> + generate network state to run dhcp on that interface"""
> +
> + if not config_driver:
> + config_driver = False
> +
> + target_name = find_fallback_nic(blacklist_drivers=blacklist_drivers)
> + if target_name:
> + target_mac = read_sys_net_safe(target_name, 'address')
> nconf = {'config': [], 'version': 1}
> cfg = {'type': 'physical', 'name': target_name,
> 'mac_address': target_mac, 'subnets': [{'type': 'dhcp'}]}
> diff --git a/cloudinit/net/dhcp.py b/cloudinit/net/dhcp.py
> new file mode 100644
> index 0000000..4d59bd0
> --- /dev/null
> +++ b/cloudinit/net/dhcp.py
> @@ -0,0 +1,118 @@
> +# Copyright (C) 2017 Canonical Ltd.
> +#
> +# Author: Chad Smith <chad.smith@xxxxxxxxxxxxx>
> +#
> +# This file is part of cloud-init. See LICENSE file for license information.
> +
> +import logging
> +import os
> +import re
> +
> +from cloudinit.net import find_fallback_nic, get_devicelist
> +from cloudinit import util
> +
> +LOG = logging.getLogger(__name__)
> +
> +
> +class InvalidDHCPLeaseFileError(Exception):
> + """Raised when parsing an empty or invalid dhcp.leases file.
> +
> + Current uses are DataSourceAzure and DataSourceEc2 during ephemeral
> + boot to scrape metadata.
> + """
> + pass
> +
> +
> +def maybe_dhcp_clean_discovery(nic=None):
> + """Create a temporary working directory and find the nic if needed.
The function name and docstring don't seem clear to me. What's the 'clean' part?
It doesn't always create a tmpdir, only if we have a nic and it's not up.
I think the "clean" is an implementation detail we don't need to expose in the function name.
maybe_perform_dhcp_discovery() is't alot better name-wise (I'm happy to help bikeshed), but I think
we can say something like:
Determine if we need to perform a DHCP DISCOVERY operation on ``nic`` if it's not up.
Utilize a tempdir to prevent dhclient from disturbing the host filesystem.
Something like that.
> +
> + If the device is already connected, do nothing.
> +
> + @param nic: Name of the network interface we want to run dhclient on.
> + @return: A dict of dhcp options from the dhclient discovery, otherwise an
> + empty dict.
> + """
> + if nic is None:
> + nic = find_fallback_nic()
> + if nic is None:
> + LOG.debug(
> + 'Skip dhcp_clean_discovery: Unable to find network interface.')
> + return {}
> + elif nic not in get_devicelist():
> + LOG.debug(
> + 'Skip dhcp_clean_discovery: Invalid interface name %s.', nic)
Since this is a debug message, then maybe something like nic '%s' not found in get_devicelist() or system network device list.
> + return {}
> + with util.tempdir(prefix='cloud-init-dhcp-') as tmpdir:
> + return dhcp_clean_discovery(nic, tmpdir)
> +
> +
> +def parse_dhcp_lease_file(lease_file):
> + """Parse the given dhcp lease file for the most recent lease.
> +
> + Return a dict of dhcp options as key value pairs for the most recent lease
> + block.
> +
> + @raises: InvalidDHCPLeaseFileError on empty of unparseable leasefile
> + content.
> + """
> + lease_regex = re.compile(r"lease {(?P<lease>[^}]*)}\n")
> + dhcp_leases = []
> + with open(lease_file) as stream:
> + lease_content = stream.read()
util.load_file()
> + if len(lease_content) == 0:
> + raise InvalidDHCPLeaseFileError(
> + 'Cannot parse empty dhcp lease file {0}'.format(lease_file))
> + for lease in lease_regex.findall(lease_content):
> + lease_options = []
> + for line in lease.split(';'):
> + # Strip newlines, double-quotes and option prefix
> + line = line.strip().replace('"', '').replace('option ', '')
> + if not line:
> + continue
> + lease_options.append(line.split(' ', 1))
> + dhcp_leases.append(dict(lease_options))
> + if not dhcp_leases:
> + raise InvalidDHCPLeaseFileError(
> + 'Cannot parse dhcp lease file {0}. No leases found'.format(
> + lease_file))
> + return dhcp_leases
> +
> +
> +def dhcp_clean_discovery(interface, cleandir):
> + """Start up dhclient on the provided inteface without side-effects.
maybe expand side-effects here even though the comments below have all of the real issues. Something like, avoiding script execution and filesystem modifications.
> +
> + @param interface: Name of the network inteface on which to dhclient.
> + @param cleandir: The directory from which to run dhclient as well as store
> + dhcp leases.
> +
> + @return: A dict of dhcp options parsed from the dhcp.leases file or empty
> + dict.
> + """
> + dhclient_script = util.which('dhclient')
> + if not dhclient_script:
> + LOG.debug('Skip dhclient configuration: No dhclient found.')
> + return {}
> + LOG.debug('Performing a clean dhcp discovery on %s', interface)
> +
> + # XXX We copy dhclient out of /sbin/dhclient to avoid dealing with strict
> + # app armor profiles which disallow running dhclient -sf <our-script-file>.
if this is app_armor only, should we guard/gate this code; does it need to run on selinux systems?
> + # We want to avoid running /sbin/dhclient-script because of side-effects in
> + # /etc/resolv.conf any any other vendor specific scripts in
> + # /etc/dhcp/dhclient*hooks.d.
> + dhclient_cmd = os.path.join(cleandir, 'dhclient')
> + util.copy(dhclient_script, dhclient_cmd)
> + pid_file = os.path.join(cleandir, 'dhclient.pid')
> + lease_file = os.path.join(cleandir, 'dhcp.leases')
> +
> + # dhclient needs the interface up to send initial discovery packets.
Maybe specify ISC dhclient; it's possible other dhclients do something different.
> + # Generally dhclient relies on dhclient-script PREINIT action to bring the
> + # link up before attempting discovery. Since we are using -sf /bin/true,
> + # we need to do that "link up" ourselves first.
> + util.subp(['ip', 'link', 'set', 'dev', interface, 'up'], capture=True)
> + dhclient_cmd = [dhclient_cmd, '-1', '-v', '-lf', lease_file, '-pf',
> + pid_file, interface, '-sf', '/bin/true']
Is there a fallback besides /bin/true ? Should we check for /bin/true before running? /bin/sh -c true could work as well?
> + util.subp(dhclient_cmd, capture=True)
> + return parse_dhcp_lease_file(lease_file)
> +
> +
> +# vi: ts=4 expandtab
> diff --git a/cloudinit/net/tests/test_dhcp.py b/cloudinit/net/tests/test_dhcp.py
> new file mode 100644
> index 0000000..eee2dd9
> --- /dev/null
> +++ b/cloudinit/net/tests/test_dhcp.py
> @@ -0,0 +1,142 @@
> +# This file is part of cloud-init. See LICENSE file for license information.
> +
> +import mock
> +import os
> +from textwrap import dedent
> +
> +from cloudinit.net.dhcp import (
> + InvalidDHCPLeaseFileError, maybe_dhcp_clean_discovery,
> + parse_dhcp_lease_file, dhcp_clean_discovery)
> +from cloudinit.util import ensure_file, write_file
> +from tests.unittests.helpers import CiTestCase
> +
> +
> +class TestParseDHCPLeasesFile(CiTestCase):
> +
> + def test_parse_empty_lease_file_errors(self):
> + """parse_dhcp_lease_file erros when file content is empty."""
s/erros/errors/g
> + empty_file = self.tmp_path('leases')
> + ensure_file(empty_file)
> + with self.assertRaises(InvalidDHCPLeaseFileError) as context_manager:
> + parse_dhcp_lease_file(empty_file)
> + error = context_manager.exception
> + self.assertIn('Cannot parse empty dhcp lease file', str(error))
> +
> + def test_parse_malformed_lease_file_content_errors(self):
> + """parse_dhcp_lease_file erros when file content isn't dhcp leases."""
> + non_lease_file = self.tmp_path('leases')
> + write_file(non_lease_file, 'hi mom.')
> + with self.assertRaises(InvalidDHCPLeaseFileError) as context_manager:
> + parse_dhcp_lease_file(non_lease_file)
> + error = context_manager.exception
> + self.assertIn('Cannot parse dhcp lease file', str(error))
> +
> + def test_parse_multiple_leases(self):
> + """parse_dhcp_lease_file returns a list of all leases within."""
> + lease_file = self.tmp_path('leases')
> + content = dedent("""
> + lease {
> + interface "wlp3s0";
> + fixed-address 192.168.2.74;
> + option subnet-mask 255.255.255.0;
> + option routers 192.168.2.1;
> + renew 4 2017/07/27 18:02:30;
> + expire 5 2017/07/28 07:08:15;
> + }
> + lease {
> + interface "wlp3s0";
> + fixed-address 192.168.2.74;
> + option subnet-mask 255.255.255.0;
> + option routers 192.168.2.1;
> + }
> + """)
> + expected = [
> + {'interface': 'wlp3s0', 'fixed-address': '192.168.2.74',
> + 'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1',
> + 'renew': '4 2017/07/27 18:02:30',
> + 'expire': '5 2017/07/28 07:08:15'},
> + {'interface': 'wlp3s0', 'fixed-address': '192.168.2.74',
> + 'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}]
> + write_file(lease_file, content)
> + self.assertItemsEqual(expected, parse_dhcp_lease_file(lease_file))
> +
> +
> +class TestDHCPDiscoveryClean(CiTestCase):
> + with_logs = True
> +
> + @mock.patch('cloudinit.net.dhcp.find_fallback_nic')
> + def test_no_fallback_nic_found(self, m_fallback_nic):
> + """Log and do nothing when nic is absent and no fallback is found."""
> + m_fallback_nic.return_value = None # No fallback nic found
> + self.assertEqual({}, maybe_dhcp_clean_discovery())
> + self.assertIn(
> + 'Skip dhcp_clean_discovery: Unable to find network interface',
> + self.logs.getvalue())
> +
> + def test_provided_nic_does_not_exist(self):
> + """When the provided nic doesn't exist, log a message and no-op."""
> + self.assertEqual({}, maybe_dhcp_clean_discovery('idontexist'))
> + self.assertIn(
> + 'Skip dhcp_clean_discovery: Invalid interface name idontexist',
> + self.logs.getvalue())
> +
> + @mock.patch('cloudinit.net.dhcp.util.which')
> + def test_absent_dhclient_command(self, m_which):
> + """When dhclient doesn't exist in the OS, log the issue and no-op."""
> + m_which.return_value = None # dhclient isn't found
> + tmpdir = self.tmp_dir()
> + self.assertEqual({}, dhcp_clean_discovery('idontexist', tmpdir))
> + self.assertIn(
> + 'Skip dhclient configuration: No dhclient found.',
> + self.logs.getvalue())
> +
> + @mock.patch('cloudinit.net.dhcp.dhcp_clean_discovery')
> + @mock.patch('cloudinit.net.dhcp.find_fallback_nic')
> + def test_dhclient_run_with_tmpdir(self, m_fallback, m_dhcp_discovery):
> + """maybe_dhcp_clean_discovery passes tmpdir to dhcp_clean_discovery."""
> + m_fallback.return_value = 'eth9'
> + m_dhcp_discovery.return_value = {'address': '192.168.2.2'}
> + self.assertEqual(
> + {'address': '192.168.2.2'}, maybe_dhcp_clean_discovery())
> + m_dhcp_discovery.assert_called_once()
> + call = m_dhcp_discovery.call_args_list[0]
> + self.assertEqual('eth9', call[0][0])
> + self.assertIn('/tmp/cloud-init-dhcp-', call[0][1])
> +
> + @mock.patch('cloudinit.net.dhcp.util.subp')
> + @mock.patch('cloudinit.net.dhcp.util.which')
> + def test_dhcp_clean_discovery_run_in_sandbox(self, m_which, m_subp):
> + """dhcp_clean_discovery brings up the interface and runs dhclient.
> +
> + It also returns the parsed dhcp.leases file generated in the sandbox.
> + """
> + tmpdir = self.tmp_dir()
> + dhclient_script = os.path.join(tmpdir, 'dhclient.orig')
> + script_content = '#!/bin/bash\necho fake-dhclient'
> + write_file(dhclient_script, script_content, mode=0o755)
> + lease_content = dedent("""
> + lease {
> + interface "eth9";
> + fixed-address 192.168.2.74;
> + option subnet-mask 255.255.255.0;
> + option routers 192.168.2.1;
> + }
> + """)
> + lease_file = os.path.join(tmpdir, 'dhcp.leases')
> + write_file(lease_file, lease_content)
> + m_which.return_value = dhclient_script # dhclient command is present
> + self.assertItemsEqual(
> + [{'interface': 'eth9', 'fixed-address': '192.168.2.74',
> + 'subnet-mask': '255.255.255.0', 'routers': '192.168.2.1'}],
> + dhcp_clean_discovery('eth9', tmpdir))
> + # dhclient script got copied
> + with open(os.path.join(tmpdir, 'dhclient')) as stream:
> + self.assertEqual(script_content, stream.read())
> + # Interface was brought up before dhclient called from sandbox
> + m_subp.assert_has_calls([
> + mock.call(
> + ['ip', 'link', 'set', 'dev', 'eth9', 'up'], capture=True),
> + mock.call(
> + [os.path.join(tmpdir, 'dhclient'), '-1', '-v', '-lf',
> + lease_file, '-pf', os.path.join(tmpdir, 'dhclient.pid'),
> + 'eth9', '-sf', '/bin/true'], capture=True)])
> diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py
> index 4ec9592..ae0fe26 100644
> --- a/cloudinit/sources/DataSourceEc2.py
> +++ b/cloudinit/sources/DataSourceEc2.py
> @@ -21,7 +23,7 @@ from cloudinit import warnings
> LOG = logging.getLogger(__name__)
>
> # Which version we are requesting of the ec2 metadata apis
> -DEF_MD_VERSION = '2009-04-04'
> +DEF_MD_VERSION = '2016-09-02'
What's the implications of changing this value? Would we leave the old-one around?
>
> STRICT_ID_PATH = ("datasource", "Ec2", "strict_id")
> STRICT_ID_DEFAULT = "warn"
> @@ -73,21 +77,25 @@ class DataSourceEc2(sources.DataSource):
> elif self.cloud_platform == Platforms.NO_EC2_METADATA:
> return False
>
> - try:
> - if not self.wait_for_metadata_service():
> + if self.get_network_metadata: # Setup networking in init-local stage.
> + if util.is_FreeBSD():
> + LOG.debug("FreeBSD doesn't support running dhclient with -sf")
But is there an apparmor or selinux issue?
> return False
> - start_time = time.time()
> - self.userdata_raw = \
> - ec2.get_instance_userdata(self.api_ver, self.metadata_address)
> - self.metadata = ec2.get_instance_metadata(self.api_ver,
> - self.metadata_address)
> - LOG.debug("Crawl of metadata service took %.3f seconds",
> - time.time() - start_time)
> - return True
> - except Exception:
> - util.logexc(LOG, "Failed reading from metadata address %s",
> - self.metadata_address)
> - return False
> + dhcp_leases = dhcp.maybe_dhcp_clean_discovery()
> + if not dhcp_leases:
> + # DataSourceEc2Local failed in init-local stage. DataSourceEc2
> + # will still run in init-network stage.
> + return False
> + dhcp_opts = dhcp_leases[-1]
> + net_params = {'interface': dhcp_opts.get('interface'),
> + 'ip': dhcp_opts.get('fixed-address'),
> + 'prefix_or_mask': dhcp_opts.get('subnet-mask'),
> + 'broadcast': dhcp_opts.get('broadcast-address'),
> + 'router': dhcp_opts.get('routers')}
> + with net.EphemeralIPv4Network(**net_params):
> + return self._crawl_metadata()
> + else:
> + return self._crawl_metadata()
>
> @property
> def launch_index(self):
> diff --git a/tests/unittests/test_datasource/test_ec2.py b/tests/unittests/test_datasource/test_ec2.py
> index 12230ae..4bb04d2 100644
> --- a/tests/unittests/test_datasource/test_ec2.py
> +++ b/tests/unittests/test_datasource/test_ec2.py
> @@ -123,8 +157,9 @@ class TestEc2(test_helpers.HttprettyTestCase):
>
> def setUp(self):
> super(TestEc2, self).setUp()
> - self.metadata_addr = ec2.DataSourceEc2.metadata_urls[0]
> - self.api_ver = '2009-04-04'
> + self.datasource = ec2.DataSourceEc2
> + self.metadata_addr = self.datasource.metadata_urls[0]
> + self.api_ver = '2016-09-02'
Shouldn't this be ec2.DataSourceEc2.DEF_MD_VERSION ?
>
> @property
> def metadata_url(self):
--
https://code.launchpad.net/~chad.smith/cloud-init/+git/cloud-init/+merge/328241
Your team cloud-init commiters is requested to review the proposed merge of ~chad.smith/cloud-init:aws-local-dhcp into cloud-init:master.