cloud-init-dev team mailing list archive
-
cloud-init-dev team
-
Mailing list archive
-
Message #05158
Re: [Merge] ~chad.smith/cloud-init:feature/azure-network-on-boot into cloud-init:master
This is coming together.
Diff comments:
> diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
> index 7007d9e..87f15b2 100644
> --- a/cloudinit/sources/DataSourceAzure.py
> +++ b/cloudinit/sources/DataSourceAzure.py
> @@ -252,6 +263,10 @@ class DataSourceAzure(sources.DataSource):
>
> dsname = 'Azure'
> _negotiated = False
> + _metadata_imds = sources.UNSET
> +
> + # Regenerate network config new_instance boot and every boot
> + update_events = {'network': [EventType.BOOT_NEW_INSTANCE, EventType.BOOT]}
The prototype yaml included a version and 'when' as the attribute which accepted the list of events.
Is this meant to model that?
updates:
policy-version: [1] # default to 1
network:
when: [never, per-instance, boot, boot-change, always]
watch-url: http://..../
storage:
when:
>
> def __init__(self, sys_cfg, distro, paths):
> sources.DataSource.__init__(self, sys_cfg, distro, paths)
> @@ -373,41 +390,86 @@ class DataSourceAzure(sources.DataSource):
> except NonAzureDataSource:
> continue
> except BrokenAzureDataSource as exc:
> - raise exc
> + msg = 'BrokenAzureDataSource: %s' % exc
> + raise sources.InvalidMetaDataException(msg)
> except util.MountFailedError:
> LOG.warning("%s was not mountable", cdev)
> continue
>
> if reprovision or self._should_reprovision(ret):
> ret = self._reprovision()
> - (md, self.userdata_raw, cfg, files) = ret
> + imds_md = get_metadata_from_imds(
> + self.fallback_interface, retries=3)
> + (md, userdata_raw, cfg, files) = ret
> self.seed = cdev
> - self.metadata = util.mergemanydict([md, DEFAULT_METADATA])
> - self.cfg = util.mergemanydict([cfg, BUILTIN_CLOUD_CONFIG])
> + crawled_data.update({
> + 'cfg': cfg,
> + 'files': files,
> + 'metadata': util.mergemanydict(
> + [md, {'imds': imds_md}]),
> + 'userdata_raw': userdata_raw})
> found = cdev
>
> LOG.debug("found datasource in %s", cdev)
> break
>
> if not found:
> - return False
> + raise sources.InvalidMetaDataException('No Azure metadata found')
>
> if found == ddir:
> LOG.debug("using files cached in %s", ddir)
>
> seed = _get_random_seed()
> if seed:
> - self.metadata['random_seed'] = seed
> + crawled_data['metadata']['random_seed'] = seed
> + crawled_data['metadata']['instance-id'] = util.read_dmi_data(
> + 'system-uuid')
> + return crawled_data
> +
> + def is_platform_viable(self):
check_platform_id() ? we have a check_instance_id()
> + """Check platform environment to report if this datasource may run."""
> + asset_tag = util.read_dmi_data('chassis-asset-tag')
> + if asset_tag != AZURE_CHASSIS_ASSET_TAG:
> + LOG.debug("Non-Azure DMI asset tag '%s' discovered.", asset_tag)
> + return False
> + return True
> +
> + def clear_cached_attrs(self):
> + """Reset any cached class attributes to defaults."""
> + super(DataSourceAzure, self).clear_cached_attrs()
> + self._metadata_imds = sources.UNSET
> +
Do you think we should deprecate the get_data property() so callers can be notified to change?
That is, I think we want to stop using only get_data() since we're breaking this into the various
stages (crawl, process, cache) right?
> + def _get_data(self):
> + """Crawl and process datasource metadata caching metadata as attrs.
> +
> + @return: True on success, False on error, invalid or disabled
> + datasource.
> + """
> + if not self.is_platform_viable():
> + return False
> + try:
> + crawled_data = util.log_time(
> + logfunc=LOG.debug, msg='Crawl of metadata service',
> + func=self.crawl_metadata)
> + except sources.InvalidMetaDataException as e:
> + LOG.warning('Could not crawl Azure metadata: %s', e)
> + return False
> +
> + # Process crawled data and augment with various config defaults
This looks like it should be a class function passing in the crawled data right?
> + self.cfg = util.mergemanydict(
> + [crawled_data['cfg'], BUILTIN_CLOUD_CONFIG])
> + self._metadata_imds = crawled_data['metadata']['imds']
> + self.metadata = util.mergemanydict(
> + [crawled_data['metadata'], DEFAULT_METADATA])
> + self.userdata_raw = crawled_data['userdata_raw']
>
> user_ds_cfg = util.get_cfg_by_path(self.cfg, DS_CFG_PATH, {})
> self.ds_cfg = util.mergemanydict([user_ds_cfg, self.ds_cfg])
>
> # walinux agent writes files world readable, but expects
> # the directory to be protected.
> - write_files(ddir, files, dirmode=0o700)
> -
> - self.metadata['instance-id'] = util.read_dmi_data('system-uuid')
> -
> + write_files(
> + self.ds_cfg['data_dir'], crawled_data['files'], dirmode=0o700)
> return True
>
> def device_name_to_device(self, name):
> @@ -543,22 +605,56 @@ class DataSourceAzure(sources.DataSource):
> @property
> def network_config(self):
> """Generate a network config like net.generate_fallback_network() with
> - the following execptions.
> + the following execeptions.
heh, exceptions
>
> 1. Probe the drivers of the net-devices present and inject them in
> the network configuration under params: driver: <driver> value
> 2. Generate a fallback network config that does not include any of
> the blacklisted devices.
> """
> - blacklist = ['mlx4_core']
> if not self._network_config:
> - LOG.debug('Azure: generating fallback configuration')
> - # generate a network config, blacklist picking any mlx4_core devs
> - netconfig = net.generate_fallback_config(
> - blacklist_drivers=blacklist, config_driver=True)
> + if self._metadata_imds != sources.UNSET and self._metadata_imds:
> + netconfig = {'version': 2, 'ethernets': {}}
This needs to be a function for easier testing.
> + LOG.debug('Azure: generating network configuration from IMDS')
> + if self.distro and self.distro.name == 'ubuntu':
> + remove_ubuntu_network_config_scripts()
> + network_metadata = self._metadata_imds['network']
> + for idx, intf in enumerate(network_metadata['interface']):
> + nicname = 'eth{idx}'.format(idx=idx)
> + dev_config = {}
> + for addr4 in intf['ipv4']['ipAddress']:
> + privateIpv4 = addr4['privateIpAddress']
> + if privateIpv4:
> + if dev_config.get('dhcp4', False):
> + # Append static address config for nic > 1
> + netPrefix = intf['ipv4']['subnet'][0].get(
> + 'prefix', '24')
> + if not dev_config.get('addresses'):
> + dev_config['addresses'] = []
> + dev_config['addresses'].append(
> + '{ip}/{prefix}'.format(
> + ip=privateIpv4, prefix=netPrefix))
> + else:
> + dev_config['dhcp4'] = True
> + for addr6 in intf['ipv6']['ipAddress']:
> + privateIpv6 = addr6['privateIpAddress']
> + if privateIpv6:
> + dev_config['dhcp6'] = True
> + break
> + if dev_config:
> + mac = ':'.join(re.findall(r'..', intf['macAddress']))
> + dev_config.update(
> + {'match': {'macaddress': mac.lower()},
> + 'set-name': nicname})
> + netconfig['ethernets'][nicname] = dev_config
> + else:
> + blacklist = ['mlx4_core']
> + LOG.debug('Azure: generating fallback configuration')
> + # generate a network config, blacklist picking mlx4_core devs
> + netconfig = net.generate_fallback_config(
> + blacklist_drivers=blacklist, config_driver=True)
>
> self._network_config = netconfig
> -
> return self._network_config
>
>
> @@ -1025,6 +1121,89 @@ def load_azure_ds_dir(source_dir):
> return (md, ud, cfg, {'ovf-env.xml': contents})
>
>
> +def get_metadata_from_imds(fallback_nic, retries):
> + """Query Azure's network metadata service, returning a dictionary.
> +
> + If network is not up, setup ephemeral dhcp on fallback_nic to talk to the
> + IMDS. For more info on IMDS:
> + https://docs.microsoft.com/en-us/azure/virtual-machines/windows/instance-metadata-service
> +
> + @param fallback_nic: String. The name of the nic which requires active
> + networ in order to query IMDS.
> + @param retries: The number of retries of the IMDS_URL.
> +
> + @return: A dict of instance metadata containing compute and network
> + info.
> + """
> + if net.is_up(fallback_nic):
> + return util.log_time(
> + logfunc=LOG.debug,
> + msg='Crawl of Azure Instance Metadata Service (IMDS)',
> + func=_get_metadata_from_imds, args=(retries,))
> + else:
> + with EphemeralDHCPv4(fallback_nic):
> + return util.log_time(
> + logfunc=LOG.debug,
> + msg='Crawl of Azure Instance Metadata Service (IMDS)',
> + func=_get_metadata_from_imds, args=(retries,))
> +
> +
> +def _get_metadata_from_imds(retries):
> +
> + def retry_on_url_error(msg, exception):
> + if isinstance(exception, UrlError) and exception.code == 404:
> + return True # Continue retries
> + return False # Stop retries on all other exceptions, including 404s
> +
> + url = IMDS_URL + "instance?api-version=2017-12-01"
I see at least two api version values, can we centralize these into variables that
explain what the data means or the expected format of this version?
> + headers = {"Metadata": "true"}
> + try:
> + response = readurl(
> + url, timeout=1, headers=headers, retries=retries,
timeout=1 ? Is the default timeout too long? too short?
> + exception_cb=retry_on_url_error)
> + except Exception as e:
> + LOG.debug('Ignoring IMDS instance metadata: %s', e)
> + return {}
> + try:
> + return util.load_json(str(response))
> + except json.decoder.JSONDecodeError:
> + LOG.warning(
> + 'Ignoring non-json IMDS instance metadata: %s', str(response))
> + return {}
> +
> +
> +def remove_ubuntu_network_config_scripts(paths=None):
We typically prepend maybe_ since it's possible that in new images we don't need to do this.
Would it be useful to mark a flag on the ds object to indicate whether we've removed those or not?
> + """Remove Azure-specific ubuntu network config for non-primary nics.
> +
> + @param paths: List of networking scripts or directories to remove when
> + present.
> +
> + In certain supported ubuntu images, static udev rules or netplan yaml
> + config is delivered in the base ubuntu image to support dhcp on any
> + additional interfaces which get attached by a customer at some point
> + after initial boot. Since the Azure datasource can now regenerate
> + network configuration as metadata reports these new devices, we no longer
> + want the udev rules or netplan's 90-azure-hotplug.yaml to configure
> + networking on eth1 or greater as it might collide with cloud-init's
> + configuration.
> +
> + Remove the any existing extended network scripts if the datasource is
> + enabled to write network per-boot.
> + """
> + if not paths:
> + paths = UBUNTU_EXTENDED_NETWORK_SCRIPTS
> + LOG.debug(
> + 'Removing Ubuntu extended network scripts because cloud-init updates'
> + ' Azure network configuration on the following event: %s.',
> + EventType.BOOT)
> + for path in paths:
> + if os.path.exists(path):
> + if os.path.isdir(path):
> + util.del_dir(path)
> + else:
> + util.del_file(path)
> +
> +
> class BrokenAzureDataSource(Exception):
> pass
>
> diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
> index e82716e..fbc9d2d 100644
> --- a/tests/unittests/test_datasource/test_azure.py
> +++ b/tests/unittests/test_datasource/test_azure.py
> @@ -77,9 +83,110 @@ def construct_valid_ovf_env(data=None, pubkeys=None,
> return content
>
>
> +NETWORK_METADATA = {
What URL path and api-version is this?
> + "network": {
> + "interface": [
> + {
> + "macAddress": "000D3A047598",
> + "ipv6": {
> + "ipAddress": []
> + },
> + "ipv4": {
> + "subnet": [
> + {
> + "prefix": "24",
> + "address": "10.0.0.0"
> + }
> + ],
> + "ipAddress": [
> + {
> + "privateIpAddress": "10.0.0.4",
> + "publicIpAddress": "104.46.124.81"
> + }
> + ]
> + }
> + }
> + ]
> + }
> +}
> +
> +
> +class TestGetMetadataFromIMDS(HttprettyTestCase):
> +
> + with_logs = True
> +
> + def setUp(self):
> + super(TestGetMetadataFromIMDS, self).setUp()
> + self.network_md_url = dsaz.IMDS_URL + "instance?api-version=2017-12-01"
> +
> + @mock.patch('cloudinit.sources.DataSourceAzure.readurl')
> + @mock.patch('cloudinit.sources.DataSourceAzure.EphemeralDHCPv4')
> + @mock.patch('cloudinit.sources.DataSourceAzure.net.is_up')
> + def test_get_metadata_does_not_dhcp_if_network_is_up(
> + self, m_net_is_up, m_dhcp, m_readurl):
> + """Do not perform DHCP setup when nic is already up."""
> + m_net_is_up.return_value = True
> + m_readurl.return_value = url_helper.StringResponse(
> + json.dumps(NETWORK_METADATA).encode('utf-8'))
> + self.assertEqual(
> + NETWORK_METADATA,
> + dsaz.get_metadata_from_imds('eth9', retries=3))
> +
> + m_net_is_up.assert_called_with('eth9')
> + m_dhcp.assert_not_called()
> + self.assertIn(
> + "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time
> + self.logs.getvalue())
> +
> + @mock.patch('cloudinit.sources.DataSourceAzure.readurl')
> + @mock.patch('cloudinit.sources.DataSourceAzure.EphemeralDHCPv4')
> + @mock.patch('cloudinit.sources.DataSourceAzure.net.is_up')
> + def test_get_metadata_performs_dhcp_when_network_is_down(
> + self, m_net_is_up, m_dhcp, m_readurl):
> + """Do not perform DHCP setup when nic is already up."""
comment doesn't match test-case.
> + m_net_is_up.return_value = False
> + m_readurl.return_value = url_helper.StringResponse(
> + json.dumps(NETWORK_METADATA).encode('utf-8'))
> +
> + self.assertEqual(
> + NETWORK_METADATA,
> + dsaz.get_metadata_from_imds('eth9', retries=2))
> +
> + m_net_is_up.assert_called_with('eth9')
> + m_dhcp.assert_called_with('eth9')
> + self.assertIn(
> + "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time
> + self.logs.getvalue())
> +
> + m_readurl.assert_called_with(
> + self.network_md_url, exception_cb=mock.ANY,
> + headers={'Metadata': 'true'}, retries=2, timeout=1)
> +
> + @mock.patch('cloudinit.url_helper.time.sleep')
> + @mock.patch('cloudinit.sources.DataSourceAzure.net.is_up')
> + def test_get_metadata_from_imds_empty_when_no_imds_present(
> + self, m_net_is_up, m_sleep):
> + """Return empty dict when IMDS network metadata is absent."""
> + httpretty.register_uri(
> + httpretty.GET,
> + dsaz.IMDS_URL + 'instance?api-version=2017-12-01',
> + body={}, status=404)
> +
> + m_net_is_up.return_value = True # skips dhcp
> +
> + self.assertEqual({}, dsaz.get_metadata_from_imds('eth9', retries=2))
> +
> + m_net_is_up.assert_called_with('eth9')
> + self.assertEqual([mock.call(1), mock.call(1)], m_sleep.call_args_list)
> + self.assertIn(
> + "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time
> + self.logs.getvalue())
> +
> +
> class TestAzureDataSource(CiTestCase):
>
> with_logs = True
> + maxDiff = None
Should this go in CiTestCase by default?
>
> def setUp(self):
> super(TestAzureDataSource, self).setUp()
> @@ -1188,6 +1430,24 @@ class TestCanDevBeReformatted(CiTestCase):
> "(datasource.Azure.never_destroy_ntfs)", msg)
>
>
> +class TestClearCachedData(CiTestCase):
> +
> + def test_clear_cached_attrs_clears_imds(self):
> + """All class attributes are reset to defaults, including imds data."""
> + tmp = self.tmp_dir()
> + paths = helpers.Paths(
> + {'cloud_dir': tmp, 'run_dir': tmp})
> + dsrc = dsaz.DataSourceAzure({}, distro=None, paths=paths)
> + dsrc.metadata = 'md'
> + dsrc.userdata = 'ud'
> + dsrc._metadata_imds = 'imds'
> + dsrc._dirty_cache = True
> + dsrc.clear_cached_attrs()
> + self.assertEqual({}, dsrc.metadata)
> + self.assertEqual(None, dsrc.userdata)
> + self.assertEqual(UNSET, dsrc._metadata_imds)
I'm not super happy that we have *three* different values as the default.
I'd like to not have to know this in the unittest.
Can we instantiate a new ds instance, deep_copy it, and then assert
the 3 fields are equal after clear_cache() instead?
> +
> +
> class TestAzureNetExists(CiTestCase):
>
> def test_azure_net_must_exist_for_legacy_objpkl(self):
> @@ -1398,4 +1658,52 @@ class TestAzureDataSourcePreprovisioning(CiTestCase):
> self.assertEqual(m_net.call_count, 1)
>
>
> +class TestRemoveUbuntuNetworkConfigScripts(CiTestCase):
> +
> + with_logs = True
> +
> + def setUp(self):
> + super(TestRemoveUbuntuNetworkConfigScripts, self).setUp()
> + self.tmp = self.tmp_dir()
> +
> + def test_remove_network_scripts_removes_both_files_and_directories(self):
> + """Any files or directories in paths are removed when present."""
> + file1 = self.tmp_path('file1', dir=self.tmp)
> + subdir = self.tmp_path('sub1', dir=self.tmp)
> + subfile = self.tmp_path('leaf1', dir=subdir)
> + write_file(file1, 'file1content')
Can we instead mock os.listdir and os.path.exists?
> + write_file(subfile, 'leafcontent')
> + dsaz.remove_ubuntu_network_config_scripts(paths=[subdir, file1])
> +
> + for path in (file1, subdir, subfile):
> + self.assertFalse(os.path.exists(path),
> + 'Found unremoved: %s' % path)
> +
> + expected_logs = [
> + 'Removing Ubuntu extended network scripts because cloud-init'
> + ' updates Azure network configuration on the following event:'
> + ' System boot.',
> + 'Recursively deleting %s' % subdir,
> + 'Attempting to remove %s' % file1]
> + for log in expected_logs:
> + self.assertIn(log, self.logs.getvalue())
> +
> + def test_remove_network_scripts_only_attemts_removal_if_path_exists(self):
> + """Any files or directories absent are skipped without error."""
> + dsaz.remove_ubuntu_network_config_scripts(paths=[
> + '/not/a/dir/', '/not/a/file'])
> + self.assertNotIn('/not/a', self.logs.getvalue()) # No delete logs
> +
> + @mock.patch('cloudinit.sources.DataSourceAzure.os.path.exists')
> + def test_remove_network_scripts_default_removes_stock_scripts(self,
> + m_exists):
> + """Azure's stock ubuntu image scripts and artifacts are removed."""
> + # Report path absent on all to avoid delete operation
> + m_exists.return_value = False
> + dsaz.remove_ubuntu_network_config_scripts()
> + calls = m_exists.call_args_list
> + for path in dsaz.UBUNTU_EXTENDED_NETWORK_SCRIPTS:
> + self.assertIn(mock.call(path), calls)
> +
> +
> # vi: ts=4 expandtab
--
https://code.launchpad.net/~chad.smith/cloud-init/+git/cloud-init/+merge/348704
Your team cloud-init commiters is requested to review the proposed merge of ~chad.smith/cloud-init:feature/azure-network-on-boot into cloud-init:master.
References