← Back to team overview

cloud-init-dev team mailing list archive

[Merge] ~chad.smith/cloud-init:feature/azure-network-per-boot into cloud-init:master

 

Chad Smith has proposed merging ~chad.smith/cloud-init:feature/azure-network-per-boot into cloud-init:master.

Commit message:
azure: allow azure to generate network configuration from IMDS per boot

Azure datasource now queries IMDS metadata service for network
configuration at link local address
http://169.254.169.254/metadata/instance?api-version=2017-12-01. The
azure metadata service presents a list of macs and allocated ip addresses
associated with this instance. Azure will now also regenerate network
configuration on every boot because it subscribes to EventType.BOOT
maintenance events as well as the 'first boot'
EventType.BOOT_NEW_INSTANCE.

For testing, add an azure-imds choice for --kind to the cloud-init devel
net_convert tool for debugging IMDS metadata.

Also refactor _get_data into 3 discrete methods:
  - is_platform_viable: check quickly whether the datasource is
    potentially compatible with the platform on which it is running
  - crawl_metadata: walk all potential metadata candidates, returning a
    structured dict of all metadata and userdata. Raise InvalidMetaData on
    error.
  - _get_data: call crawl_metadata and process results or error. Cache
    instance data on class attributes: metadata, userdata_raw etc.

Requested reviews:
  cloud-init commiters (cloud-init-dev)

For more details, see:
https://code.launchpad.net/~chad.smith/cloud-init/+git/cloud-init/+merge/352660
-- 
Your team cloud-init commiters is requested to review the proposed merge of ~chad.smith/cloud-init:feature/azure-network-per-boot into cloud-init:master.
diff --git a/cloudinit/cmd/devel/net_convert.py b/cloudinit/cmd/devel/net_convert.py
index 1ec08a3..271dc5e 100755
--- a/cloudinit/cmd/devel/net_convert.py
+++ b/cloudinit/cmd/devel/net_convert.py
@@ -8,6 +8,7 @@ import sys
 import yaml
 
 from cloudinit.sources.helpers import openstack
+from cloudinit.sources import DataSourceAzure as azure
 
 from cloudinit.net import eni, netplan, network_state, sysconfig
 from cloudinit import log
@@ -28,7 +29,8 @@ def get_parser(parser=None):
     parser.add_argument("-p", "--network-data", type=open,
                         metavar="PATH", required=True)
     parser.add_argument("-k", "--kind",
-                        choices=['eni', 'network_data.json', 'yaml'],
+                        choices=['eni', 'network_data.json', 'yaml',
+                                 'azure-imds'],
                         required=True)
     parser.add_argument("-d", "--directory",
                         metavar="PATH",
@@ -78,10 +80,13 @@ def handle_args(name, args):
                 ["Input YAML",
                  yaml.dump(pre_ns, default_flow_style=False, indent=4), ""]))
         ns = network_state.parse_net_config_data(pre_ns)
-    else:
+    elif args.kind == 'network_data.json':
         pre_ns = openstack.convert_net_json(
             json.loads(net_data), known_macs=known_macs)
         ns = network_state.parse_net_config_data(pre_ns)
+    elif args.kind == 'azure-imds':
+        pre_ns = azure.parse_network_config(json.loads(net_data))
+        ns = network_state.parse_net_config_data(pre_ns)
 
     if not ns:
         raise RuntimeError("No valid network_state object created from"
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index 7007d9e..d8170c3 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -8,6 +8,7 @@ import base64
 import contextlib
 import crypt
 from functools import partial
+import json
 import os
 import os.path
 import re
@@ -17,6 +18,7 @@ import xml.etree.ElementTree as ET
 
 from cloudinit import log as logging
 from cloudinit import net
+from cloudinit.event import EventType
 from cloudinit.net.dhcp import EphemeralDHCPv4
 from cloudinit import sources
 from cloudinit.sources.helpers.azure import get_metadata_from_fabric
@@ -49,7 +51,17 @@ DEFAULT_FS = 'ext4'
 AZURE_CHASSIS_ASSET_TAG = '7783-7084-3265-9085-8269-3286-77'
 REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds"
 REPORTED_READY_MARKER_FILE = "/var/lib/cloud/data/reported_ready"
-IMDS_URL = "http://169.254.169.254/metadata/reprovisiondata";
+AGENT_SEED_DIR = '/var/lib/waagent'
+IMDS_URL = "http://169.254.169.254/metadata/";
+
+# List of static scripts and network config artifacts created by
+# stock ubuntu supported images.
+UBUNTU_EXTENDED_NETWORK_SCRIPTS = [
+    '/etc/netplan/90-azure-hotplug.yaml',
+    '/usr/local/sbin/ephemeral_eth.sh',
+    '/etc/udev/rules.d/10-net-device-added.rules',
+    '/run/network/interfaces.ephemeral.d',
+]
 
 
 def find_storvscid_from_sysctl_pnpinfo(sysctl_out, deviceid):
@@ -185,7 +197,7 @@ if util.is_FreeBSD():
 
 BUILTIN_DS_CONFIG = {
     'agent_command': AGENT_START_BUILTIN,
-    'data_dir': "/var/lib/waagent",
+    'data_dir': AGENT_SEED_DIR,
     'set_hostname': True,
     'hostname_bounce': {
         'interface': DEFAULT_PRIMARY_NIC,
@@ -252,6 +264,10 @@ class DataSourceAzure(sources.DataSource):
 
     dsname = 'Azure'
     _negotiated = False
+    _metadata_imds = sources.UNSET
+
+    # Regenerate network config on new-instance boot and on every boot
+    update_events = {'network': [EventType.BOOT_NEW_INSTANCE, EventType.BOOT]}
 
     def __init__(self, sys_cfg, distro, paths):
         sources.DataSource.__init__(self, sys_cfg, distro, paths)
@@ -336,15 +352,17 @@ class DataSourceAzure(sources.DataSource):
         metadata['public-keys'] = key_value or pubkeys_from_crt_files(fp_files)
         return metadata
 
-    def _get_data(self):
+    def crawl_metadata(self):
+        """Walk all instance metadata sources returning a dict on success.
+
+        @return: A dictionary of any metadata content for this instance.
+        @raise: InvalidMetaDataException when the expected metadata service is
+            unavailable, broken or disabled.
+        """
+        crawled_data = {}
         # azure removes/ejects the cdrom containing the ovf-env.xml
         # file on reboot.  So, in order to successfully reboot we
         # need to look in the datadir and consider that valid
-        asset_tag = util.read_dmi_data('chassis-asset-tag')
-        if asset_tag != AZURE_CHASSIS_ASSET_TAG:
-            LOG.debug("Non-Azure DMI asset tag '%s' discovered.", asset_tag)
-            return False
-
         ddir = self.ds_cfg['data_dir']
 
         candidates = [self.seed_dir]
@@ -373,41 +391,84 @@ class DataSourceAzure(sources.DataSource):
             except NonAzureDataSource:
                 continue
             except BrokenAzureDataSource as exc:
-                raise exc
+                msg = 'BrokenAzureDataSource: %s' % exc
+                raise sources.InvalidMetaDataException(msg)
             except util.MountFailedError:
                 LOG.warning("%s was not mountable", cdev)
                 continue
 
             if reprovision or self._should_reprovision(ret):
                 ret = self._reprovision()
-            (md, self.userdata_raw, cfg, files) = ret
+            imds_md = get_metadata_from_imds(
+                self.fallback_interface, retries=3)
+            (md, userdata_raw, cfg, files) = ret
             self.seed = cdev
-            self.metadata = util.mergemanydict([md, DEFAULT_METADATA])
-            self.cfg = util.mergemanydict([cfg, BUILTIN_CLOUD_CONFIG])
+            crawled_data.update({
+                'cfg': cfg,
+                'files': files,
+                'metadata': util.mergemanydict(
+                    [md, {'imds': imds_md}]),
+                'userdata_raw': userdata_raw})
             found = cdev
 
             LOG.debug("found datasource in %s", cdev)
             break
 
         if not found:
-            return False
+            raise sources.InvalidMetaDataException('No Azure metadata found')
 
         if found == ddir:
             LOG.debug("using files cached in %s", ddir)
 
         seed = _get_random_seed()
         if seed:
-            self.metadata['random_seed'] = seed
+            crawled_data['metadata']['random_seed'] = seed
+        crawled_data['metadata']['instance-id'] = util.read_dmi_data(
+            'system-uuid')
+        return crawled_data
+
+    def _is_platform_viable(self):
+        """Check platform environment to report if this datasource may run."""
+        return _is_platform_viable(self.seed_dir)
+
+    def clear_cached_attrs(self, attr_defaults=()):
+        """Reset any cached class attributes to defaults."""
+        super(DataSourceAzure, self).clear_cached_attrs(attr_defaults)
+        self._metadata_imds = sources.UNSET
+
+    def _get_data(self):
+        """Crawl and process datasource metadata caching metadata as attrs.
+
+        @return: True on success, False on error, invalid or disabled
+            datasource.
+        """
+        if not self._is_platform_viable():
+            return False
+        if self.distro and self.distro.name == 'ubuntu':
+            maybe_remove_ubuntu_network_config_scripts()
+        try:
+            crawled_data = util.log_time(
+                        logfunc=LOG.debug, msg='Crawl of metadata service',
+                        func=self.crawl_metadata)
+        except sources.InvalidMetaDataException as e:
+            LOG.warning('Could not crawl Azure metadata: %s', e)
+            return False
+
+        # Process crawled data and augment with various config defaults
+        self.cfg = util.mergemanydict(
+            [crawled_data['cfg'], BUILTIN_CLOUD_CONFIG])
+        self._metadata_imds = crawled_data['metadata']['imds']
+        self.metadata = util.mergemanydict(
+            [crawled_data['metadata'], DEFAULT_METADATA])
+        self.userdata_raw = crawled_data['userdata_raw']
 
         user_ds_cfg = util.get_cfg_by_path(self.cfg, DS_CFG_PATH, {})
         self.ds_cfg = util.mergemanydict([user_ds_cfg, self.ds_cfg])
 
         # walinux agent writes files world readable, but expects
         # the directory to be protected.
-        write_files(ddir, files, dirmode=0o700)
-
-        self.metadata['instance-id'] = util.read_dmi_data('system-uuid')
-
+        write_files(
+            self.ds_cfg['data_dir'], crawled_data['files'], dirmode=0o700)
         return True
 
     def device_name_to_device(self, name):
@@ -436,7 +497,7 @@ class DataSourceAzure(sources.DataSource):
     def _poll_imds(self):
         """Poll IMDS for the new provisioning data until we get a valid
         response. Then return the returned JSON object."""
-        url = IMDS_URL + "?api-version=2017-04-02"
+        url = IMDS_URL + "reprovisiondata?api-version=2017-04-02"
         headers = {"Metadata": "true"}
         report_ready = bool(not os.path.isfile(REPORTED_READY_MARKER_FILE))
         LOG.debug("Start polling IMDS")
@@ -487,7 +548,7 @@ class DataSourceAzure(sources.DataSource):
         jump back into the polling loop in order to retrieve the ovf_env."""
         if not ret:
             return False
-        (_md, self.userdata_raw, cfg, _files) = ret
+        (_md, _userdata_raw, cfg, _files) = ret
         path = REPROVISION_MARKER_FILE
         if (cfg.get('PreprovisionedVm') is True or
                 os.path.isfile(path)):
@@ -543,22 +604,15 @@ class DataSourceAzure(sources.DataSource):
     @property
     def network_config(self):
         """Generate a network config like net.generate_fallback_network() with
-           the following execptions.
+           the following exceptions.
 
            1. Probe the drivers of the net-devices present and inject them in
               the network configuration under params: driver: <driver> value
            2. Generate a fallback network config that does not include any of
               the blacklisted devices.
         """
-        blacklist = ['mlx4_core']
         if not self._network_config:
-            LOG.debug('Azure: generating fallback configuration')
-            # generate a network config, blacklist picking any mlx4_core devs
-            netconfig = net.generate_fallback_config(
-                blacklist_drivers=blacklist, config_driver=True)
-
-            self._network_config = netconfig
-
+            self._network_config = parse_network_config(self._metadata_imds)
         return self._network_config
 
 
@@ -1025,6 +1079,162 @@ def load_azure_ds_dir(source_dir):
     return (md, ud, cfg, {'ovf-env.xml': contents})
 
 
+def parse_network_config(imds_metadata):
+    """Convert imds_metadata dictionary to network v2 configuration.
+
+    Parses network configuration from imds metadata if present or generate
+    fallback network config excluding mlx4_core devices.
+
+    @param: imds_metadata: Dict of content read from IMDS network service.
+    @return: Dictionary containing network version 2 standard configuration.
+    """
+    if imds_metadata != sources.UNSET and imds_metadata:
+        netconfig = {'version': 2, 'ethernets': {}}
+        LOG.debug('Azure: generating network configuration from IMDS')
+        network_metadata = imds_metadata['network']
+        for idx, intf in enumerate(network_metadata['interface']):
+            nicname = 'eth{idx}'.format(idx=idx)
+            dev_config = {}
+            for addr4 in intf['ipv4']['ipAddress']:
+                privateIpv4 = addr4['privateIpAddress']
+                if privateIpv4:
+                    if dev_config.get('dhcp4', False):
+                        # Append static address config for ip > 1
+                        netPrefix = intf['ipv4']['subnet'][0].get(
+                            'prefix', '24')
+                        if not dev_config.get('addresses'):
+                            dev_config['addresses'] = []
+                        dev_config['addresses'].append(
+                            '{ip}/{prefix}'.format(
+                                ip=privateIpv4, prefix=netPrefix))
+                    else:
+                        dev_config['dhcp4'] = True
+            for addr6 in intf['ipv6']['ipAddress']:
+                privateIpv6 = addr6['privateIpAddress']
+                if privateIpv6:
+                    dev_config['dhcp6'] = True
+                    break
+            if dev_config:
+                mac = ':'.join(re.findall(r'..', intf['macAddress']))
+                dev_config.update(
+                    {'match': {'macaddress': mac.lower()},
+                     'set-name': nicname})
+                netconfig['ethernets'][nicname] = dev_config
+    else:
+        blacklist = ['mlx4_core']
+        LOG.debug('Azure: generating fallback configuration')
+        # generate a network config, blacklist picking mlx4_core devs
+        netconfig = net.generate_fallback_config(
+            blacklist_drivers=blacklist, config_driver=True)
+    return netconfig
+
+
+def get_metadata_from_imds(fallback_nic, retries):
+    """Query Azure's network metadata service, returning a dictionary.
+
+    If network is not up, setup ephemeral dhcp on fallback_nic to talk to the
+    IMDS. For more info on IMDS:
+        https://docs.microsoft.com/en-us/azure/virtual-machines/windows/instance-metadata-service
+
+    @param fallback_nic: String. The name of the nic which requires active
+        network in order to query IMDS.
+    @param retries: The number of retries of the IMDS_URL.
+
+    @return: A dict of instance metadata containing compute and network
+        info.
+    """
+    if net.is_up(fallback_nic):
+        return util.log_time(
+            logfunc=LOG.debug,
+            msg='Crawl of Azure Instance Metadata Service (IMDS)',
+            func=_get_metadata_from_imds, args=(retries,))
+    else:
+        with EphemeralDHCPv4(fallback_nic):
+            return util.log_time(
+                logfunc=LOG.debug,
+                msg='Crawl of Azure Instance Metadata Service (IMDS)',
+                func=_get_metadata_from_imds, args=(retries,))
+
+
+def _get_metadata_from_imds(retries):
+
+    def retry_on_url_error(msg, exception):
+        if isinstance(exception, UrlError) and exception.code == 404:
+            return True  # Continue retries
+        return False  # Stop retries on all other exceptions
+
+    url = IMDS_URL + "instance?api-version=2017-12-01"
+    headers = {"Metadata": "true"}
+    try:
+        response = readurl(
+            url, timeout=1, headers=headers, retries=retries,
+            exception_cb=retry_on_url_error)
+    except Exception as e:
+        LOG.debug('Ignoring IMDS instance metadata: %s', e)
+        return {}
+    try:
+        return util.load_json(str(response))
+    except json.decoder.JSONDecodeError:
+        LOG.warning(
+            'Ignoring non-json IMDS instance metadata: %s', str(response))
+    return {}
+
+
+def maybe_remove_ubuntu_network_config_scripts(paths=None):
+    """Remove Azure-specific ubuntu network config for non-primary nics.
+
+    @param paths: List of networking scripts or directories to remove when
+        present.
+
+    In certain supported ubuntu images, static udev rules or netplan yaml
+    config is delivered in the base ubuntu image to support dhcp on any
+    additional interfaces which get attached by a customer at some point
+    after initial boot. Since the Azure datasource can now regenerate
+    network configuration as metadata reports these new devices, we no longer
+    want the udev rules or netplan's 90-azure-hotplug.yaml to configure
+    networking on eth1 or greater as it might collide with cloud-init's
+    configuration.
+
+    Remove any existing extended network scripts if the datasource is
+    enabled to write network per-boot.
+    """
+    if not paths:
+        paths = UBUNTU_EXTENDED_NETWORK_SCRIPTS
+    logged = False
+    for path in paths:
+        if os.path.exists(path):
+            if not logged:
+                LOG.info(
+                    'Removing Ubuntu extended network scripts because'
+                    ' cloud-init updates Azure network configuration on the'
+                    ' following event: %s.',
+                    EventType.BOOT)
+                logged = True
+            if os.path.isdir(path):
+                util.del_dir(path)
+            else:
+                util.del_file(path)
+
+
+def _is_platform_viable(seed_dir):
+    """Check platform environment to report if this datasource may run."""
+    asset_tag = util.read_dmi_data('chassis-asset-tag')
+    if asset_tag == AZURE_CHASSIS_ASSET_TAG:
+        return True
+    LOG.debug("Non-Azure DMI asset tag '%s' discovered.", asset_tag)
+    for path in (AGENT_SEED_DIR, seed_dir):
+        if os.path.exists(os.path.join(path, 'ovf-env.xml')):
+            return True
+    if util.which('systemd-detect-virt'):
+        (virt_type, _err) = util.subp(
+            ['systemd-detect-virt'], rcs=[0, 1], capture=True)
+        if virt_type.strip() == 'microsoft':
+            return True
+    if util.find_devs_with(criteria='LABEL=rd_rdfe_*'):
+        return True
+    return False
+
+
 class BrokenAzureDataSource(Exception):
     pass
 
diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
index e82716e..5eae247 100644
--- a/tests/unittests/test_datasource/test_azure.py
+++ b/tests/unittests/test_datasource/test_azure.py
@@ -1,15 +1,21 @@
 # This file is part of cloud-init. See LICENSE file for license information.
 
+from cloudinit import distros
 from cloudinit import helpers
-from cloudinit.sources import DataSourceAzure as dsaz
+from cloudinit import url_helper
+from cloudinit.sources import (
+    UNSET, DataSourceAzure as dsaz, InvalidMetaDataException)
 from cloudinit.util import (b64e, decode_binary, load_file, write_file,
                             find_freebsd_part, get_path_dev_freebsd,
                             MountFailedError)
 from cloudinit.version import version_string as vs
-from cloudinit.tests.helpers import (CiTestCase, TestCase, populate_dir, mock,
-                                     ExitStack, PY26, SkipTest)
+from cloudinit.tests.helpers import (
+    HttprettyTestCase, CiTestCase, populate_dir, mock, wrap_and_call,
+    ExitStack, PY26, SkipTest)
 
 import crypt
+import httpretty
+import json
 import os
 import stat
 import xml.etree.ElementTree as ET
@@ -77,6 +83,106 @@ def construct_valid_ovf_env(data=None, pubkeys=None,
     return content
 
 
+NETWORK_METADATA = {
+    "network": {
+        "interface": [
+            {
+                "macAddress": "000D3A047598",
+                "ipv6": {
+                    "ipAddress": []
+                },
+                "ipv4": {
+                    "subnet": [
+                        {
+                           "prefix": "24",
+                           "address": "10.0.0.0"
+                        }
+                    ],
+                    "ipAddress": [
+                        {
+                           "privateIpAddress": "10.0.0.4",
+                           "publicIpAddress": "104.46.124.81"
+                        }
+                    ]
+                }
+            }
+        ]
+    }
+}
+
+
+class TestGetMetadataFromIMDS(HttprettyTestCase):
+
+    with_logs = True
+
+    def setUp(self):
+        super(TestGetMetadataFromIMDS, self).setUp()
+        self.network_md_url = dsaz.IMDS_URL + "instance?api-version=2017-12-01"
+
+    @mock.patch('cloudinit.sources.DataSourceAzure.readurl')
+    @mock.patch('cloudinit.sources.DataSourceAzure.EphemeralDHCPv4')
+    @mock.patch('cloudinit.sources.DataSourceAzure.net.is_up')
+    def test_get_metadata_does_not_dhcp_if_network_is_up(
+            self, m_net_is_up, m_dhcp, m_readurl):
+        """Do not perform DHCP setup when nic is already up."""
+        m_net_is_up.return_value = True
+        m_readurl.return_value = url_helper.StringResponse(
+            json.dumps(NETWORK_METADATA).encode('utf-8'))
+        self.assertEqual(
+            NETWORK_METADATA,
+            dsaz.get_metadata_from_imds('eth9', retries=3))
+
+        m_net_is_up.assert_called_with('eth9')
+        m_dhcp.assert_not_called()
+        self.assertIn(
+            "Crawl of Azure Instance Metadata Service (IMDS) took",  # log_time
+            self.logs.getvalue())
+
+    @mock.patch('cloudinit.sources.DataSourceAzure.readurl')
+    @mock.patch('cloudinit.sources.DataSourceAzure.EphemeralDHCPv4')
+    @mock.patch('cloudinit.sources.DataSourceAzure.net.is_up')
+    def test_get_metadata_performs_dhcp_when_network_is_down(
+            self, m_net_is_up, m_dhcp, m_readurl):
+        """Perform DHCP setup when nic is not up."""
+        m_net_is_up.return_value = False
+        m_readurl.return_value = url_helper.StringResponse(
+            json.dumps(NETWORK_METADATA).encode('utf-8'))
+
+        self.assertEqual(
+            NETWORK_METADATA,
+            dsaz.get_metadata_from_imds('eth9', retries=2))
+
+        m_net_is_up.assert_called_with('eth9')
+        m_dhcp.assert_called_with('eth9')
+        self.assertIn(
+            "Crawl of Azure Instance Metadata Service (IMDS) took",  # log_time
+            self.logs.getvalue())
+
+        m_readurl.assert_called_with(
+            self.network_md_url, exception_cb=mock.ANY,
+            headers={'Metadata': 'true'}, retries=2, timeout=1)
+
+    @mock.patch('cloudinit.url_helper.time.sleep')
+    @mock.patch('cloudinit.sources.DataSourceAzure.net.is_up')
+    def test_get_metadata_from_imds_empty_when_no_imds_present(
+            self, m_net_is_up, m_sleep):
+        """Return empty dict when IMDS network metadata is absent."""
+        httpretty.register_uri(
+            httpretty.GET,
+            dsaz.IMDS_URL + 'instance?api-version=2017-12-01',
+            body={}, status=404)
+
+        m_net_is_up.return_value = True  # skips dhcp
+
+        self.assertEqual({}, dsaz.get_metadata_from_imds('eth9', retries=2))
+
+        m_net_is_up.assert_called_with('eth9')
+        self.assertEqual([mock.call(1), mock.call(1)], m_sleep.call_args_list)
+        self.assertIn(
+            "Crawl of Azure Instance Metadata Service (IMDS) took",  # log_time
+            self.logs.getvalue())
+
+
 class TestAzureDataSource(CiTestCase):
 
     with_logs = True
@@ -95,8 +201,19 @@ class TestAzureDataSource(CiTestCase):
         self.patches = ExitStack()
         self.addCleanup(self.patches.close)
 
-        self.patches.enter_context(mock.patch.object(dsaz, '_get_random_seed'))
-
+        self.patches.enter_context(mock.patch.object(
+            dsaz, '_get_random_seed', return_value='wild'))
+        self.m_get_metadata_from_imds = self.patches.enter_context(
+            mock.patch.object(
+                dsaz, 'get_metadata_from_imds',
+                mock.MagicMock(return_value=NETWORK_METADATA)))
+        self.m_fallback_nic = self.patches.enter_context(
+            mock.patch('cloudinit.sources.net.find_fallback_nic',
+                       return_value='eth9'))
+        self.m_remove_ubuntu_network_scripts = self.patches.enter_context(
+            mock.patch.object(
+                dsaz, 'maybe_remove_ubuntu_network_config_scripts',
+                mock.MagicMock()))
         super(TestAzureDataSource, self).setUp()
 
     def apply_patches(self, patches):
@@ -137,7 +254,7 @@ scbus-1 on xpt0 bus 0
         ])
         return dsaz
 
-    def _get_ds(self, data, agent_command=None):
+    def _get_ds(self, data, agent_command=None, distro=None):
 
         def dsdevs():
             return data.get('dsdevs', [])
@@ -186,8 +303,11 @@ scbus-1 on xpt0 bus 0
                 side_effect=_wait_for_files)),
         ])
 
+        if distro is not None:
+            distro_cls = distros.fetch(distro)
+            distro = distro_cls(distro, data.get('sys_cfg', {}), self.paths)
         dsrc = dsaz.DataSourceAzure(
-            data.get('sys_cfg', {}), distro=None, paths=self.paths)
+            data.get('sys_cfg', {}), distro=distro, paths=self.paths)
         if agent_command is not None:
             dsrc.ds_cfg['agent_command'] = agent_command
 
@@ -260,29 +380,20 @@ fdescfs            /dev/fd          fdescfs rw              0 0
             res = get_path_dev_freebsd('/etc', mnt_list)
             self.assertIsNotNone(res)
 
-    @mock.patch('cloudinit.sources.DataSourceAzure.util.read_dmi_data')
-    def test_non_azure_dmi_chassis_asset_tag(self, m_read_dmi_data):
-        """Report non-azure when DMI's chassis asset tag doesn't match.
-
-        Return False when the asset tag doesn't match Azure's static
-        AZURE_CHASSIS_ASSET_TAG.
-        """
+    @mock.patch('cloudinit.sources.DataSourceAzure._is_platform_viable')
+    def test_call_is_platform_viable_seed(self, m_is_platform_viable):
+        """Check seed_dir using _is_platform_viable and return False."""
         # Return a non-matching asset tag value
-        nonazure_tag = dsaz.AZURE_CHASSIS_ASSET_TAG + 'X'
-        m_read_dmi_data.return_value = nonazure_tag
+        m_is_platform_viable.return_value = False
         dsrc = dsaz.DataSourceAzure(
             {}, distro=None, paths=self.paths)
         self.assertFalse(dsrc.get_data())
-        self.assertEqual(
-            "DEBUG: Non-Azure DMI asset tag '{0}' discovered.\n".format(
-                nonazure_tag),
-            self.logs.getvalue())
+        m_is_platform_viable.assert_called_with(dsrc.seed_dir)
 
     def test_basic_seed_dir(self):
         odata = {'HostName': "myhost", 'UserName': "myuser"}
         data = {'ovfcontent': construct_valid_ovf_env(data=odata),
                 'sys_cfg': {}}
-
         dsrc = self._get_ds(data)
         ret = dsrc.get_data()
         self.assertTrue(ret)
@@ -291,6 +402,82 @@ fdescfs            /dev/fd          fdescfs rw              0 0
         self.assertTrue(os.path.isfile(
             os.path.join(self.waagent_d, 'ovf-env.xml')))
 
+    def test_get_data_non_ubuntu_will_not_remove_network_scripts(self):
+        """get_data on non-Ubuntu will not remove ubuntu net scripts."""
+        odata = {'HostName': "myhost", 'UserName': "myuser"}
+        data = {'ovfcontent': construct_valid_ovf_env(data=odata),
+                'sys_cfg': {}}
+
+        dsrc = self._get_ds(data, distro='debian')
+        dsrc.get_data()
+        self.m_remove_ubuntu_network_scripts.assert_not_called()
+
+    def test_get_data_on_ubuntu_will_remove_network_scripts(self):
+        """get_data will remove ubuntu net scripts on Ubuntu distro."""
+        odata = {'HostName': "myhost", 'UserName': "myuser"}
+        data = {'ovfcontent': construct_valid_ovf_env(data=odata),
+                'sys_cfg': {}}
+
+        dsrc = self._get_ds(data, distro='ubuntu')
+        dsrc.get_data()
+        self.m_remove_ubuntu_network_scripts.assert_called_once_with()
+
+    def test_crawl_metadata_returns_structured_data_and_caches_nothing(self):
+        """Return all structured metadata and cache no class attributes."""
+        yaml_cfg = "{agent_command: my_command}\n"
+        odata = {'HostName': "myhost", 'UserName': "myuser",
+                 'UserData': {'text': 'FOOBAR', 'encoding': 'plain'},
+                 'dscfg': {'text': yaml_cfg, 'encoding': 'plain'}}
+        data = {'ovfcontent': construct_valid_ovf_env(data=odata),
+                'sys_cfg': {}}
+        dsrc = self._get_ds(data)
+        expected_cfg = {
+            'PreprovisionedVm': False,
+            'datasource': {'Azure': {'agent_command': 'my_command'}},
+            'system_info': {'default_user': {'name': u'myuser'}}}
+        expected_metadata = {
+            'azure_data': {
+                'configurationsettype': 'LinuxProvisioningConfiguration'},
+            'imds': {'network': {'interface': [{
+                'ipv4': {'ipAddress': [
+                     {'privateIpAddress': '10.0.0.4',
+                      'publicIpAddress': '104.46.124.81'}],
+                      'subnet': [{'address': '10.0.0.0', 'prefix': '24'}]},
+                'ipv6': {'ipAddress': []},
+                'macAddress': '000D3A047598'}]}},
+            'instance-id': 'test-instance-id',
+            'local-hostname': u'myhost',
+            'random_seed': 'wild'}
+
+        crawled_metadata = dsrc.crawl_metadata()
+
+        self.assertItemsEqual(
+            crawled_metadata.keys(),
+            ['cfg', 'files', 'metadata', 'userdata_raw'])
+        self.assertEqual(crawled_metadata['cfg'], expected_cfg)
+        self.assertEqual(
+            list(crawled_metadata['files'].keys()), ['ovf-env.xml'])
+        self.assertIn(
+            b'<HostName>myhost</HostName>',
+            crawled_metadata['files']['ovf-env.xml'])
+        self.assertEqual(crawled_metadata['metadata'], expected_metadata)
+        self.assertEqual(crawled_metadata['userdata_raw'], 'FOOBAR')
+        self.assertEqual(dsrc.userdata_raw, None)
+        self.assertEqual(dsrc.metadata, {})
+        self.assertEqual(dsrc._metadata_imds, UNSET)
+        self.assertFalse(os.path.isfile(
+            os.path.join(self.waagent_d, 'ovf-env.xml')))
+
+    def test_crawl_metadata_raises_invalid_metadata_on_error(self):
+        """crawl_metadata raises an exception on invalid ovf-env.xml."""
+        data = {'ovfcontent': "BOGUS", 'sys_cfg': {}}
+        dsrc = self._get_ds(data)
+        error_msg = ('BrokenAzureDataSource: Invalid ovf-env.xml:'
+                     ' syntax error: line 1, column 0')
+        with self.assertRaises(InvalidMetaDataException) as cm:
+            dsrc.crawl_metadata()
+        self.assertEqual(str(cm.exception), error_msg)
+
     def test_waagent_d_has_0700_perms(self):
         # we expect /var/lib/waagent to be created 0700
         dsrc = self._get_ds({'ovfcontent': construct_valid_ovf_env()})
@@ -314,6 +501,20 @@ fdescfs            /dev/fd          fdescfs rw              0 0
         self.assertTrue(ret)
         self.assertEqual(data['agent_invoked'], cfg['agent_command'])
 
+    def test_network_config_set_from_imds(self):
+        """Datasource.network_config returns IMDS network data."""
+        odata = {}
+        data = {'ovfcontent': construct_valid_ovf_env(data=odata)}
+        expected_network_config = {
+            'ethernets': {
+                'eth0': {'set-name': 'eth0',
+                         'match': {'macaddress': '00:0d:3a:04:75:98'},
+                         'dhcp4': True}},
+            'version': 2}
+        dsrc = self._get_ds(data)
+        dsrc.get_data()
+        self.assertEqual(expected_network_config, dsrc.network_config)
+
     def test_user_cfg_set_agent_command(self):
         # set dscfg in via base64 encoded yaml
         cfg = {'agent_command': "my_command"}
@@ -579,12 +780,34 @@ fdescfs            /dev/fd          fdescfs rw              0 0
         self.assertEqual(
             [mock.call("/dev/cd0")], m_check_fbsd_cdrom.call_args_list)
 
+    @mock.patch('cloudinit.net.generate_fallback_config')
+    def test_imds_network_config(self, mock_fallback):
+        """Network config is generated from IMDS network data when present."""
+        odata = {'HostName': "myhost", 'UserName': "myuser"}
+        data = {'ovfcontent': construct_valid_ovf_env(data=odata),
+                'sys_cfg': {}}
+
+        dsrc = self._get_ds(data)
+        ret = dsrc.get_data()
+        self.assertTrue(ret)
+
+        expected_cfg = {
+            'ethernets': {
+                'eth0': {'dhcp4': True,
+                         'match': {'macaddress': '00:0d:3a:04:75:98'},
+                         'set-name': 'eth0'}},
+            'version': 2}
+
+        self.assertEqual(expected_cfg, dsrc.network_config)
+        mock_fallback.assert_not_called()
+
     @mock.patch('cloudinit.net.get_interface_mac')
     @mock.patch('cloudinit.net.get_devicelist')
     @mock.patch('cloudinit.net.device_driver')
     @mock.patch('cloudinit.net.generate_fallback_config')
-    def test_network_config(self, mock_fallback, mock_dd,
-                            mock_devlist, mock_get_mac):
+    def test_fallback_network_config(self, mock_fallback, mock_dd,
+                                     mock_devlist, mock_get_mac):
+        """On absent IMDS network data, generate network fallback config."""
         odata = {'HostName': "myhost", 'UserName': "myuser"}
         data = {'ovfcontent': construct_valid_ovf_env(data=odata),
                 'sys_cfg': {}}
@@ -605,6 +828,8 @@ fdescfs            /dev/fd          fdescfs rw              0 0
         mock_get_mac.return_value = '00:11:22:33:44:55'
 
         dsrc = self._get_ds(data)
+        # Represent empty response from network imds
+        self.m_get_metadata_from_imds.return_value = {}
         ret = dsrc.get_data()
         self.assertTrue(ret)
 
@@ -617,8 +842,9 @@ fdescfs            /dev/fd          fdescfs rw              0 0
     @mock.patch('cloudinit.net.get_devicelist')
     @mock.patch('cloudinit.net.device_driver')
     @mock.patch('cloudinit.net.generate_fallback_config')
-    def test_network_config_blacklist(self, mock_fallback, mock_dd,
-                                      mock_devlist, mock_get_mac):
+    def test_fallback_network_config_blacklist(self, mock_fallback, mock_dd,
+                                               mock_devlist, mock_get_mac):
+        """On absent network metadata, blacklist mlx from fallback config."""
         odata = {'HostName': "myhost", 'UserName': "myuser"}
         data = {'ovfcontent': construct_valid_ovf_env(data=odata),
                 'sys_cfg': {}}
@@ -649,6 +875,8 @@ fdescfs            /dev/fd          fdescfs rw              0 0
         mock_get_mac.return_value = '00:11:22:33:44:55'
 
         dsrc = self._get_ds(data)
+        # Represent empty response from network imds
+        self.m_get_metadata_from_imds.return_value = {}
         ret = dsrc.get_data()
         self.assertTrue(ret)
 
@@ -689,9 +917,12 @@ class TestAzureBounce(CiTestCase):
             mock.patch.object(dsaz, 'get_metadata_from_fabric',
                               mock.MagicMock(return_value={})))
         self.patches.enter_context(
-            mock.patch.object(dsaz.util, 'which', lambda x: True))
+            mock.patch.object(dsaz, 'get_metadata_from_imds',
+                              mock.MagicMock(return_value={})))
         self.patches.enter_context(
-            mock.patch.object(dsaz, '_get_random_seed'))
+            mock.patch.object(dsaz.util, 'which', lambda x: True))
+        self.patches.enter_context(mock.patch.object(
+            dsaz, '_get_random_seed', return_value='wild'))
 
         def _dmi_mocks(key):
             if key == 'system-uuid':
@@ -719,9 +950,12 @@ class TestAzureBounce(CiTestCase):
             mock.patch.object(dsaz, 'set_hostname'))
         self.subp = self.patches.enter_context(
             mock.patch('cloudinit.sources.DataSourceAzure.util.subp'))
+        self.find_fallback_nic = self.patches.enter_context(
+            mock.patch('cloudinit.net.find_fallback_nic', return_value='eth9'))
 
     def tearDown(self):
         self.patches.close()
+        super(TestAzureBounce, self).tearDown()
 
     def _get_ds(self, ovfcontent=None, agent_command=None):
         if ovfcontent is not None:
@@ -927,7 +1161,7 @@ class TestLoadAzureDsDir(CiTestCase):
             str(context_manager.exception))
 
 
-class TestReadAzureOvf(TestCase):
+class TestReadAzureOvf(CiTestCase):
 
     def test_invalid_xml_raises_non_azure_ds(self):
         invalid_xml = "<foo>" + construct_valid_ovf_env(data={})
@@ -1188,6 +1422,25 @@ class TestCanDevBeReformatted(CiTestCase):
                       "(datasource.Azure.never_destroy_ntfs)", msg)
 
 
+class TestClearCachedData(CiTestCase):
+
+    def test_clear_cached_attrs_clears_imds(self):
+        """All class attributes are reset to defaults, including imds data."""
+        tmp = self.tmp_dir()
+        paths = helpers.Paths(
+            {'cloud_dir': tmp, 'run_dir': tmp})
+        dsrc = dsaz.DataSourceAzure({}, distro=None, paths=paths)
+        clean_values = [dsrc.metadata, dsrc.userdata, dsrc._metadata_imds]
+        dsrc.metadata = 'md'
+        dsrc.userdata = 'ud'
+        dsrc._metadata_imds = 'imds'
+        dsrc._dirty_cache = True
+        dsrc.clear_cached_attrs()
+        self.assertEqual(
+            [dsrc.metadata, dsrc.userdata, dsrc._metadata_imds],
+            clean_values)
+
+
 class TestAzureNetExists(CiTestCase):
 
     def test_azure_net_must_exist_for_legacy_objpkl(self):
@@ -1398,4 +1651,128 @@ class TestAzureDataSourcePreprovisioning(CiTestCase):
         self.assertEqual(m_net.call_count, 1)
 
 
+class TestRemoveUbuntuNetworkConfigScripts(CiTestCase):
+
+    with_logs = True
+
+    def setUp(self):
+        super(TestRemoveUbuntuNetworkConfigScripts, self).setUp()
+        self.tmp = self.tmp_dir()
+
+    def test_remove_network_scripts_removes_both_files_and_directories(self):
+        """Any files or directories in paths are removed when present."""
+        file1 = self.tmp_path('file1', dir=self.tmp)
+        subdir = self.tmp_path('sub1', dir=self.tmp)
+        subfile = self.tmp_path('leaf1', dir=subdir)
+        write_file(file1, 'file1content')
+        write_file(subfile, 'leafcontent')
+        dsaz.maybe_remove_ubuntu_network_config_scripts(paths=[subdir, file1])
+
+        for path in (file1, subdir, subfile):
+            self.assertFalse(os.path.exists(path),
+                             'Found unremoved: %s' % path)
+
+        expected_logs = [
+            'INFO: Removing Ubuntu extended network scripts because cloud-init'
+            ' updates Azure network configuration on the following event:'
+            ' System boot.',
+            'Recursively deleting %s' % subdir,
+            'Attempting to remove %s' % file1]
+        for log in expected_logs:
+            self.assertIn(log, self.logs.getvalue())
+
+    def test_remove_network_scripts_only_attempts_removal_if_path_exists(self):
+        """Any files or directories absent are skipped without error."""
+        dsaz.maybe_remove_ubuntu_network_config_scripts(paths=[
+            self.tmp_path('nodirhere/', dir=self.tmp),
+            self.tmp_path('notfilehere', dir=self.tmp)])
+        self.assertNotIn('/not/a', self.logs.getvalue())  # No delete logs
+
+    @mock.patch('cloudinit.sources.DataSourceAzure.os.path.exists')
+    def test_remove_network_scripts_default_removes_stock_scripts(self,
+                                                                  m_exists):
+        """Azure's stock ubuntu image scripts and artifacts are removed."""
+        # Report every path as absent so no delete operation is attempted
+        m_exists.return_value = False
+        dsaz.maybe_remove_ubuntu_network_config_scripts()
+        calls = m_exists.call_args_list
+        for path in dsaz.UBUNTU_EXTENDED_NETWORK_SCRIPTS:
+            self.assertIn(mock.call(path), calls)
+
+
+class TestWBIsPlatformViable(CiTestCase):
+    """White box tests for _is_platform_viable."""
+    with_logs = True
+
+    @mock.patch('cloudinit.sources.DataSourceAzure.util.read_dmi_data')
+    def test_true_on_non_azure_chassis(self, m_read_dmi_data):
+        """Return True if DMI chassis-asset-tag is AZURE_CHASSIS_ASSET_TAG."""
+        m_read_dmi_data.return_value = dsaz.AZURE_CHASSIS_ASSET_TAG
+        self.assertTrue(dsaz._is_platform_viable('doesnotmatter'))
+
+    @mock.patch('cloudinit.sources.DataSourceAzure.os.path.exists')
+    @mock.patch('cloudinit.sources.DataSourceAzure.util.read_dmi_data')
+    def test_true_on_azure_ovf_env_in_seed_dir(self, m_read_dmi_data, m_exist):
+        """Return True if ovf-env.xml exists in known seed dirs."""
+        # Non-matching Azure chassis-asset-tag
+        m_read_dmi_data.return_value = dsaz.AZURE_CHASSIS_ASSET_TAG + 'X'
+
+        m_exist.side_effect = [False, True]
+        self.assertTrue(dsaz._is_platform_viable('/other/seed/dir'))
+        self.assertEqual(
+            [mock.call('/var/lib/waagent/ovf-env.xml'),
+             mock.call('/other/seed/dir/ovf-env.xml')],
+            m_exist.call_args_list)
+
+    @mock.patch('cloudinit.sources.DataSourceAzure.util.which')
+    @mock.patch('cloudinit.sources.DataSourceAzure.util.subp')
+    def test_true_on_detect_virt_microsoft(self, m_subp, m_which):
+        """Return True if systemd-detect-virt reports 'microsoft'."""
+        m_which.return_value = '/usr/bin/systemd-detect-virt'
+        m_subp.return_value = ('microsoft\n', '')
+        self.assertTrue(wrap_and_call(
+            'cloudinit.sources.DataSourceAzure',
+            {'os.path.exists': False,
+             # Non-matching Azure chassis-asset-tag
+             'util.read_dmi_data': dsaz.AZURE_CHASSIS_ASSET_TAG + 'X'},
+            dsaz._is_platform_viable, 'doesnotmatter'))
+        m_which.assert_called_once_with('systemd-detect-virt')
+        m_subp.assert_called_once_with(
+            ['systemd-detect-virt'], capture=True, rcs=[0, 1])
+
+    @mock.patch('cloudinit.sources.DataSourceAzure.util.find_devs_with')
+    def test_true_on_azure_when_fs_label_prefix_rd_rdfe(self, m_find_devs):
+        """Return True if a device label with prefix rd_rdfe_ is found."""
+
+        m_find_devs.return_value = ['/dev/Imatched/azure']
+        self.assertTrue(wrap_and_call(
+            'cloudinit.sources.DataSourceAzure',
+            {'os.path.exists': False,
+             # Non-matching Azure chassis-asset-tag
+             'util.read_dmi_data': dsaz.AZURE_CHASSIS_ASSET_TAG + 'X',
+             'util.which': None},
+            dsaz._is_platform_viable, 'doesnotmatter'))
+        m_find_devs.assert_called_once_with(criteria='LABEL=rd_rdfe_*')
+
+    def test_false_on_no_matching_azure_criteria(self):
+        """Report non-azure on unmatched asset tag, ovf-env absent and no dev.
+
+        Return False when the asset tag doesn't match Azure's static
+        AZURE_CHASSIS_ASSET_TAG, no ovf-env.xml files exist in known seed dirs
+        and no devices have a label starting with prefix 'rd_rdfe_'.
+        """
+        self.assertFalse(wrap_and_call(
+            'cloudinit.sources.DataSourceAzure',
+            {'os.path.exists': False,
+             # Non-matching Azure chassis-asset-tag
+             'util.read_dmi_data': dsaz.AZURE_CHASSIS_ASSET_TAG + 'X',
+             'util.which': None,
+             'util.find_devs_with': []},
+            dsaz._is_platform_viable, 'doesnotmatter'))
+        self.assertIn(
+            "DEBUG: Non-Azure DMI asset tag '{0}' discovered.\n".format(
+                dsaz.AZURE_CHASSIS_ASSET_TAG + 'X'),
+            self.logs.getvalue())
+
+
 # vi: ts=4 expandtab

Follow ups