cloud-init-dev team mailing list archive

[Merge] ~chad.smith/cloud-init:feature/os-local into cloud-init:master

 

Chad Smith has proposed merging ~chad.smith/cloud-init:feature/os-local into cloud-init:master.

Commit message:
openstack: Allow discovery in init-local using dhclient in a sandbox.

The network has not yet been configured in the init-local stage, so the
OpenStack datasource will use dhclient to temporarily obtain an ipv4
address and query the metadata service at http://169.254.169.254 for
network_data.json configuration. If present, the datasource will return
version 1 network_config based on that network_data.json content.
Previously the OpenStack datasource only set up dhcp on the fallback
interface, so this is a change in behavior: cloud-init now reacts to the
full network config provided by OpenStack.
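
For reference, the init-local discovery boils down to wrapping the
metadata crawl in an ephemeral DHCP lease. A minimal sketch of that
pattern (the helper name is illustrative; it assumes cloud-init's
modules are importable and trims error handling):

    from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError

    def crawl_in_init_local(datasource):
        # Bring up a temporary ipv4 lease on the fallback nic, crawl the
        # metadata service at 169.254.169.254, then drop the lease when
        # the context manager exits.
        try:
            with EphemeralDHCPv4(datasource.fallback_interface):
                return datasource._crawl_metadata()
        except NoDHCPLeaseError:
            return None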

Also significant to OpenStack is the separation of a _crawl_metadata
operation from get_data(). _crawl_metadata walks the available metadata
services and returns a dict of the discovered content; get_data consumes
that crawled data, caches it on the datasource and reacts to it.
/run/cloud-init/instance-data.json now publishes network_json and
ec2_metadata keys when that data is present on a datasource.
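
The split itself is a small pattern: the crawl returns a plain dict and
never caches, while get_data does the caching. A toy illustration
(class name and values are made up, simplified from the diff below):

    class ExampleDataSource(object):
        """Toy illustration of the crawl/consume split."""

        def _crawl_metadata(self):
            # Walk the metadata service and return what was found; raise
            # on unreadable metadata rather than caching partial results.
            return {'metadata': {'instance-id': 'i-abc123'},
                    'networkdata': {'links': [], 'networks': []}}

        def _get_data(self):
            # Consume the crawled dict and cache it on the datasource.
            results = self._crawl_metadata()
            self.metadata = results.get('metadata', {})
            self.network_json = results.get('networkdata')
            return bool(self.metadata)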

The main reasons for the separation of crawl from get_data:
 * Enable performance metrics of cloud-init's metadata crawls on each boot.
 * Enable cloud-init modules and scripts to query and consume metadata
   content which may have been updated/changed after cloud-init's initial
   cache during instance boot. (Think hotplug.)
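
As a concrete example of the second point, a module or script could
re-read the crawled content from the published json without contacting
the metadata service again. A sketch (the 'ds' key layout is the one
added in the sources/__init__.py hunk below):

    import json

    def read_published_network_json(
            path='/run/cloud-init/instance-data.json'):
        # network_json and ec2_metadata land under the 'ds' key when the
        # datasource discovered them.
        with open(path) as stream:
            instance_data = json.load(stream)
        return instance_data.get('ds', {}).get('network_json')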


Also generalize common logic into the base DataSource class/module:
 * Move a common UNSET variable up into the base datasource module and fix
   EC2, ConfigDrive, OpenStack and SmartOS to use that global.
 * Drop _get_url_settings from Ec2, CloudStack and OpenStack and generalize
   it as DataSource.get_url_params(). Allow subclasses to override the
   url_max_wait, url_timeout and url_retries params.
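
With that in place, a subclass only declares its defaults and calls the
shared helper; ds_cfg keys max_wait/timeout/retries still override them.
A hypothetical subclass, mirroring the Ec2/CloudStack hunks below:

    from cloudinit import sources

    class DataSourceExample(sources.DataSource):
        """Hypothetical subclass illustrating get_url_params usage."""

        # Class-level defaults consulted by DataSource.get_url_params();
        # ds_cfg max_wait/timeout/retries override them at runtime.
        url_max_wait = 120
        url_timeout = 50

        def wait_for_metadata_service(self):
            max_wait, timeout, _retries = self.get_url_params()
            if max_wait <= 0:
                return False
            # ... a real subclass would poll its metadata urls here ...
            return True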

LP: #1749717

Requested reviews:
  Server Team CI bot (server-team-bot): continuous-integration
  cloud-init commiters (cloud-init-dev)
Related bugs:
  Bug #1749717 in cloud-init: "OpenStack datasource does not read network data"
  https://bugs.launchpad.net/cloud-init/+bug/1749717

For more details, see:
https://code.launchpad.net/~chad.smith/cloud-init/+git/cloud-init/+merge/345806
-- 
Your team cloud-init commiters is requested to review the proposed merge of ~chad.smith/cloud-init:feature/os-local into cloud-init:master.
diff --git a/cloudinit/sources/DataSourceCloudStack.py b/cloudinit/sources/DataSourceCloudStack.py
index 0df545f..cfcdb1f 100644
--- a/cloudinit/sources/DataSourceCloudStack.py
+++ b/cloudinit/sources/DataSourceCloudStack.py
@@ -68,6 +68,10 @@ class DataSourceCloudStack(sources.DataSource):
 
     dsname = 'CloudStack'
 
+    # Setup read_url parameters per get_url_params.
+    url_max_wait = 120
+    url_timeout = 50
+
     def __init__(self, sys_cfg, distro, paths):
         sources.DataSource.__init__(self, sys_cfg, distro, paths)
         self.seed_dir = os.path.join(paths.seed_dir, 'cs')
@@ -80,28 +84,12 @@ class DataSourceCloudStack(sources.DataSource):
         self.metadata_address = "http://%s/" % (self.vr_addr,)
         self.cfg = {}
 
-    def _get_url_settings(self):
-        mcfg = self.ds_cfg
-        max_wait = 120
-        try:
-            max_wait = int(mcfg.get("max_wait", max_wait))
-        except Exception:
-            util.logexc(LOG, "Failed to get max wait. using %s", max_wait)
+    def wait_for_metadata_service(self):
+        (max_wait, timeout, _retries) = self.get_url_params()
 
-        if max_wait == 0:
+        if max_wait <= 0:
             return False
 
-        timeout = 50
-        try:
-            timeout = int(mcfg.get("timeout", timeout))
-        except Exception:
-            util.logexc(LOG, "Failed to get timeout, using %s", timeout)
-
-        return (max_wait, timeout)
-
-    def wait_for_metadata_service(self):
-        (max_wait, timeout) = self._get_url_settings()
-
         urls = [uhelp.combine_url(self.metadata_address,
                                   'latest/meta-data/instance-id')]
         start_time = time.time()
diff --git a/cloudinit/sources/DataSourceConfigDrive.py b/cloudinit/sources/DataSourceConfigDrive.py
index 121cf21..4cb2897 100644
--- a/cloudinit/sources/DataSourceConfigDrive.py
+++ b/cloudinit/sources/DataSourceConfigDrive.py
@@ -43,7 +43,7 @@ class DataSourceConfigDrive(openstack.SourceMixin, sources.DataSource):
         self.version = None
         self.ec2_metadata = None
         self._network_config = None
-        self.network_json = None
+        self.network_json = sources.UNSET
         self.network_eni = None
         self.known_macs = None
         self.files = {}
@@ -149,7 +149,7 @@ class DataSourceConfigDrive(openstack.SourceMixin, sources.DataSource):
     @property
     def network_config(self):
         if self._network_config is None:
-            if self.network_json is not None:
+            if self.network_json not in (None, sources.UNSET):
                 LOG.debug("network config provided via network_json")
                 self._network_config = openstack.convert_net_json(
                     self.network_json, known_macs=self.known_macs)
diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py
index 21e9ef8..6405847 100644
--- a/cloudinit/sources/DataSourceEc2.py
+++ b/cloudinit/sources/DataSourceEc2.py
@@ -27,8 +27,6 @@ SKIP_METADATA_URL_CODES = frozenset([uhelp.NOT_FOUND])
 STRICT_ID_PATH = ("datasource", "Ec2", "strict_id")
 STRICT_ID_DEFAULT = "warn"
 
-_unset = "_unset"
-
 
 class Platforms(object):
     # TODO Rename and move to cloudinit.cloud.CloudNames
@@ -59,16 +57,17 @@ class DataSourceEc2(sources.DataSource):
     # for extended metadata content. IPv6 support comes in 2016-09-02
     extended_metadata_versions = ['2016-09-02']
 
+    # Setup read_url parameters per get_url_params.
+    url_max_wait = 120
+    url_timeout = 50
+
     _cloud_platform = None
 
-    _network_config = _unset  # Used for caching calculated network config v1
+    _network_config = sources.UNSET  # Used to cache calculated network cfg v1
 
     # Whether we want to get network configuration from the metadata service.
     get_network_metadata = False
 
-    # Track the discovered fallback nic for use in configuration generation.
-    _fallback_interface = None
-
     def __init__(self, sys_cfg, distro, paths):
         super(DataSourceEc2, self).__init__(sys_cfg, distro, paths)
         self.metadata_address = None
@@ -158,26 +157,10 @@ class DataSourceEc2(sources.DataSource):
         else:
             return self.metadata['instance-id']
 
-    def _get_url_settings(self):
-        mcfg = self.ds_cfg
-        max_wait = 120
-        try:
-            max_wait = int(mcfg.get("max_wait", max_wait))
-        except Exception:
-            util.logexc(LOG, "Failed to get max wait. using %s", max_wait)
-
-        timeout = 50
-        try:
-            timeout = max(0, int(mcfg.get("timeout", timeout)))
-        except Exception:
-            util.logexc(LOG, "Failed to get timeout, using %s", timeout)
-
-        return (max_wait, timeout)
-
     def wait_for_metadata_service(self):
         mcfg = self.ds_cfg
 
-        (max_wait, timeout) = self._get_url_settings()
+        (max_wait, timeout, _retries) = self.get_url_params()
         if max_wait <= 0:
             return False
 
@@ -310,11 +293,11 @@ class DataSourceEc2(sources.DataSource):
     @property
     def network_config(self):
         """Return a network config dict for rendering ENI or netplan files."""
-        if self._network_config != _unset:
+        if self._network_config != sources.UNSET:
             return self._network_config
 
         if self.metadata is None:
-            # this would happen if get_data hadn't been called. leave as _unset
+            # this would happen if get_data hadn't been called. leave as UNSET
             LOG.warning(
                 "Unexpected call to network_config when metadata is None.")
             return None
@@ -343,21 +326,6 @@ class DataSourceEc2(sources.DataSource):
 
         return self._network_config
 
-    @property
-    def fallback_interface(self):
-        if self._fallback_interface is None:
-            # fallback_nic was used at one point, so restored objects may
-            # have an attribute there. respect that if found.
-            _legacy_fbnic = getattr(self, 'fallback_nic', None)
-            if _legacy_fbnic:
-                self._fallback_interface = _legacy_fbnic
-                self.fallback_nic = None
-            else:
-                self._fallback_interface = net.find_fallback_nic()
-                if self._fallback_interface is None:
-                    LOG.warning("Did not find a fallback interface on EC2.")
-        return self._fallback_interface
-
     def _crawl_metadata(self):
         """Crawl metadata service when available.
 
diff --git a/cloudinit/sources/DataSourceOpenStack.py b/cloudinit/sources/DataSourceOpenStack.py
index fb166ae..b81415e 100644
--- a/cloudinit/sources/DataSourceOpenStack.py
+++ b/cloudinit/sources/DataSourceOpenStack.py
@@ -7,6 +7,7 @@
 import time
 
 from cloudinit import log as logging
+from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError
 from cloudinit import sources
 from cloudinit import url_helper
 from cloudinit import util
@@ -27,46 +28,25 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource):
 
     dsname = "OpenStack"
 
+    _network_config = sources.UNSET  # Used to cache calculated network cfg v1
+
+    # Whether we want to get network configuration from the metadata service.
+    get_network_metadata = False
+
     def __init__(self, sys_cfg, distro, paths):
         super(DataSourceOpenStack, self).__init__(sys_cfg, distro, paths)
         self.metadata_address = None
         self.ssl_details = util.fetch_ssl_details(self.paths)
         self.version = None
         self.files = {}
-        self.ec2_metadata = None
+        self.ec2_metadata = sources.UNSET
+        self.network_json = sources.UNSET
 
     def __str__(self):
         root = sources.DataSource.__str__(self)
         mstr = "%s [%s,ver=%s]" % (root, self.dsmode, self.version)
         return mstr
 
-    def _get_url_settings(self):
-        # TODO(harlowja): this is shared with ec2 datasource, we should just
-        # move it to a shared location instead...
-        # Note: the defaults here are different though.
-
-        # max_wait < 0 indicates do not wait
-        max_wait = -1
-        timeout = 10
-        retries = 5
-
-        try:
-            max_wait = int(self.ds_cfg.get("max_wait", max_wait))
-        except Exception:
-            util.logexc(LOG, "Failed to get max wait. using %s", max_wait)
-
-        try:
-            timeout = max(0, int(self.ds_cfg.get("timeout", timeout)))
-        except Exception:
-            util.logexc(LOG, "Failed to get timeout, using %s", timeout)
-
-        try:
-            retries = int(self.ds_cfg.get("retries", retries))
-        except Exception:
-            util.logexc(LOG, "Failed to get retries. using %s", retries)
-
-        return (max_wait, timeout, retries)
-
     def wait_for_metadata_service(self):
         urls = self.ds_cfg.get("metadata_urls", [DEF_MD_URL])
         filtered = [x for x in urls if util.is_resolvable_url(x)]
@@ -86,7 +66,7 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource):
             md_urls.append(md_url)
             url2base[md_url] = url
 
-        (max_wait, timeout, _retries) = self._get_url_settings()
+        (max_wait, timeout, _retries) = self.get_url_params()
         start_time = time.time()
         avail_url = url_helper.wait_for_url(urls=md_urls, max_wait=max_wait,
                                             timeout=timeout)
@@ -99,38 +79,58 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource):
         self.metadata_address = url2base.get(avail_url)
         return bool(avail_url)
 
-    def _get_data(self):
-        try:
-            if not self.wait_for_metadata_service():
-                return False
-        except IOError:
-            return False
+    def check_instance_id(self, sys_cfg):
+        # quickly (local check only) if self.instance_id is still valid
+        return sources.instance_id_matches_system_uuid(self.get_instance_id())
 
-        (_max_wait, timeout, retries) = self._get_url_settings()
+    @property
+    def network_config(self):
+        """Return a network config dict for rendering ENI or netplan files."""
+        if self._network_config != sources.UNSET:
+            return self._network_config
 
-        try:
-            results = util.log_time(LOG.debug,
-                                    'Crawl of openstack metadata service',
-                                    read_metadata_service,
-                                    args=[self.metadata_address],
-                                    kwargs={'ssl_details': self.ssl_details,
-                                            'retries': retries,
-                                            'timeout': timeout})
-        except openstack.NonReadable:
-            return False
-        except (openstack.BrokenMetadata, IOError):
-            util.logexc(LOG, "Broken metadata address %s",
-                        self.metadata_address)
-            return False
+        if self.network_json == sources.UNSET:
+            # this would happen if get_data hadn't been called. leave as UNSET
+            LOG.warning(
+                "Unexpected call to network_config when network_json is None.")
+            return None
+
+        LOG.debug("network config provided via network_json")
+        self._network_config = openstack.convert_net_json(
+            self.network_json, known_macs=None)
+        return self._network_config
+
+    def _get_data(self):
+        """Crawl metadata, parse and persist that data for this instance.
+
+        @return: True when metadata discovered indicates OpenStack datasource.
+            False when unable to contact metadata service or when metadata
+            format is invalid or disabled.
+        """
+        if self.get_network_metadata:  # Setup networking in init-local stage.
+            try:
+                with EphemeralDHCPv4(self.fallback_interface):
+                    results = util.log_time(
+                        logfunc=LOG.debug, msg='Crawl of metadata service',
+                        func=self._crawl_metadata)
+            except (NoDHCPLeaseError, sources.InvalidMetaDataException) as e:
+                util.logexc(LOG, str(e))
+                return False
+        else:
+            try:
+                results = self._crawl_metadata()
+            except sources.InvalidMetaDataException as e:
+                util.logexc(LOG, str(e))
+                return False
 
         self.dsmode = self._determine_dsmode([results.get('dsmode')])
         if self.dsmode == sources.DSMODE_DISABLED:
             return False
-
         md = results.get('metadata', {})
         md = util.mergemanydict([md, DEFAULT_METADATA])
         self.metadata = md
         self.ec2_metadata = results.get('ec2-metadata')
+        self.network_json = results.get('networkdata')
         self.userdata_raw = results.get('userdata')
         self.version = results['version']
         self.files.update(results.get('files', {}))
@@ -145,9 +145,42 @@ class DataSourceOpenStack(openstack.SourceMixin, sources.DataSource):
 
         return True
 
-    def check_instance_id(self, sys_cfg):
-        # quickly (local check only) if self.instance_id is still valid
-        return sources.instance_id_matches_system_uuid(self.get_instance_id())
+    def _crawl_metadata(self):
+        """Crawl metadata service when available.
+
+        @returns: Dictionary with all metadata discovered for this datasource.
+        @raise: InvalidMetaDataException on unreadable or broken
+            metadata.
+        """
+        try:
+            if not self.wait_for_metadata_service():
+                raise sources.InvalidMetaDataException(
+                    'No active metadata service found')
+        except IOError as e:
+            raise sources.InvalidMetaDataException(
+                'IOError contacting metadata service: {error}'.format(
+                    error=str(e)))
+
+        (_max_wait, timeout, retries) = self.get_url_params()
+
+        try:
+            result = util.log_time(
+                LOG.debug, 'Crawl of openstack metadata service',
+                read_metadata_service, args=[self.metadata_address],
+                kwargs={'ssl_details': self.ssl_details, 'retries': retries,
+                        'timeout': timeout})
+        except openstack.NonReadable as e:
+            raise sources.InvalidMetaDataException(str(e))
+        except (openstack.BrokenMetadata, IOError):
+            msg = 'Broken metadata address {addr}'.format(
+                addr=self.metadata_address)
+            raise sources.InvalidMetaDataException(msg)
+        return result
+
+
+class DataSourceOpenStackLocal(DataSourceOpenStack):
+
+    get_network_metadata = True  # Get metadata network config if present
 
 
 def read_metadata_service(base_url, ssl_details=None,
@@ -159,6 +192,7 @@ def read_metadata_service(base_url, ssl_details=None,
 
 # Used to match classes to dependencies
 datasources = [
+    (DataSourceOpenStackLocal, (sources.DEP_FILESYSTEM,)),
     (DataSourceOpenStack, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)),
 ]
 
diff --git a/cloudinit/sources/DataSourceSmartOS.py b/cloudinit/sources/DataSourceSmartOS.py
index fcb46b1..c91e4d5 100644
--- a/cloudinit/sources/DataSourceSmartOS.py
+++ b/cloudinit/sources/DataSourceSmartOS.py
@@ -165,9 +165,8 @@ class DataSourceSmartOS(sources.DataSource):
 
     dsname = "Joyent"
 
-    _unset = "_unset"
-    smartos_type = _unset
-    md_client = _unset
+    smartos_type = sources.UNSET
+    md_client = sources.UNSET
 
     def __init__(self, sys_cfg, distro, paths):
         sources.DataSource.__init__(self, sys_cfg, distro, paths)
@@ -189,12 +188,12 @@ class DataSourceSmartOS(sources.DataSource):
         return "%s [client=%s]" % (root, self.md_client)
 
     def _init(self):
-        if self.smartos_type == self._unset:
+        if self.smartos_type == sources.UNSET:
             self.smartos_type = get_smartos_environ()
             if self.smartos_type is None:
                 self.md_client = None
 
-        if self.md_client == self._unset:
+        if self.md_client == sources.UNSET:
             self.md_client = jmc_client_factory(
                 smartos_type=self.smartos_type,
                 metadata_sockfile=self.ds_cfg['metadata_sockfile'],
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index df0b374..b3ff9fb 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -17,6 +17,7 @@ import six
 from cloudinit.atomic_helper import write_json
 from cloudinit import importer
 from cloudinit import log as logging
+from cloudinit import net
 from cloudinit import type_utils
 from cloudinit import user_data as ud
 from cloudinit import util
@@ -41,6 +42,8 @@ INSTANCE_JSON_FILE = 'instance-data.json'
 # Key which can be provide a cloud's official product name to cloud-init
 METADATA_CLOUD_NAME_KEY = 'cloud-name'
 
+UNSET = "_unset"
+
 LOG = logging.getLogger(__name__)
 
 
@@ -48,6 +51,11 @@ class DataSourceNotFoundException(Exception):
     pass
 
 
+class InvalidMetaDataException(Exception):
+    """Raised when metadata is broken, unavailable or disabled."""
+    pass
+
+
 def process_base64_metadata(metadata, key_path=''):
     """Strip ci-b64 prefix and return metadata with base64-encoded-keys set."""
     md_copy = copy.deepcopy(metadata)
@@ -81,6 +89,14 @@ class DataSource(object):
     # Cached cloud_name as determined by _get_cloud_name
     _cloud_name = None
 
+    # Track the discovered fallback nic for use in configuration generation.
+    _fallback_interface = None
+
+    # read_url_params
+    url_max_wait = -1   # max_wait < 0 means do not wait
+    url_timeout = 10    # timeout for each metadata url read attempt
+    url_retries = 5     # number of times to retry url upon 404
+
     def __init__(self, sys_cfg, distro, paths, ud_proc=None):
         self.sys_cfg = sys_cfg
         self.distro = distro
@@ -128,6 +144,14 @@ class DataSource(object):
                 'meta-data': self.metadata,
                 'user-data': self.get_userdata_raw(),
                 'vendor-data': self.get_vendordata_raw()}}
+        if hasattr(self, 'network_json'):
+            network_json = getattr(self, 'network_json')
+            if network_json != UNSET:
+                instance_data['ds']['network_json'] = network_json
+        if hasattr(self, 'ec2_metadata'):
+            ec2_metadata = getattr(self, 'ec2_metadata')
+            if ec2_metadata != UNSET:
+                instance_data['ds']['ec2_metadata'] = ec2_metadata
         instance_data.update(
             self._get_standardized_metadata())
         try:
@@ -149,6 +173,41 @@ class DataSource(object):
             'Subclasses of DataSource must implement _get_data which'
             ' sets self.metadata, vendordata_raw and userdata_raw.')
 
+    def get_url_params(self):
+        """Return the Datasource's prefered url_read parameters.
+
+        Subclasses may override url_max_wait, url_timeout, url_retries.
+
+        @return: A 3-tuple of  max_wait_seconds, timeout_seconds, num_retries.
+        """
+        max_wait = self.url_max_wait
+        try:
+            max_wait = int(self.ds_cfg.get("max_wait", self.url_max_wait))
+        except ValueError:
+            util.logexc(
+                LOG, "Config max_wait '%s' is not an int, using default '%s'",
+                self.ds_cfg.get("max_wait"), max_wait)
+
+        timeout = self.url_timeout
+        try:
+            timeout = max(
+                0, int(self.ds_cfg.get("timeout", self.url_timeout)))
+        except ValueError:
+            timeout = self.url_timeout
+            util.logexc(
+                LOG, "Config timeout '%s' is not an int, using default '%s'",
+                self.ds_cfg.get('timeout'), timeout)
+
+        retries = self.url_retries
+        try:
+            retries = int(self.ds_cfg.get("retries", self.url_retries))
+        except Exception:
+            util.logexc(
+                LOG, "Config retries '%s' is not an int, using default '%s'",
+                self.ds_cfg.get('retries'), retries)
+
+        return (max_wait, timeout, retries)
+
     def get_userdata(self, apply_filter=False):
         if self.userdata is None:
             self.userdata = self.ud_proc.process(self.get_userdata_raw())
@@ -162,6 +221,24 @@ class DataSource(object):
         return self.vendordata
 
     @property
+    def fallback_interface(self):
+        """Determine the network interface used during local network config."""
+        if self._fallback_interface is None:
+            # fallback_nic was used at one point, so restored objects may
+            # have an attribute there. respect that if found.
+            _legacy_fbnic = getattr(self, 'fallback_nic', None)
+            if _legacy_fbnic:
+                self._fallback_interface = _legacy_fbnic
+                self.fallback_nic = None
+            else:
+                self._fallback_interface = net.find_fallback_nic()
+                if self._fallback_interface is None:
+                    LOG.warning(
+                        "Did not find a fallback interface on %s.",
+                        self.cloud_name)
+        return self._fallback_interface
+
+    @property
     def cloud_name(self):
         """Return lowercase cloud name as determined by the datasource.
 
diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py
index 452e921..27f0e8d 100644
--- a/cloudinit/sources/tests/test_init.py
+++ b/cloudinit/sources/tests/test_init.py
@@ -17,6 +17,7 @@ from cloudinit import util
 class DataSourceTestSubclassNet(DataSource):
 
     dsname = 'MyTestSubclass'
+    url_max_wait = 55
 
     def __init__(self, sys_cfg, distro, paths, custom_userdata=None):
         super(DataSourceTestSubclassNet, self).__init__(
@@ -70,8 +71,7 @@ class TestDataSource(CiTestCase):
         """Init uses DataSource.dsname for sourcing ds_cfg."""
         sys_cfg = {'datasource': {'MyTestSubclass': {'key2': False}}}
         distro = 'distrotest'  # generally should be a Distro object
-        paths = Paths({})
-        datasource = DataSourceTestSubclassNet(sys_cfg, distro, paths)
+        datasource = DataSourceTestSubclassNet(sys_cfg, distro, self.paths)
         self.assertEqual({'key2': False}, datasource.ds_cfg)
 
     def test_str_is_classname(self):
@@ -81,6 +81,67 @@ class TestDataSource(CiTestCase):
             'DataSourceTestSubclassNet',
             str(DataSourceTestSubclassNet('', '', self.paths)))
 
+    def test_datasource_get_url_params_defaults(self):
+        """get_url_params default url config settings for the datasource."""
+        expected = (self.datasource.url_max_wait, self.datasource.url_timeout,
+                    self.datasource.url_retries)
+        self.assertEqual(expected, self.datasource.get_url_params())
+
+    def test_datasource_get_url_params_subclassed(self):
+        """Subclasses can override get_url_params defaults."""
+        sys_cfg = {'datasource': {'MyTestSubclass': {'key2': False}}}
+        distro = 'distrotest'  # generally should be a Distro object
+        datasource = DataSourceTestSubclassNet(sys_cfg, distro, self.paths)
+        expected = (datasource.url_max_wait, datasource.url_timeout,
+                    datasource.url_retries)
+        url_params = datasource.get_url_params()
+        self.assertNotEqual(self.datasource.get_url_params(), url_params)
+        self.assertEqual(expected, url_params)
+
+    def test_datasource_get_url_params_ds_config_override(self):
+        """Datasource configuration options can override url param defaults."""
+        sys_cfg = {
+            'datasource': {
+                'MyTestSubclass': {
+                    'max_wait': '1', 'timeout': '2', 'retries': '3'}}}
+        datasource = DataSourceTestSubclassNet(
+            sys_cfg, self.distro, self.paths)
+        expected = (1, 2, 3)
+        url_params = datasource.get_url_params()
+        self.assertNotEqual(
+            (datasource.url_max_wait, datasource.url_timeout,
+             datasource.url_retries),
+            url_params)
+        self.assertEqual(expected, url_params)
+
+    def test_datasource_get_url_params_is_zero_or_greater(self):
+        """get_url_params ignores timeouts with a value below 0."""
+        # Set an override that is below 0 which gets ignored.
+        sys_cfg = {'datasource': {'_undef': {'timeout': '-1'}}}
+        datasource = DataSource(sys_cfg, self.distro, self.paths)
+        (_max_wait, timeout, _retries) = datasource.get_url_params()
+        self.assertEqual(0, timeout)
+
+    def test_datasource_get_url_uses_defaults_on_errors(self):
+        """On invalid system config values for url_params defaults are used."""
+        # All invalid values should be logged
+        sys_cfg = {'datasource': {
+            '_undef': {
+                'max_wait': 'nope', 'timeout': 'bug', 'retries': 'nonint'}}}
+        datasource = DataSource(sys_cfg, self.distro, self.paths)
+        url_params = datasource.get_url_params()
+        expected = (datasource.url_max_wait, datasource.url_timeout,
+                    datasource.url_retries)
+        self.assertEqual(expected, url_params)
+        logs = self.logs.getvalue()
+        expected_logs = [
+            "Config max_wait 'nope' is not an int, using default '-1'",
+            "Config timeout 'bug' is not an int, using default '10'",
+            "Config retries 'nonint' is not an int, using default '5'",
+        ]
+        for log in expected_logs:
+            self.assertIn(log, logs)
+
     def test__get_data_unimplemented(self):
         """Raise an error when _get_data is not implemented."""
         with self.assertRaises(NotImplementedError) as context_manager:
diff --git a/tests/unittests/test_datasource/test_common.py b/tests/unittests/test_datasource/test_common.py
index ec33388..0d35dc2 100644
--- a/tests/unittests/test_datasource/test_common.py
+++ b/tests/unittests/test_datasource/test_common.py
@@ -40,6 +40,7 @@ DEFAULT_LOCAL = [
     OVF.DataSourceOVF,
     SmartOS.DataSourceSmartOS,
     Ec2.DataSourceEc2Local,
+    OpenStack.DataSourceOpenStackLocal,
 ]
 
 DEFAULT_NETWORK = [
diff --git a/tests/unittests/test_datasource/test_openstack.py b/tests/unittests/test_datasource/test_openstack.py
index 42c3155..d577acc 100644
--- a/tests/unittests/test_datasource/test_openstack.py
+++ b/tests/unittests/test_datasource/test_openstack.py
@@ -16,7 +16,7 @@ from six import StringIO
 
 from cloudinit import helpers
 from cloudinit import settings
-from cloudinit.sources import convert_vendordata
+from cloudinit.sources import convert_vendordata, UNSET
 from cloudinit.sources import DataSourceOpenStack as ds
 from cloudinit.sources.helpers import openstack
 from cloudinit import util
@@ -129,6 +129,8 @@ def _read_metadata_service():
 
 
 class TestOpenStackDataSource(test_helpers.HttprettyTestCase):
+
+    with_logs = True
     VERSION = 'latest'
 
     def setUp(self):
@@ -232,11 +234,11 @@ class TestOpenStackDataSource(test_helpers.HttprettyTestCase):
         self.assertRaises(openstack.BrokenMetadata, _read_metadata_service)
 
     @hp.activate
-    def test_datasource(self):
+    @test_helpers.mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery')
+    def test_datasource(self, m_dhcp):
         _register_uris(self.VERSION, EC2_FILES, EC2_META, OS_FILES)
-        ds_os = ds.DataSourceOpenStack(settings.CFG_BUILTIN,
-                                       None,
-                                       helpers.Paths({'run_dir': self.tmp}))
+        ds_os = ds.DataSourceOpenStack(
+            settings.CFG_BUILTIN, None, helpers.Paths({'run_dir': self.tmp}))
         self.assertIsNone(ds_os.version)
         found = ds_os.get_data()
         self.assertTrue(found)
@@ -250,6 +252,36 @@ class TestOpenStackDataSource(test_helpers.HttprettyTestCase):
         self.assertEqual(2, len(ds_os.files))
         self.assertEqual(VENDOR_DATA, ds_os.vendordata_pure)
         self.assertIsNone(ds_os.vendordata_raw)
+        m_dhcp.assert_not_called()
+
+    @hp.activate
+    @test_helpers.mock.patch('cloudinit.net.dhcp.EphemeralIPv4Network')
+    @test_helpers.mock.patch('cloudinit.net.dhcp.maybe_perform_dhcp_discovery')
+    def test_local_datasource(self, m_dhcp, m_net):
+        """OpenStackLocal calls EphemeralDHCPNetwork and gets instance data."""
+        _register_uris(self.VERSION, EC2_FILES, EC2_META, OS_FILES)
+        ds_os_local = ds.DataSourceOpenStackLocal(
+            settings.CFG_BUILTIN, None, helpers.Paths({'run_dir': self.tmp}))
+        ds_os_local._fallback_interface = 'eth9'  # Monkey patch for dhcp
+        m_dhcp.return_value = [{
+            'interface': 'eth9', 'fixed-address': '192.168.2.9',
+            'routers': '192.168.2.1', 'subnet-mask': '255.255.255.0',
+            'broadcast-address': '192.168.2.255'}]
+
+        self.assertIsNone(ds_os_local.version)
+        found = ds_os_local.get_data()
+        self.assertTrue(found)
+        self.assertEqual(2, ds_os_local.version)
+        md = dict(ds_os_local.metadata)
+        md.pop('instance-id', None)
+        md.pop('local-hostname', None)
+        self.assertEqual(OSTACK_META, md)
+        self.assertEqual(EC2_META, ds_os_local.ec2_metadata)
+        self.assertEqual(USER_DATA, ds_os_local.userdata_raw)
+        self.assertEqual(2, len(ds_os_local.files))
+        self.assertEqual(VENDOR_DATA, ds_os_local.vendordata_pure)
+        self.assertIsNone(ds_os_local.vendordata_raw)
+        m_dhcp.assert_called_with('eth9')
 
     @hp.activate
     def test_bad_datasource_meta(self):
@@ -265,6 +297,10 @@ class TestOpenStackDataSource(test_helpers.HttprettyTestCase):
         found = ds_os.get_data()
         self.assertFalse(found)
         self.assertIsNone(ds_os.version)
+        self.assertIn(
+            'InvalidMetaDataException: Broken metadata address'
+            ' http://169.254.169.25',
+            self.logs.getvalue())
 
     @hp.activate
     def test_no_datasource(self):
@@ -308,6 +344,35 @@ class TestOpenStackDataSource(test_helpers.HttprettyTestCase):
         self.assertFalse(found)
         self.assertIsNone(ds_os.version)
 
+    @hp.activate
+    def test_wb__crawl_metadata_does_not_persist(self):
+        """_crawl_metadata returns current metadata and does not cache."""
+        _register_uris(self.VERSION, EC2_FILES, EC2_META, OS_FILES)
+        ds_os = ds.DataSourceOpenStack(
+            settings.CFG_BUILTIN, None, helpers.Paths({'run_dir': self.tmp}))
+        crawled_data = ds_os._crawl_metadata()
+        self.assertEqual(UNSET, ds_os.ec2_metadata)
+        self.assertIsNone(ds_os.userdata_raw)
+        self.assertEqual(0, len(ds_os.files))
+        self.assertIsNone(ds_os.vendordata_raw)
+        self.assertEqual(
+            ['dsmode', 'ec2-metadata', 'files', 'metadata', 'networkdata',
+             'userdata', 'vendordata', 'version'],
+            sorted(crawled_data.keys()))
+        self.assertEqual('local', crawled_data['dsmode'])
+        self.assertEqual(EC2_META, crawled_data['ec2-metadata'])
+        self.assertEqual(2, len(crawled_data['files']))
+        md = copy.deepcopy(crawled_data['metadata'])
+        md.pop('instance-id')
+        md.pop('local-hostname')
+        self.assertEqual(OSTACK_META, md)
+        self.assertEqual(
+            json.loads(OS_FILES['openstack/latest/network_data.json']),
+            crawled_data['networkdata'])
+        self.assertEqual(USER_DATA, crawled_data['userdata'])
+        self.assertEqual(VENDOR_DATA, crawled_data['vendordata'])
+        self.assertEqual(2, crawled_data['version'])
+
 
 class TestVendorDataLoading(test_helpers.TestCase):
     def cvj(self, data):
