← Back to team overview

cloud-init-dev team mailing list archive

[Merge] ~raharper/cloud-init:fix/net-wait-for-interfaces into cloud-init:master

 

Ryan Harper has proposed merging ~raharper/cloud-init:fix/net-wait-for-interfaces into cloud-init:master.

Commit message:
net: update net sequence, include wait on netdevs, opensuse netrules path

On systems with many interfaces, processing udev events may take a while.
Cloud-init expects devices included in a provided network-configuration
to be present when attempting to configure them.  This patch adds a step
in net configuration where it will check for devices provided in the 
configuration and if not found, issue udevadm settle commands to wait
for them to appear.

Additionally, the default path for udev persistent network rules
70-persistent-net.rules may also be written to systems which include
the 75-net-generator.rules.  During boot, cloud-init and the
generator may race and interleave values causing issues.  OpenSUSE
will now use a newer file, 85-persistent-net-cloud-init.rules which
will take precedence over values created by 75-net-generator and
avoid collisions on the same file.

LP: #1817368

Requested reviews:
  cloud-init commiters (cloud-init-dev)

For more details, see:
https://code.launchpad.net/~raharper/cloud-init/+git/cloud-init/+merge/366667
-- 
Your team cloud-init commiters is requested to review the proposed merge of ~raharper/cloud-init:fix/net-wait-for-interfaces into cloud-init:master.
diff --git a/cloudinit/distros/opensuse.py b/cloudinit/distros/opensuse.py
index 1bfe047..e41e2f7 100644
--- a/cloudinit/distros/opensuse.py
+++ b/cloudinit/distros/opensuse.py
@@ -38,6 +38,8 @@ class Distro(distros.Distro):
         'sysconfig': {
             'control': 'etc/sysconfig/network/config',
             'iface_templates': '%(base)s/network/ifcfg-%(name)s',
+            'netrules_path': (
+                'etc/udev/rules.d/85-persistent-net-cloud-init.rules'),
             'route_templates': {
                 'ipv4': '%(base)s/network/ifroute-%(name)s',
                 'ipv6': '%(base)s/network/ifroute-%(name)s',
diff --git a/cloudinit/net/__init__.py b/cloudinit/net/__init__.py
index 3642fb1..b08ff1b 100644
--- a/cloudinit/net/__init__.py
+++ b/cloudinit/net/__init__.py
@@ -9,6 +9,7 @@ import errno
 import logging
 import os
 import re
+from functools import partial
 
 from cloudinit.net.network_state import mask_to_net_prefix
 from cloudinit import util
@@ -292,18 +293,10 @@ def generate_fallback_config(blacklist_drivers=None, config_driver=None):
         return None
 
 
-def apply_network_config_names(netcfg, strict_present=True, strict_busy=True):
-    """read the network config and rename devices accordingly.
-    if strict_present is false, then do not raise exception if no devices
-    match.  if strict_busy is false, then do not raise exception if the
-    device cannot be renamed because it is currently configured.
-
-    renames are only attempted for interfaces of type 'physical'.  It is
-    expected that the network system will create other devices with the
-    correct name in place."""
+def extract_physdevs(netcfg):
 
     def _version_1(netcfg):
-        renames = []
+        physdevs = []
         for ent in netcfg.get('config', {}):
             if ent.get('type') != 'physical':
                 continue
@@ -317,11 +310,11 @@ def apply_network_config_names(netcfg, strict_present=True, strict_busy=True):
                 driver = device_driver(name)
             if not device_id:
                 device_id = device_devid(name)
-            renames.append([mac, name, driver, device_id])
-        return renames
+            physdevs.append([mac, name, driver, device_id])
+        return physdevs
 
     def _version_2(netcfg):
-        renames = []
+        physdevs = []
         for ent in netcfg.get('ethernets', {}).values():
             # only rename if configured to do so
             name = ent.get('set-name')
@@ -337,16 +330,71 @@ def apply_network_config_names(netcfg, strict_present=True, strict_busy=True):
                 driver = device_driver(name)
             if not device_id:
                 device_id = device_devid(name)
-            renames.append([mac, name, driver, device_id])
-        return renames
+            physdevs.append([mac, name, driver, device_id])
+        return physdevs
+
+    version = netcfg.get('version')
+    if version == 1:
+        return _version_1(netcfg)
+    elif version == 2:
+        return _version_2(netcfg)
+
+    raise RuntimeError('Unknown network config version: %s' % version)
+
+
+def wait_for_physdevs(netcfg, strict=True):
+    physdevs = extract_physdevs(netcfg)
+
+    # set of expected mac addrs
+    expected_ifaces = dict([(iface[0], iface[1]) for iface in physdevs])
+    expected_macs = set(expected_ifaces.keys())
+
+    # set of current macs
+    present_macs = get_interfaces_by_mac().keys()
+
+    # compare the set of expected mac address values to
+    # the current macs present; we only check MAC as cloud-init
+    # has not yet renamed interfaces and the netcfg may include
+    # such renames.
+    for _ in range(0, 5):
+        if expected_macs.issubset(present_macs):
+            LOG.debug('net: all expected physical devices present')
+            return
+
+        missing = expected_macs.difference(present_macs)
+        LOG.debug('net: waiting for expected net devices: %s', missing)
+        for mac in missing:
+            # trigger a settle, unless this interface exists
+            syspath = sys_dev_path(expected_ifaces[mac])
+            settle = partial(util.udevadm_settle, exists=syspath)
+            msg = 'Waiting for udev events to settle or %s exists' % syspath
+            util.log_time(LOG.debug, msg, func=settle)
+
+        # update present_macs after settles
+        present_macs = get_interfaces_by_mac().keys()
+
+    msg = 'Not all expected physical devices present: %s' % missing
+    LOG.warning(msg)
+    if strict:
+        raise RuntimeError(msg)
+
+
+def apply_network_config_names(netcfg, strict_present=True, strict_busy=True):
+    """read the network config and rename devices accordingly.
+    if strict_present is false, then do not raise exception if no devices
+    match.  if strict_busy is false, then do not raise exception if the
+    device cannot be renamed because it is currently configured.
+
+    renames are only attempted for interfaces of type 'physical'.  It is
+    expected that the network system will create other devices with the
+    correct name in place."""
 
-    if netcfg.get('version') == 1:
-        return _rename_interfaces(_version_1(netcfg))
-    elif netcfg.get('version') == 2:
-        return _rename_interfaces(_version_2(netcfg))
+    try:
+        _rename_interfaces(extract_physdevs(netcfg))
+    except RuntimeError as e:
+        raise RuntimeError('Failed to apply network config names: %s' % e)
 
-    raise RuntimeError('Failed to apply network config names. Found bad'
-                       ' network config version: %s' % netcfg.get('version'))
+    return
 
 
 def interface_has_own_mac(ifname, strict=False):
diff --git a/cloudinit/stages.py b/cloudinit/stages.py
index da7d349..a34ea55 100644
--- a/cloudinit/stages.py
+++ b/cloudinit/stages.py
@@ -644,18 +644,21 @@ class Init(object):
                 return (ncfg, loc)
         return (self.distro.generate_fallback_config(), "fallback")
 
-    def apply_network_config(self, bring_up):
-        netcfg, src = self._find_networking_config()
-        if netcfg is None:
-            LOG.info("network config is disabled by %s", src)
-            return
-
+    def _apply_netcfg_names(self, netcfg):
         try:
             LOG.debug("applying net config names for %s", netcfg)
             self.distro.apply_network_config_names(netcfg)
         except Exception as e:
             LOG.warning("Failed to rename devices: %s", e)
 
+    def apply_network_config(self, bring_up):
+        # get a network config
+        netcfg, src = self._find_networking_config()
+        if netcfg is None:
+            LOG.info("network config is disabled by %s", src)
+            return
+
+        # request an update if needed/available
         if self.datasource is not NULL_DATA_SOURCE:
             if not self.is_new_instance():
                 if not self.datasource.update_metadata([EventType.BOOT]):
@@ -663,8 +666,19 @@ class Init(object):
                         "No network config applied. Neither a new instance"
                         " nor datasource network update on '%s' event",
                         EventType.BOOT)
-                    return
+                    # nothing new, but ensure proper names
+                    return self._apply_netcfg_names(netcfg)
+                else:
+                    # refresh netcfg after update
+                    netcfg, src = self._find_networking_config()
+
+        # ensure all physical devices in config are present
+        net.wait_for_physdevs(netcfg)
+
+        # apply renames from config
+        self._apply_netcfg_names(netcfg)
 
+        # rendering config
         LOG.info("Applying network configuration from %s bringup=%s: %s",
                  src, bring_up, netcfg)
         try:
diff --git a/cloudinit/tests/test_stages.py b/cloudinit/tests/test_stages.py
index 94b6b25..325a1ab 100644
--- a/cloudinit/tests/test_stages.py
+++ b/cloudinit/tests/test_stages.py
@@ -37,6 +37,7 @@ class FakeDataSource(sources.DataSource):
 
 class TestInit(CiTestCase):
     with_logs = True
+    allowed_subp = True
 
     def setUp(self):
         super(TestInit, self).setUp()
@@ -166,8 +167,9 @@ class TestInit(CiTestCase):
             'INFO: network config is disabled by %s' % disable_file,
             self.logs.getvalue())
 
+    @mock.patch('cloudinit.net.get_interfaces_by_mac')
     @mock.patch('cloudinit.distros.ubuntu.Distro')
-    def test_apply_network_on_new_instance(self, m_ubuntu):
+    def test_apply_network_on_new_instance(self, m_ubuntu, m_macs):
         """Call distro apply_network_config methods on is_new_instance."""
         net_cfg = {
             'version': 1, 'config': [
@@ -177,6 +179,8 @@ class TestInit(CiTestCase):
         def fake_network_config():
             return net_cfg, 'fallback'
 
+        m_macs.return_value = {'42:42:42:42:42:42': 'eth9'}
+
         self.init._find_networking_config = fake_network_config
         self.init.apply_network_config(True)
         self.init.distro.apply_network_config_names.assert_called_with(net_cfg)
@@ -206,8 +210,9 @@ class TestInit(CiTestCase):
             " nor datasource network update on '%s' event" % EventType.BOOT,
             self.logs.getvalue())
 
+    @mock.patch('cloudinit.net.get_interfaces_by_mac')
     @mock.patch('cloudinit.distros.ubuntu.Distro')
-    def test_apply_network_on_datasource_allowed_event(self, m_ubuntu):
+    def test_apply_network_on_datasource_allowed_event(self, m_ubuntu, m_macs):
         """Apply network if datasource.update_metadata permits BOOT event."""
         old_instance_id = os.path.join(
             self.init.paths.get_cpath('data'), 'instance-id')
@@ -220,6 +225,8 @@ class TestInit(CiTestCase):
         def fake_network_config():
             return net_cfg, 'fallback'
 
+        m_macs.return_value = {'42:42:42:42:42:42': 'eth9'}
+
         self.init._find_networking_config = fake_network_config
         self.init.datasource = FakeDataSource(paths=self.init.paths)
         self.init.datasource.update_events = {'network': [EventType.BOOT]}
diff --git a/tests/unittests/test_handler/test_handler_mounts.py b/tests/unittests/test_handler/test_handler_mounts.py
index 8fea6c2..e2e36e3 100644
--- a/tests/unittests/test_handler/test_handler_mounts.py
+++ b/tests/unittests/test_handler/test_handler_mounts.py
@@ -182,6 +182,7 @@ class TestFstabHandling(test_helpers.FilesystemMockingTestCase):
             '%s\tnone\tswap\tsw,comment=cloudconfig\t'
             '0\t0\n' % (self.swap_path,)
         )
+        self.mock_util_subp.return_value = ("", "")
 
         with open(cc_mounts.FSTAB_PATH, 'w') as fd:
             fd.write(fstab_original_content)
@@ -220,10 +221,12 @@ class TestFstabHandling(test_helpers.FilesystemMockingTestCase):
             '%s\tnone\tswap\tsw,comment=cloudconfig\t'
             '0\t0\n' % (self.swap_path,)
         )
+        self.mock_util_subp.return_value = ("", "")
 
         with open(cc_mounts.FSTAB_PATH, 'w') as fd:
             fd.write(fstab_original_content)
 
+
         cc_mounts.handle(None, {}, self.mock_cloud, self.mock_log, [])
 
         with open(cc_mounts.FSTAB_PATH, 'r') as fd:

Follow ups