← Back to team overview

cloud-init-dev team mailing list archive

[Merge] ~raharper/cloud-init:fix/lp-1766287-detect-unstable-names into cloud-init:master

 

Ryan Harper has proposed merging ~raharper/cloud-init:fix/lp-1766287-detect-unstable-names into cloud-init:master.

Commit message:
net: detect unstable network names and trigger a settle if needed

The cloud-init-local.service expects that any network device name
changes have already been completed by the kernel or udev daemon.

In some situations we've found that the renaming of interfaces from
kernel names (eth0, eth1, etc) to their persistent names (eno1, ens3,
enp0s1, etc) may happen after cloud-init-local has started where it
reads values from sysfs about what network devices are present, and
which device to use as a fallback nic.

Subsequently, cloud-init-local would write out network configuration
for a kernel device name which would no longer be present by the time
that networking services start to bring up the devices. The result is
that the instance does not get networking configured. Prior to use of
systemd-networkd, the Ubuntu 'networking.service' unit included a call
to udevadm settle which is why this race is not seen on a Xenial
system.

This change adds the ability to detect if an interface has a stable
name, if if we find one without stable names and stable names have not
been disabled (net.ifnames=0 in /proc/cmdline), then cloud-init will
invoke udevadm settle.

LP: #1766287



Requested reviews:
  cloud-init commiters (cloud-init-dev)
Related bugs:
  Bug #1766287 in cloud-init: "18.04 minimal images on GCE intermittently fail to set up networking "
  https://bugs.launchpad.net/cloud-init/+bug/1766287

For more details, see:
https://code.launchpad.net/~raharper/cloud-init/+git/cloud-init/+merge/344339
-- 
Your team cloud-init commiters is requested to review the proposed merge of ~raharper/cloud-init:fix/lp-1766287-detect-unstable-names into cloud-init:master.
diff --git a/cloudinit/net/__init__.py b/cloudinit/net/__init__.py
index 8005454..d8f7dfa 100644
--- a/cloudinit/net/__init__.py
+++ b/cloudinit/net/__init__.py
@@ -107,6 +107,13 @@ def is_bond(devname):
     return os.path.exists(sys_dev_path(devname, "bonding"))
 
 
+def is_renamed(devname):
+    name_assign_type = read_sys_net_safe(devname, 'name_assign_type')
+    if name_assign_type and name_assign_type == '4':
+        return True
+    return False
+
+
 def is_vlan(devname):
     uevent = str(read_sys_net_safe(devname, "uevent"))
     return 'DEVTYPE=vlan' in uevent.splitlines()
@@ -180,6 +187,18 @@ def find_fallback_nic(blacklist_drivers=None):
     if not blacklist_drivers:
         blacklist_drivers = []
 
+    if 'net.ifnames=0' in util.get_cmdline():
+        LOG.debug('Stable ifnames disabled by net.ifnames=0 in /proc/cmdline')
+    else:
+        unstable = [device for device in get_devicelist()
+                    if device not in ['lo'] and not is_renamed(device)]
+        if len(unstable):
+            LOG.debug('Found unstable nic names: %s; calling udevadm settle',
+                      unstable)
+            msg = 'WARK: Waiting for udev events to settle'
+            util.log_time(LOG.debug, msg, func=util.subp,
+                          args=[['udevadm', 'settle']])
+
     # get list of interfaces that could have connections
     invalid_interfaces = set(['lo'])
     potential_interfaces = set([device for device in get_devicelist()
diff --git a/tests/unittests/test_net.py b/tests/unittests/test_net.py
index c12a487..7e47961 100644
--- a/tests/unittests/test_net.py
+++ b/tests/unittests/test_net.py
@@ -1405,6 +1405,7 @@ DEFAULT_DEV_ATTRS = {
         "address": "07-1C-C6-75-A4-BE",
         "device/driver": None,
         "device/device": None,
+        "name_assign_type": "4",
     }
 }
 
@@ -1452,11 +1453,14 @@ class TestGenerateFallbackConfig(CiTestCase):
             'eth0': {
                 'bridge': False, 'carrier': False, 'dormant': False,
                 'operstate': 'down', 'address': '00:11:22:33:44:55',
-                'device/driver': 'hv_netsvc', 'device/device': '0x3'},
+                'device/driver': 'hv_netsvc', 'device/device': '0x3',
+                'name_assign_type': '4'},
             'eth1': {
                 'bridge': False, 'carrier': False, 'dormant': False,
                 'operstate': 'down', 'address': '00:11:22:33:44:55',
-                'device/driver': 'mlx4_core', 'device/device': '0x7'},
+                'device/driver': 'mlx4_core', 'device/device': '0x7',
+                'name_assign_type': '4'},
+
         }
 
         tmp_dir = self.tmp_dir()
@@ -1512,11 +1516,13 @@ iface eth0 inet dhcp
             'eth1': {
                 'bridge': False, 'carrier': False, 'dormant': False,
                 'operstate': 'down', 'address': '00:11:22:33:44:55',
-                'device/driver': 'hv_netsvc', 'device/device': '0x3'},
+                'device/driver': 'hv_netsvc', 'device/device': '0x3',
+                'name_assign_type': '4'},
             'eth0': {
                 'bridge': False, 'carrier': False, 'dormant': False,
                 'operstate': 'down', 'address': '00:11:22:33:44:55',
-                'device/driver': 'mlx4_core', 'device/device': '0x7'},
+                'device/driver': 'mlx4_core', 'device/device': '0x7',
+                'name_assign_type': '4'},
         }
 
         tmp_dir = self.tmp_dir()
@@ -1565,6 +1571,65 @@ iface eth1 inet dhcp
         ]
         self.assertEqual(", ".join(expected_rule) + '\n', contents.lstrip())
 
+    @mock.patch("cloudinit.util.subp")
+    @mock.patch("cloudinit.net.sys_dev_path")
+    @mock.patch("cloudinit.net.read_sys_net")
+    @mock.patch("cloudinit.net.get_devicelist")
+    def test_unstable_names(self, mock_get_devicelist, mock_read_sys_net,
+                            mock_sys_dev_path, mock_subp):
+        """verify that udevadm settle is called when we find unstable names"""
+        devices = {
+            'eth0': {
+                'bridge': False, 'carrier': False, 'dormant': False,
+                'operstate': 'down', 'address': '00:11:22:33:44:55',
+                'device/driver': 'hv_netsvc', 'device/device': '0x3',
+                'name_assign_type': False},
+            'ens4': {
+                'bridge': False, 'carrier': False, 'dormant': False,
+                'operstate': 'down', 'address': '00:11:22:33:44:55',
+                'device/driver': 'mlx4_core', 'device/device': '0x7',
+                'name_assign_type': '4'},
+
+        }
+
+        tmp_dir = self.tmp_dir()
+        _setup_test(tmp_dir, mock_get_devicelist,
+                    mock_read_sys_net, mock_sys_dev_path,
+                    dev_attrs=devices)
+        net.generate_fallback_config(config_driver=True)
+        mock_subp.assert_called_with(['udevadm', 'settle'])
+
+    @mock.patch("cloudinit.util.get_cmdline")
+    @mock.patch("cloudinit.util.subp")
+    @mock.patch("cloudinit.net.sys_dev_path")
+    @mock.patch("cloudinit.net.read_sys_net")
+    @mock.patch("cloudinit.net.get_devicelist")
+    def test_unstable_names_disabled(self, mock_get_devicelist,
+                                     mock_read_sys_net, mock_sys_dev_path,
+                                     mock_subp, m_get_cmdline):
+        """verify udevadm settle not called when cmdline has net.ifnames=0"""
+        devices = {
+            'eth0': {
+                'bridge': False, 'carrier': False, 'dormant': False,
+                'operstate': 'down', 'address': '00:11:22:33:44:55',
+                'device/driver': 'hv_netsvc', 'device/device': '0x3',
+                'name_assign_type': False},
+            'ens4': {
+                'bridge': False, 'carrier': False, 'dormant': False,
+                'operstate': 'down', 'address': '00:11:22:33:44:55',
+                'device/driver': 'mlx4_core', 'device/device': '0x7',
+                'name_assign_type': '4'},
+
+        }
+
+        m_get_cmdline.return_value = 'net.ifnames=0'
+        tmp_dir = self.tmp_dir()
+        _setup_test(tmp_dir, mock_get_devicelist,
+                    mock_read_sys_net, mock_sys_dev_path,
+                    dev_attrs=devices)
+        net.generate_fallback_config(config_driver=True)
+        self.assertEqual(0, mock_subp.call_count)
+
 
 class TestSysConfigRendering(CiTestCase):
 

Follow ups