curtin-dev team mailing list archive
-
curtin-dev team
-
Mailing list archive
-
Message #03721
[Merge] ~ogayot/curtin:netplan-nbft into curtin:master
Olivier Gayot has proposed merging ~ogayot/curtin:netplan-nbft into curtin:master.
Commit message:
do not squash
Requested reviews:
curtin developers (curtin-dev)
Related bugs:
Bug #2084012 in curtin: "Network disconnect when booting the NVMe/TCP installed system"
https://bugs.launchpad.net/curtin/+bug/2084012
For more details, see:
https://code.launchpad.net/~ogayot/curtin/+git/curtin/+merge/475635
During NVMe/TCP boot with NBFT, dracut renames the "main" network interface to nbft0. This confuses netplan, which expects the network interface to still exist under its original name. Furthermore, after pivoting to the actual root FS, netplan (or, more precisely, the backend that netplan uses) wants to reapply the network configuration. This is a problem with NVMe/TCP because that interface carries the connection to the NVMe/TCP storage, so it should never be brought down at runtime.
To address this issue, we now instruct netplan to match the configuration to the HW address (i.e., the MAC address) instead of the network interface name. We also mark the interface as "critical: true" to ensure that the netplan backend does not bring it down.
LP: #2084012
--
Your team curtin developers is requested to review the proposed merge of ~ogayot/curtin:netplan-nbft into curtin:master.
diff --git a/curtin/commands/curthooks.py b/curtin/commands/curthooks.py
index 4bc3614..d17a597 100644
--- a/curtin/commands/curthooks.py
+++ b/curtin/commands/curthooks.py
@@ -1605,6 +1605,8 @@ Pin-Priority: -1
nvme_tcp.dracut_add_systemd_network_cmdline(target)
# Dracut will automatically call `nvme connect-all --nbft` so no need
# to generate `nvme` commands.
+
+ nvme_tcp.dracut_adapt_netplan_config(cfg, target=target)
elif nvme_tcp.need_network_in_initramfs(cfg):
nvme_tcp.initramfs_tools_configure(cfg, target)
else:
diff --git a/curtin/nvme_tcp.py b/curtin/nvme_tcp.py
index 456bbb4..b030ac3 100644
--- a/curtin/nvme_tcp.py
+++ b/curtin/nvme_tcp.py
@@ -3,6 +3,7 @@
'''Module that defines functions useful for dealing with NVMe/TCP'''
import contextlib
+import json
import pathlib
import shlex
from typing import Any, Dict, Iterator, List, Set, Tuple
@@ -11,6 +12,14 @@ import yaml
from curtin.block import nvme
from curtin.log import LOG
+from curtin.paths import target_path
+from curtin import util
+
+
+def _iter_nvme_tcp_controllers(cfg) -> Iterator[Dict[str, Any]]:
+    '''Yield each NVMe controller found in cfg whose transport is "tcp".'''
+    controllers = nvme.get_nvme_controllers_from_config(cfg)
+    yield from (ctrl for ctrl in controllers if ctrl['transport'] == 'tcp')
def get_nvme_stas_controller_directives(cfg) -> Set[str]:
@@ -18,9 +27,7 @@ def get_nvme_stas_controller_directives(cfg) -> Set[str]:
directives to write in the [Controllers] section of a nvme-stas
configuration file."""
directives = set()
- for controller in nvme.get_nvme_controllers_from_config(cfg):
- if controller['transport'] != 'tcp':
- continue
+ for controller in _iter_nvme_tcp_controllers(cfg):
controller_props = {
'transport': 'tcp',
'traddr': controller["tcp_addr"],
@@ -315,3 +322,79 @@ modprobe nvme-tcp
print(script_header, file=fh)
for cmd in get_ip_commands(cfg):
print(shlex.join(cmd), file=fh)
+
+
+class NetRuntimeError(RuntimeError):
+    '''Raised when a property of the running network cannot be determined.'''
+
+
+def get_route_dest_ifname(dest: str) -> str:
+    '''Return the name of the network interface used to route to dest.
+
+    The information is read from the "dev" property of the first entry in
+    the JSON output of `ip -j route get <dest>`.
+
+    Raises NetRuntimeError if the command fails or if its output cannot be
+    interpreted.
+    '''
+    # ValueError covers json.JSONDecodeError (e.g., empty output). TypeError
+    # covers a JSON document that is not a list of objects.
+    try:
+        out, _ = util.subp(['ip', '-j', 'route', 'get', dest], capture=True)
+        return json.loads(out)[0]['dev']
+    except (util.ProcessExecutionError, ValueError, TypeError,
+            IndexError, KeyError) as exc:
+        raise NetRuntimeError(f'could not determine route to {dest}') from exc
+
+
+def get_iface_hw_addr(ifname: str) -> str:
+    '''Return the hardware (MAC) address of the network interface ifname.
+
+    The information is read from the "address" property of the first entry
+    in the JSON output of `ip -j link show dev <ifname>`.
+
+    Raises NetRuntimeError if the command fails or if its output cannot be
+    interpreted.
+    '''
+    # ValueError covers json.JSONDecodeError (e.g., empty output). TypeError
+    # covers a JSON document that is not a list of objects.
+    try:
+        out, _ = util.subp(['ip', '-j', 'link', 'show', 'dev', ifname],
+                           capture=True)
+        return json.loads(out)[0]['address']
+    except (util.ProcessExecutionError, ValueError, TypeError,
+            IndexError, KeyError) as exc:
+        raise NetRuntimeError(f'could not retrieve MAC for {ifname}') from exc
+
+
+def dracut_adapt_netplan_config(cfg, *, target: pathlib.Path):
+    '''Modify the netplan configuration (which has already been written to
+    disk at this point) so that:
+     * critical network interfaces (those handled by dracut) are not brought
+       down during boot.
+     * netplan does not panic if such an interface gets renamed.
+
+    The interfaces considered are the ones currently used to route to the
+    NVMe/TCP controllers declared in cfg. The location of the netplan file is
+    read from cfg['write_files']['etc_netplan_installer']['path'] and is
+    resolved relative to target.
+
+    This is best-effort: when the netplan file is not declared in cfg, when
+    no interface can be determined, or when the file declares no ethernet
+    interface, the function returns without modifying anything. Lookup
+    failures for a given interface are logged and ignored.
+    '''
+    # Interfaces currently used to reach the NVMe/TCP controllers.
+    ifnames: Set[str] = set()
+    for controller in _iter_nvme_tcp_controllers(cfg):
+        try:
+            ifnames.add(get_route_dest_ifname(controller['tcp_addr']))
+        except NetRuntimeError as exc:
+            LOG.debug('%s, ignoring', str(exc))
+
+    try:
+        netplan_conf_path = pathlib.Path(
+            target_path(
+                str(target),
+                cfg['write_files']['etc_netplan_installer']['path']))
+    except (KeyError, TypeError):
+        LOG.debug('could not find netplan configuration passed to cloud-init')
+        return
+
+    if not ifnames:
+        # Nothing can match below, so do not bother reading the file.
+        LOG.debug('no network interface used for NVMe/TCP, nothing to adapt')
+        return
+
+    config = yaml.safe_load(netplan_conf_path.read_text())
+
+    try:
+        ethernets = config['network']['ethernets']
+    except (KeyError, TypeError):
+        # TypeError: the document is empty or the "network" key is null.
+        ethernets = None
+    if not ethernets:
+        LOG.debug('no ethernet interface in netplan configuration')
+        return
+
+    modified = False
+    for ifname, ifconfig in ethernets.items():
+        if ifname not in ifnames:
+            continue
+        # Ensure the interface is not brought down
+        ifconfig['critical'] = True
+        modified = True
+        if 'match' in ifconfig:
+            # Leave any pre-existing match directive untouched.
+            continue
+        # Ensure we match the HW address and not the ifname. If the MAC
+        # cannot be retrieved, keep going without a match directive rather
+        # than failing on a missing entry.
+        try:
+            ifconfig['match'] = {'macaddress': get_iface_hw_addr(ifname)}
+        except NetRuntimeError as exc:
+            LOG.debug('%s, ignoring', str(exc))
+
+    if modified:
+        # NOTE: the file is re-serialized from the parsed data, so YAML
+        # comments present in the original file are not preserved.
+        netplan_conf_path.write_text(yaml.dump(config))
diff --git a/tests/unittests/test_nvme_tcp.py b/tests/unittests/test_nvme_tcp.py
index 67396d0..163d2a0 100644
--- a/tests/unittests/test_nvme_tcp.py
+++ b/tests/unittests/test_nvme_tcp.py
@@ -5,8 +5,11 @@ from unittest.mock import patch
from curtin import nvme_tcp
+from curtin.util import ProcessExecutionError
from .helpers import CiTestCase
+import yaml
+
class TestNVMeTCP(CiTestCase):
def test_no_nvme_controller(self):
@@ -337,3 +340,122 @@ nvme connect-all --transport tcp --traddr 172.16.82.77 --trsvcid 4420
controller = transport=tcp;traddr=172.16.82.77;trsvcid=4420
'''
self.assertEqual(stafd_expected_contents, stafd.read_text())
+
+ def test_get_route_dest_ifname(self):
+ # Nominal case: util.subp is mocked to return a single JSON route, and
+ # the function is expected to return that route's "dev" value.
+ out = '''\
+[{"dst":"1.2.3.4","gateway":"192.168.0.1","dev":"enp1s0",\
+"prefsrc":"192.168.0.14","flags":[],"uid":1000,"cache":[]}]
+'''
+ with patch('curtin.nvme_tcp.util.subp',
+ return_value=(out, '')) as m_subp:
+ self.assertEqual(
+ 'enp1s0', nvme_tcp.get_route_dest_ifname('1.2.3.4'))
+ # The exact `ip` invocation is part of the contract being checked.
+ m_subp.assert_called_once_with(['ip', '-j', 'route', 'get', '1.2.3.4'],
+ capture=True)
+
+ def test_get_route_dest_ifname__no_route(self):
+ # Failure case: util.subp is mocked to raise ProcessExecutionError, and
+ # the function is expected to convert it into NetRuntimeError.
+ err = '''\
+RTNETLINK answers: Network is unreachable
+'''
+ pee = ProcessExecutionError(stdout='', stderr=err, exit_code=2, cmd=[])
+
+ with patch('curtin.nvme_tcp.util.subp', side_effect=pee) as m_subp:
+ with self.assertRaises(nvme_tcp.NetRuntimeError):
+ nvme_tcp.get_route_dest_ifname('1.2.3.4')
+ m_subp.assert_called_once_with(['ip', '-j', 'route', 'get', '1.2.3.4'],
+ capture=True)
+
+ def test_get_iface_hw_addr(self):
+ # Nominal case: util.subp is mocked to return a single JSON link entry,
+ # and the function is expected to return that entry's "address" value.
+ out = '''\
+[{"ifindex":3,"ifname":"enp1s0",\
+"flags":["BROADCAST","MULTICAST","UP","LOWER_UP"],\
+"mtu":1500,"qdisc":"fq_codel","operstate":"UP","linkmode":"DEFAULT",\
+"group":"default","txqlen":1000,"link_type":"ether",\
+"address":"4a:25:e2:5b:dc:2e","broadcast":"ff:ff:ff:ff:ff:ff"}]
+'''
+ with patch('curtin.nvme_tcp.util.subp',
+ return_value=(out, '')) as m_subp:
+ self.assertEqual(
+ '4a:25:e2:5b:dc:2e', nvme_tcp.get_iface_hw_addr('enp1s0'))
+ # The exact `ip` invocation is part of the contract being checked.
+ m_subp.assert_called_once_with(
+ ['ip', '-j', 'link', 'show', 'dev', 'enp1s0'],
+ capture=True)
+
+ def test_get_iface_hw_addr__no_iface(self):
+ # Failure case: util.subp is mocked to raise ProcessExecutionError, and
+ # the function is expected to convert it into NetRuntimeError.
+ err = '''\
+Device "enp1s0" does not exist.
+'''
+ pee = ProcessExecutionError(stdout='', stderr=err, exit_code=1, cmd=[])
+ with patch('curtin.nvme_tcp.util.subp', side_effect=pee) as m_subp:
+ with self.assertRaises(nvme_tcp.NetRuntimeError):
+ nvme_tcp.get_iface_hw_addr('enp1s0')
+ m_subp.assert_called_once_with(
+ ['ip', '-j', 'link', 'show', 'dev', 'enp1s0'],
+ capture=True)
+
+ def test_dracut_adapt_netplan_config__ens3(self):
+ # End-to-end check on a real file: ens3 is reported (via mocks) as the
+ # interface routing to the NVMe/TCP controller, so its netplan entry is
+ # expected to gain "critical: true" and a match on the mocked MAC.
+ content = '''\
+# This is the network config written by 'subiquity'
+network:
+ ethernets:
+ ens3:
+ addresses:
+ - 10.0.2.15/24
+ nameservers:
+ addresses:
+ - 8.8.8.8
+ - 8.4.8.4
+ search:
+ - foo
+ - bar
+ routes:
+ - to: default
+ via: 10.0.2.2
+ version: 2
+'''
+ # One TCP controller, plus the location of the netplan file relative to
+ # the target.
+ cfg = {
+ 'storage': {
+ 'config': [{
+ 'type': 'nvme_controller',
+ 'id': 'nvme-controller-nvme0',
+ 'transport': 'tcp',
+ 'tcp_addr': '10.0.2.144',
+ 'tcp_port': 4420,
+ }],
+ }, 'write_files': {
+ 'etc_netplan_installer': {
+ 'path': 'etc/netplan/installer.yaml'}
+ }
+ }
+
+ # Write the initial netplan file under a temporary target directory.
+ target = Path(self.tmp_dir())
+ netplan_conf_path = target / 'etc/netplan/installer.yaml'
+ netplan_conf_path.parent.mkdir(parents=True)
+ netplan_conf_path.write_text(content)
+
+ # Avoid running `ip`: both lookups are replaced with fixed answers.
+ p_route_ifname = patch('curtin.nvme_tcp.get_route_dest_ifname',
+ return_value='ens3')
+ p_hw_addr = patch('curtin.nvme_tcp.get_iface_hw_addr',
+ return_value='aa:bb:cc:dd:ee:ff')
+ with p_route_ifname, p_hw_addr:
+ nvme_tcp.dracut_adapt_netplan_config(cfg, target=target)
+
+ # Re-read the file from disk to check what was actually written.
+ new_content = yaml.safe_load(netplan_conf_path.read_text())
+ new_ens3_content = new_content['network']['ethernets']['ens3']
+
+ self.assertEqual(
+ new_ens3_content['match']['macaddress'], 'aa:bb:cc:dd:ee:ff')
+ self.assertTrue(new_ens3_content['critical'])
+
+ def test_dracut_adapt_netplan_config__no_config(self):
+ # Neither call below provides a 'path' for etc_netplan_installer, so the
+ # function is expected to return without raising. There are no
+ # assertions: the test passes as long as no exception escapes.
+ # NOTE(review): presumably nothing exists at /target, so this also
+ # relies on the file never being read -- confirm.
+ content = '''\
+# This is the network config written by 'subiquity'
+network:
+ ethernets: {}
+ version: 2
+'''
+ nvme_tcp.dracut_adapt_netplan_config({}, target=Path('/target'))
+ nvme_tcp.dracut_adapt_netplan_config(
+ {'write_files': {
+ 'etc_netplan_installer': {
+ 'content': content}}}, target=Path('/target'))
Follow ups