← Back to team overview

curtin-dev team mailing list archive

[Merge] curtin:nvme-initramfs into curtin:master

 

Olivier Gayot has proposed merging curtin:nvme-initramfs into curtin:master.

Commit message:
do not squash

Requested reviews:
  curtin developers (curtin-dev)

For more details, see:
https://code.launchpad.net/~curtin-dev/curtin/+git/curtin/+merge/461452

This is experimental work that makes it possible to boot with the rootfs stored on a remote NVMe drive (using NVMe over TCP). The /boot and /boot/efi filesystems should still be stored on a local drive.

The implementation makes use of an initramfs-tools hook + bootscript to bring up the network in the initramfs, and then run a set of `nvme connect-all` commands.

One of the main challenges is applying the expected network configuration. Currently, subiquity does not explicitly provide the network configuration to curtin. Instead, it provides the content of a netplan configuration file to curtin, and expects curtin to pass it on to cloud-init for first boot.

As a workaround, I implemented a small parser for the netplan configuration, which emits a set of `ip` and/or `dhcpcd` commands to be executed during early boot. It is obviously very limited: it only works with ethernet interfaces (with or without DHCP), static IP addresses, and static routes. Any other type of interface, such as Wi-Fi or bonds, is ignored. Manually configured DNS servers are also ignored.
-- 
Your team curtin developers is requested to review the proposed merge of curtin:nvme-initramfs into curtin:master.
diff --git a/curtin/commands/curthooks.py b/curtin/commands/curthooks.py
index 695ba30..132ccab 100644
--- a/curtin/commands/curthooks.py
+++ b/curtin/commands/curthooks.py
@@ -8,10 +8,13 @@ import pathlib
 import platform
 import re
 import sys
+import shlex
 import shutil
 import textwrap
 from typing import List, Set, Tuple
 
+import yaml
+
 from curtin import config
 from curtin import block
 from curtin import distro
@@ -1528,21 +1531,130 @@ def get_nvme_stas_controller_directives(cfg) -> Set[str]:
     return directives
 
 
-def configure_nvme_stas(cfg, target):
+def nvmeotcp_get_nvme_commands(cfg) -> Set[Tuple[str, ...]]:
+    """Parse the storage configuration and return a set of commands
+    (argv tuples) to run to bring up the NVMe over TCP block devices."""
+    commands: Set[Tuple[str, ...]] = set()
+    if 'storage' not in cfg or not isinstance(cfg['storage'], dict):
+        return commands
+    storage = cfg['storage']
+    if 'config' not in storage or storage['config'] == 'disabled':
+        return commands
+    # Not bound to a local named "config", which would shadow curtin.config.
+    for item in storage['config']:
+        if item['type'] != 'nvme_controller':
+            continue
+        if item['transport'] != 'tcp':
+            continue
+
+        commands.add((
+            'nvme', 'connect-all',
+            '--transport', 'tcp',
+            '--traddr', item['tcp_addr'],
+            '--trsvcid', str(item['tcp_port']),
+        ))
+
+    return commands
+
+
+def nvmeotcp_need_network_in_initramfs(cfg) -> bool:
+    """Parse the storage configuration and check if any of the mountpoints
+    essential for booting (/, /usr, /var and below) requires network."""
+    if 'storage' not in cfg or not isinstance(cfg['storage'], dict):
+        return False
+    storage = cfg['storage']
+    if 'config' not in storage or storage['config'] == 'disabled':
+        return False
+    # Not bound to a local named "config", which would shadow curtin.config.
+    for item in storage['config']:
+        if item['type'] != 'mount':
+            continue
+        if '_netdev' not in item.get('options', '').split(','):
+            continue
+        # We found a mountpoint that requires network. It is essential for
+        # booting if it is /, /usr, /var or below the latter two. Compare
+        # whole path components so that e.g. /usrdata does not match /usr.
+        path = item['path']
+        if path in ('/', '/usr', '/var') \
+                or path.startswith(('/usr/', '/var/')):
+            return True
+    return False
+
+
+def nvmeotcp_get_ip_commands(cfg) -> List[Tuple[str, ...]]:
+    """Look for the netplan configuration (supplied by subiquity using
+    write_files directives) and attempt to extrapolate a set of 'ip' + 'dhcpcd'
+    commands that would produce more or less the expected network
+    configuration. At the moment, only trivial network configurations are
+    supported, which are ethernet interfaces with or without DHCP and optional
+    static routes."""
+    commands: List[Tuple[str, ...]] = []
+
+    try:
+        content = cfg['write_files']['etc_netplan_installer']['content']
+    except KeyError:
+        return []
+
+    netplan = yaml.safe_load(content)
+
+    try:
+        ethernets = netplan['network']['ethernets']
+    except (KeyError, TypeError):  # TypeError: empty document loads as None
+        return []
+
+    for ifname, ifconfig in ethernets.items():
+        # Handle static IP addresses
+        for address in ifconfig.get('addresses', []):
+            commands.append(('ip', 'address', 'add', address, 'dev', ifname))
+
+        # Handle DHCPv4 and DHCPv6
+        dhcp4 = ifconfig.get('dhcp4', False)
+        dhcp6 = ifconfig.get('dhcp6', False)
+        if dhcp4 and dhcp6:
+            commands.append(('dhcpcd', ifname))
+        elif dhcp4:
+            commands.append(('dhcpcd', '-4', ifname))
+        elif dhcp6:
+            commands.append(('dhcpcd', '-6', ifname))
+        else:
+            commands.append(('ip', 'link', 'set', ifname, 'up'))
+
+        # Handle static routes
+        for route in ifconfig.get('routes', []):
+            cmd = ['ip', 'route', 'add', route['to']]
+            if 'via' in route:
+                cmd += ['via', route['via']]
+            if route.get('on-link', False):
+                cmd += ['dev', ifname]
+            commands.append(tuple(cmd))
+
+    return commands
+
+def configure_nvme_over_tcp(cfg, target):
     """If any NVMe controller using the TCP transport is present in the storage
-    configuration, create a nvme-stas configuration so that the remote drives
-    can be made available at boot."""
+    configuration, create a nvme-stas configuration and configure the initramfs
+    so that the remote drives can be made available at boot.
+    Please note that the NVMe over TCP support in curtin is experimental and in
+    active development. Currently, it only works with trivial network
+    configurations ; supplied by Subiquity."""
     controllers = get_nvme_stas_controller_directives(cfg)
 
     if not controllers:
         return
 
-    LOG.info('NVMe-over-TCP configuration found'
-             ' , writing nvme-stas configuration')
+    LOG.info('NVMe-over-TCP configuration found')
+    LOG.info('writing nvme-stas configuration')
     target = pathlib.Path(target)
     stas_dir = target / 'etc' / 'stas'
     stas_dir.mkdir(parents=True, exist_ok=True)
     with (stas_dir / 'stafd-curtin.conf').open('w', encoding='utf-8') as fh:
+        header = '''\
+# This file was created by curtin.
+# If you make modifications to it, please remember to also update
+# scripts in etc/curtin-nvme-over-tcp and then regenerate the initramfs using
+# the command `update-initramfs -u`.
+'''
+        print(header, file=fh)
         print('[Controllers]', file=fh)
         for controller in controllers:
             print(controller, file=fh)
@@ -1551,6 +1663,97 @@ def configure_nvme_stas(cfg, target):
         (stas_dir / 'stafd.conf').replace(stas_dir / '.stafd.conf.bak')
     (stas_dir / 'stafd.conf').symlink_to('stafd-curtin.conf')
 
+    if not nvmeotcp_need_network_in_initramfs(cfg):
+        # nvme-stas should be enough to boot.
+        return
+
+    LOG.info('configuring network in initramfs for NVMe over TCP')
+
+    hook_contents = '''\
+#!/bin/sh
+
+PREREQ="udev"
+
+prereqs()
+{
+    echo "$PREREQ"
+}
+
+case "$1" in
+prereqs)
+    prereqs
+    exit 0
+    ;;
+esac
+
+. /usr/share/initramfs-tools/hook-functions
+
+copy_exec /usr/sbin/nvme /usr/sbin
+copy_file config /etc/nvme/hostid /etc/nvme/
+copy_file config /etc/nvme/hostnqn /etc/nvme/
+copy_file config /etc/curtin-nvme-over-tcp/network-up /etc/curtin-nvme-over-tcp/
+copy_file config /etc/curtin-nvme-over-tcp/connect-nvme /etc/curtin-nvme-over-tcp/
+
+manual_add_modules nvme-tcp
+'''
+
+    initramfs_hooks_dir = target / 'etc' / 'initramfs-tools' / 'hooks'
+    initramfs_hooks_dir.mkdir(parents=True, exist_ok=True)
+    with (initramfs_hooks_dir / 'curtin-nvme-over-tcp').open('w', encoding='utf-8') as fh:
+        print(hook_contents, file=fh)
+    (initramfs_hooks_dir / 'curtin-nvme-over-tcp').chmod(0o755)
+
+    bootscript_contents = '''\
+#!/bin/sh
+
+    PREREQ=""
+prereqs() { echo "$PREREQ"; }
+case "$1" in
+prereqs)
+    prereqs
+    exit 0
+    ;;
+esac
+
+. /etc/curtin-nvme-over-tcp/network-up
+
+modprobe nvme-tcp
+
+. /etc/curtin-nvme-over-tcp/connect-nvme
+
+'''
+
+    initramfs_init_premount_dir = target / 'etc' / 'initramfs-tools' / 'scripts' / 'init-premount'
+    initramfs_init_premount_dir.mkdir(parents=True, exist_ok=True)
+    bootscript = initramfs_init_premount_dir / 'curtin-nvme-over-tcp'
+    with bootscript.open('w', encoding='utf-8') as fh:
+        print(bootscript_contents, file=fh)
+    bootscript.chmod(0o755)
+
+
+
+    curtin_nvme_over_tcp_dir = target / 'etc' / 'curtin-nvme-over-tcp'
+    curtin_nvme_over_tcp_dir.mkdir(parents=True, exist_ok=True)
+    network_up_script = curtin_nvme_over_tcp_dir / 'network-up'
+    connect_nvme_script =  curtin_nvme_over_tcp_dir / 'connect-nvme'
+
+    script_header = '''\
+#!/bin/sh
+
+# This file was created by curtin.
+# If you make modifications to it, please remember to regenerate the initramfs
+# using the command `update-initramfs -u`.
+'''
+    with open(connect_nvme_script, 'w', encoding='utf-8') as fh:
+        print(script_header, file=fh)
+        for cmd in nvmeotcp_get_nvme_commands(cfg):
+            print(shlex.join(cmd), file=fh)
+
+    with open(network_up_script, 'w', encoding='utf-8') as fh:
+        print(script_header, file=fh)
+        for cmd in nvmeotcp_get_ip_commands(cfg):
+            print(shlex.join(cmd), file=fh)
+
 
 def handle_cloudconfig(cfg, base_dir=None):
     """write cloud-init configuration files into base_dir.
@@ -1815,10 +2018,10 @@ def builtin_curthooks(cfg, target, state):
         configure_mdadm(cfg, state_etcd, target, osfamily=osfamily)
 
     with events.ReportEventStack(
-            name=stack_prefix + '/configuring-nvme-stas-service',
+            name=stack_prefix + '/configuring-nvme-over-tcp',
             reporting_enabled=True, level="INFO",
-            description="configuring NVMe STorage Appliance Services"):
-        configure_nvme_stas(cfg, target)
+            description="configuring NVMe over TCP"):
+        configure_nvme_over_tcp(cfg, target)
 
     if osfamily == DISTROS.debian:
         with events.ReportEventStack(
diff --git a/tests/unittests/test_curthooks.py b/tests/unittests/test_curthooks.py
index e615b38..87e0ad0 100644
--- a/tests/unittests/test_curthooks.py
+++ b/tests/unittests/test_curthooks.py
@@ -2017,7 +2017,7 @@ class TestCurthooksGrubDebconf(CiTestCase):
         self.m_debconf.assert_called_with(expectedcfg, target)
 
 
-class TestCurthooksNVMeStas(CiTestCase):
+class TestCurthooksNVMeOverTCP(CiTestCase):
     def test_get_nvme_stas_controller_directives__no_nvme_controller(self):
         self.assertFalse(curthooks.get_nvme_stas_controller_directives({
             "storage": {
@@ -2093,6 +2093,219 @@ class TestCurthooksNVMeStas(CiTestCase):
             },
         }))
 
+    def test_nvmeotcp_get_nvme_commands__no_nvme_controller(self):
+        self.assertFalse(curthooks.nvmeotcp_get_nvme_commands({
+            "storage": {
+                "config": [
+                    {"type": "partition"},
+                    {"type": "mount"},
+                    {"type": "disk"},
+                ],
+            },
+        }))
+
+    def test_nvmeotcp_get_nvme_commands__pcie_controller(self):
+        self.assertFalse(curthooks.nvmeotcp_get_nvme_commands({
+            "storage": {
+                "config": [
+                    {"type": "nvme_controller", "transport": "pcie"},
+                ],
+            },
+        }))
+
+    def test_nvmeotcp_get_nvme_commands__tcp_controller(self):
+        expected = {(
+            "nvme", "connect-all",
+            "--transport", "tcp",
+            "--traddr", "1.2.3.4",
+            "--trsvcid", "1111",
+            )
+        }
+
+        result = curthooks.nvmeotcp_get_nvme_commands({
+            "storage": {
+                "config": [
+                    {
+                        "type": "nvme_controller",
+                        "transport": "tcp",
+                        "tcp_addr": "1.2.3.4",
+                        "tcp_port": "1111",
+                    },
+                ],
+            },
+        })
+        self.assertEqual(expected, result)
+
+    def test_nvmeotcp_get_nvme_commands__three_nvme_controllers(self):
+        expected = {(
+            "nvme", "connect-all",
+            "--transport", "tcp",
+            "--traddr", "1.2.3.4",
+            "--trsvcid", "1111",
+            ), (
+            "nvme", "connect-all",
+            "--transport", "tcp",
+            "--traddr", "4.5.6.7",
+            "--trsvcid", "1212",
+            ),
+        }
+
+        result = curthooks.nvmeotcp_get_nvme_commands({
+            "storage": {
+                "config": [
+                    {
+                        "type": "nvme_controller",
+                        "transport": "tcp",
+                        "tcp_addr": "1.2.3.4",
+                        "tcp_port": "1111",
+                    }, {
+                        "type": "nvme_controller",
+                        "transport": "tcp",
+                        "tcp_addr": "4.5.6.7",
+                        "tcp_port": "1212",
+                    }, {
+                        "type": "nvme_controller",
+                        "transport": "pcie",
+                    },
+                ],
+            },
+        })
+        self.assertEqual(expected, result)
+
+    def test_nvmeotcp_get_nvme_commands__empty_conf(self):
+        self.assertFalse(curthooks.nvmeotcp_get_nvme_commands({}))
+        self.assertFalse(curthooks.nvmeotcp_get_nvme_commands(
+            {"storage": False}))
+        self.assertFalse(curthooks.nvmeotcp_get_nvme_commands(
+            {"storage": {}}))
+        self.assertFalse(curthooks.nvmeotcp_get_nvme_commands({
+            "storage": {
+                "config": "disabled",
+            },
+        }))
+
+    def test_nvmeotcp_get_ip_commands__ethernet_static(self):
+        netcfg = """\
+# This is the network config written by 'subiquity'
+network:
+  ethernets:
+    ens3:
+     addresses:
+     - 10.0.2.15/24
+     nameservers:
+       addresses:
+       - 8.8.8.8
+       - 8.4.8.4
+       search:
+       - foo
+       - bar
+     routes:
+     - to: default
+       via: 10.0.2.2
+  version: 2"""
+
+        cfg = {
+            "write_files": {
+                "etc_netplan_installer": {
+                    "content": netcfg,
+                    "path": "etc/netplan/00-installer-config.yaml",
+                    "permissions": "0600",
+                },
+            },
+        }
+        expected = [
+            ("ip", "address", "add", "10.0.2.15/24", "dev", "ens3"),
+            ("ip", "link", "set", "ens3", "up"),
+            ("ip", "route", "add", "default", "via", "10.0.2.2"),
+        ]
+        self.assertEqual(expected, curthooks.nvmeotcp_get_ip_commands(cfg))
+
+    def test_nvmeotcp_get_ip_commands__ethernet_dhcp4(self):
+        netcfg = """\
+# This is the network config written by 'subiquity'
+network:
+  ethernets:
+    ens3:
+     dhcp4: true
+  version: 2"""
+
+        cfg = {
+            "write_files": {
+                "etc_netplan_installer": {
+                    "content": netcfg,
+                    "path": "etc/netplan/00-installer-config.yaml",
+                    "permissions": "0600",
+                },
+            },
+        }
+        expected = [
+            ("dhcpcd", "-4", "ens3"),
+        ]
+        self.assertEqual(expected, curthooks.nvmeotcp_get_ip_commands(cfg))
+
+    def test_nvmeotcp_need_network_in_initramfs__usr_is_netdev(self):
+        self.assertTrue(curthooks.nvmeotcp_need_network_in_initramfs({
+            "storage": {
+                "config": [
+                    {
+                        "type": "mount",
+                        "path": "/usr",
+                        "options": "default,_netdev",
+                    }, {
+                        "type": "mount",
+                        "path": "/",
+                    }, {
+                        "type": "mount",
+                        "path": "/boot",
+                    },
+                ],
+            },
+        }))
+
+    def test_nvmeotcp_need_network_in_initramfs__rootfs_is_netdev(self):
+        self.assertTrue(curthooks.nvmeotcp_need_network_in_initramfs({
+            "storage": {
+                "config": [
+                    {
+                        "type": "mount",
+                        "path": "/",
+                        "options": "default,_netdev",
+                    }, {
+                        "type": "mount",
+                        "path": "/boot",
+                    },
+                ],
+            },
+        }))
+
+    def test_nvmeotcp_need_network_in_initramfs__only_home_is_netdev(self):
+        self.assertFalse(curthooks.nvmeotcp_need_network_in_initramfs({
+            "storage": {
+                "config": [
+                    {
+                        "type": "mount",
+                        "path": "/home",
+                        "options": "default,_netdev",
+                    }, {
+                        "type": "mount",
+                        "path": "/",
+                    },
+                ],
+            },
+        }))
+
+    def test_nvmeotcp_need_network_in_initramfs__empty_conf(self):
+        self.assertFalse(curthooks.nvmeotcp_need_network_in_initramfs({}))
+        self.assertFalse(curthooks.nvmeotcp_need_network_in_initramfs(
+            {"storage": False}))
+        self.assertFalse(curthooks.nvmeotcp_need_network_in_initramfs(
+            {"storage": {}}))
+        self.assertFalse(curthooks.nvmeotcp_need_network_in_initramfs({
+            "storage": {
+                "config": "disabled",
+            },
+        }))
+
 
 class TestUefiFindGrubDeviceIds(CiTestCase):
 

Follow ups