← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad-buildd:fix-nvidia-device-handling into launchpad-buildd:master

 

Colin Watson has proposed merging ~cjwatson/launchpad-buildd:fix-nvidia-device-handling into launchpad-buildd:master.

Commit message:
Fix nvidia device node handling for devtmpfs

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad-buildd/+git/launchpad-buildd/+merge/443287

https://code.launchpad.net/~vorlon/launchpad-buildd/+git/launchpad-buildd/+merge/442776 turned out to break the NVIDIA GPU case on dogfood: mounting devtmpfs already creates the device nodes (although the kernel team thinks this shouldn't be relied upon), so let's guard against mknod failing due to that.  (`path_exists` is the equivalent of `test -e` within the container.)
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad-buildd:fix-nvidia-device-handling into launchpad-buildd:master.
diff --git a/lpbuildd/target/lxd.py b/lpbuildd/target/lxd.py
index 583b9ca..7b71a88 100644
--- a/lpbuildd/target/lxd.py
+++ b/lpbuildd/target/lxd.py
@@ -615,7 +615,7 @@ class LXD(Backend):
             for path in self._nvidia_container_paths:
                 if path.startswith("/dev/"):
                     st = os.stat(path)
-                    if stat.S_ISCHR(st.st_mode):
+                    if stat.S_ISCHR(st.st_mode) and not self.path_exists(path):
                         self.run(
                             [
                                 "mknod",
diff --git a/lpbuildd/target/tests/test_lxd.py b/lpbuildd/target/tests/test_lxd.py
index 22e9eb4..2cbe972 100644
--- a/lpbuildd/target/tests/test_lxd.py
+++ b/lpbuildd/target/tests/test_lxd.py
@@ -449,7 +449,11 @@ class TestLXD(TestCase):
     # XXX cjwatson 2022-08-25: Refactor this to use some more sensible kind
     # of test parameterization.
     def test_start(
-        self, arch="amd64", unmounts_cpuinfo=False, gpu_nvidia=False
+        self,
+        arch="amd64",
+        unmounts_cpuinfo=False,
+        gpu_nvidia=False,
+        gpu_nvidia_device_nodes_exist=False,
     ):
         self.fakeFS()
         DM_BLOCK_MAJOR = random.randrange(128, 255)
@@ -482,6 +486,9 @@ class TestLXD(TestCase):
             os.mknod(
                 "/dev/nvidiactl", stat.S_IFCHR | 0o666, os.makedev(195, 255)
             )
+            if gpu_nvidia_device_nodes_exist:
+                existing_files["/dev/nvidia0"] = []
+                existing_files["/dev/nvidiactl"] = []
             gpu_nvidia_paths = [
                 "/dev/nvidia0",
                 "/dev/nvidiactl",
@@ -612,35 +619,36 @@ class TestLXD(TestCase):
                 )
             )
         if gpu_nvidia:
-            expected_args.extend(
-                [
-                    Equals(
-                        lxc
-                        + [
-                            "mknod",
-                            "-m",
-                            "0666",
-                            "/dev/nvidia0",
-                            "c",
-                            "195",
-                            "0",
-                        ]
-                    ),
-                    Equals(
-                        lxc
-                        + [
-                            "mknod",
-                            "-m",
-                            "0666",
-                            "/dev/nvidiactl",
-                            "c",
-                            "195",
-                            "255",
-                        ]
-                    ),
-                    Equals(lxc + ["/sbin/ldconfig"]),
-                ]
-            )
+            if not gpu_nvidia_device_nodes_exist:
+                expected_args.extend(
+                    [
+                        Equals(
+                            lxc
+                            + [
+                                "mknod",
+                                "-m",
+                                "0666",
+                                "/dev/nvidia0",
+                                "c",
+                                "195",
+                                "0",
+                            ]
+                        ),
+                        Equals(
+                            lxc
+                            + [
+                                "mknod",
+                                "-m",
+                                "0666",
+                                "/dev/nvidiactl",
+                                "c",
+                                "195",
+                                "255",
+                            ]
+                        ),
+                    ]
+                )
+            expected_args.append(Equals(lxc + ["/sbin/ldconfig"]))
         expected_args.extend(
             [
                 Equals(
@@ -809,6 +817,12 @@ class TestLXD(TestCase):
     def test_start_gpu_nvidia(self):
         self.test_start(gpu_nvidia=True)
 
+    def test_start_gpu_nvidia_device_nodes_exist(self):
+        # Starting a container with NVIDIA GPU support works even if
+        # mounting devtmpfs inside the container causes the device nodes to
+        # exist.
+        self.test_start(gpu_nvidia=True, gpu_nvidia_device_nodes_exist=True)
+
     def test_run(self):
         processes_fixture = self.useFixture(FakeProcesses())
         processes_fixture.add(lambda _: {}, name="lxc")

Follow ups