← Back to team overview

curtin-dev team mailing list archive

[Merge] ~dbungert/curtin:flock-ex into curtin:master

 

Dan Bungert has proposed merging ~dbungert/curtin:flock-ex into curtin:master.

Commit message:
do not squash

Requested reviews:
  curtin developers (curtin-dev)
Related bugs:
  Bug #2016860 in Ubuntu on IBM z Systems: "Error/crash while trying to wipe disk during 23.04+ install"
  https://bugs.launchpad.net/ubuntu-z-systems/+bug/2016860
  Bug #2057661 in subiquity: "Daily install w/ ZFS + encryption: partitioning crashed with CurtinInstallError"
  https://bugs.launchpad.net/subiquity/+bug/2057661

For more details, see:
https://code.launchpad.net/~dbungert/curtin/+git/curtin/+merge/462753

Add exclusive flocks to known problem areas:

* ZFS encryption LP: #2057661
* wipefs LP: #2016860


-- 
Your team curtin developers is requested to review the proposed merge of ~dbungert/curtin:flock-ex into curtin:master.
diff --git a/curtin/block/zfs.py b/curtin/block/zfs.py
index 767ad30..ccdddbb 100644
--- a/curtin/block/zfs.py
+++ b/curtin/block/zfs.py
@@ -8,6 +8,7 @@ import os
 import tempfile
 import secrets
 import shutil
+from contextlib import ExitStack
 from pathlib import Path
 
 from curtin.config import merge_config
@@ -31,11 +32,12 @@ ZFS_UNSUPPORTED_RELEASES = ['precise', 'trusty']
 
 
 class ZPoolEncryption:
-    def __init__(self, poolname, style, keyfile):
+    def __init__(self, vdevs, poolname, style, keyfile):
         self.poolname = poolname
         self.style = style
         self.keyfile = keyfile
         self.system_key = None
+        self.vdevs = vdevs
 
     def get_system_key(self):
         if self.system_key is None:
@@ -88,24 +90,31 @@ class ZPoolEncryption:
             self.poolname, "keystore", {"encryption": "off"}, keystore_size,
         )
 
-        # cryptsetup format and open this keystore
-        keystore_volume = f"/dev/zvol/{self.poolname}/keystore"
-        cmd = ["cryptsetup", "luksFormat", keystore_volume, self.keyfile]
-        util.subp(cmd)
-        dm_name = f"keystore-{self.poolname}"
-        cmd = [
-            "cryptsetup", "open", "--type", "luks", keystore_volume, dm_name,
-            "--key-file", self.keyfile,
-        ]
-        util.subp(cmd)
-
-        # format as ext4, mount it, move the previously-generated system key
-        dmpath = f"/dev/mapper/{dm_name}"
-        cmd = ["mke2fs", "-t", "ext4", dmpath, "-L", dm_name]
-        util.subp(cmd, capture=True)
-
-        keystore_root = f"/run/keystore/{self.poolname}"
-        with util.mount(dmpath, keystore_root):
+        with ExitStack() as es:
+            for vdev in self.vdevs:
+                es.enter_context(util.FlockEx(vdev))
+
+            # cryptsetup format and open this keystore
+            keystore_volume = f"/dev/zvol/{self.poolname}/keystore"
+            cmd = ["cryptsetup", "luksFormat", keystore_volume, self.keyfile]
+            util.subp(cmd)
+            dm_name = f"keystore-{self.poolname}"
+            cmd = [
+                "cryptsetup", "open", "--type", "luks", keystore_volume,
+                dm_name, "--key-file", self.keyfile,
+            ]
+            util.subp(cmd)
+
+        with ExitStack() as es:
+            # format as ext4, mount it, move the previously-generated systemkey
+            dmpath = f"/dev/mapper/{dm_name}"
+            es.enter_context(util.FlockEx(dmpath))
+            cmd = ["mke2fs", "-t", "ext4", dmpath, "-L", dm_name]
+            util.subp(cmd, capture=True)
+
+            keystore_root = f"/run/keystore/{self.poolname}"
+
+            es.enter_context(util.mount(dmpath, keystore_root))
             ks_system_key = f"{keystore_root}/system.key"
             shutil.move(self.system_key, ks_system_key)
             Path(ks_system_key).chmod(0o400)
@@ -256,7 +265,7 @@ def zpool_create(poolname, vdevs, storage_config=None, context=None,
     if zfs_properties:
         merge_config(zfs_cfg, zfs_properties)
 
-    encryption = ZPoolEncryption(poolname, encryption_style, keyfile)
+    encryption = ZPoolEncryption(vdevs, poolname, encryption_style, keyfile)
     encryption.validate()
     if encryption.in_use():
         merge_config(zfs_cfg, encryption.dataset_properties())
diff --git a/curtin/commands/block_meta.py b/curtin/commands/block_meta.py
index 58889b9..dc26a8c 100644
--- a/curtin/commands/block_meta.py
+++ b/curtin/commands/block_meta.py
@@ -795,26 +795,29 @@ def disk_handler(info, storage_config, context):
                  "table" % disk)
     else:
         # wipe the disk and create the partition table if instructed to do so
-        if config.value_as_boolean(info.get('wipe')):
-            block.wipe_volume(disk, mode=info.get('wipe'))
-        if config.value_as_boolean(ptable):
-            LOG.info("labeling device: '%s' with '%s' partition table", disk,
-                     ptable)
-            if ptable == "gpt":
-                # Wipe both MBR and GPT that may be present on the disk.
-                # N.B.: wipe_volume wipes 1M at front and end of the disk.
-                # This could destroy disk data in filesystems that lived
-                # there.
-                block.wipe_volume(disk, mode='superblock')
-            elif ptable in _dos_names:
-                util.subp(["parted", disk, "--script", "mklabel", "msdos"])
-            elif ptable == "vtoc":
-                util.subp(["fdasd", "-c", "/dev/null", disk])
-        holders = clear_holders.get_holders(disk)
-        if len(holders) > 0:
-            LOG.info('Detected block holders on disk %s: %s', disk, holders)
-            clear_holders.clear_holders(disk)
-            clear_holders.assert_clear(disk)
+        with util.FlockEx(disk):
+            if config.value_as_boolean(info.get('wipe')):
+                block.wipe_volume(disk, mode=info.get('wipe'))
+            if config.value_as_boolean(ptable):
+                LOG.info("labeling device: '%s' with '%s' partition table",
+                         disk, ptable)
+                if ptable == "gpt":
+                    # Wipe both MBR and GPT that may be present on the disk.
+                    # N.B.: wipe_volume wipes 1M at front and end of the disk.
+                    # This could destroy disk data in filesystems that lived
+                    # there.
+                    block.wipe_volume(disk, mode='superblock')
+                elif ptable in _dos_names:
+                    util.subp(["parted", disk, "--script", "mklabel", "msdos"])
+                elif ptable == "vtoc":
+                    util.subp(["fdasd", "-c", "/dev/null", disk])
+            holders = clear_holders.get_holders(disk)
+            if len(holders) > 0:
+                LOG.info(
+                    'Detected block holders on disk %s: %s', disk, holders
+                )
+                clear_holders.clear_holders(disk)
+                clear_holders.assert_clear(disk)
 
     # Make the name if needed
     if info.get('name'):
diff --git a/curtin/util.py b/curtin/util.py
index dcf9422..66735e8 100644
--- a/curtin/util.py
+++ b/curtin/util.py
@@ -4,6 +4,7 @@ import argparse
 import collections
 from contextlib import contextmanager, suppress
 import errno
+import fcntl
 import json
 import os
 import platform
@@ -1420,4 +1421,40 @@ def not_exclusive_retry(fun, *args, **kwargs):
     time.sleep(1)
     return fun(*args, **kwargs)
 
+
+class FlockEx:
+    """Acquire an exclusive lock on device.
+    See https://systemd.io/BLOCK_DEVICE_LOCKING/ for details and
+    motivation, which has the summary:
+        Use BSD file locks (flock(2)) on block device nodes to synchronize
+        access for partitioning and file system formatting tools.
+
+    params: device: block device node to exclusively lock
+    """
+    def __init__(self, device, timeout=60):
+        self.device = device
+        self.timeout = timeout
+        self.retries = 60
+
+    def __enter__(self):
+        self.lock_fd = os.open(self.device, os.O_RDONLY)
+
+        LOG.debug(f"Acquiring fcntl LOCK_EX on {self.device}")
+
+        for i in range(self.retries):
+            try:
+                fcntl.flock(self.lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+                return
+            except OSError as ex:
+                LOG.debug(f"Try {i}: lock acquisition failed with {ex}")
+                time.sleep(self.timeout / self.retries)
+        else:
+            raise TimeoutError("Failed to acquire LOCK_EX on {self.device}")
+
+    def __exit__(self, *args):
+        with suppress(Exception):
+            fcntl.flock(self.lock_fd, fcntl.LOCK_UN)
+        with suppress(Exception):
+            os.close(self.lock_fd)
+
 # vi: ts=4 expandtab syntax=python
diff --git a/tests/unittests/test_block_zfs.py b/tests/unittests/test_block_zfs.py
index dd80caf..6d5c5db 100644
--- a/tests/unittests/test_block_zfs.py
+++ b/tests/unittests/test_block_zfs.py
@@ -614,7 +614,7 @@ class TestZfsKeystore(CiTestCase):
     def test_create_system_key(self, m_token_bytes):
         m_token_bytes.return_value = key = self.random_string()
         m_open = mock.mock_open()
-        zpe = zfs.ZPoolEncryption(None, None, None)
+        zpe = zfs.ZPoolEncryption(None, None, None, None)
         with mock.patch("builtins.open", m_open):
             system_key = zpe.get_system_key()
         self.assertIsNotNone(system_key)
@@ -622,7 +622,7 @@ class TestZfsKeystore(CiTestCase):
         m_open.return_value.write.assert_called_with(key)
 
     def test_validate_good(self):
-        zpe = zfs.ZPoolEncryption("pool1", "luks_keystore", self.keyfile)
+        zpe = zfs.ZPoolEncryption(None, "pool1", "luks_keystore", self.keyfile)
         try:
             zpe.validate()
         except Exception:
@@ -630,22 +630,22 @@ class TestZfsKeystore(CiTestCase):
         self.assertTrue(zpe.in_use())
 
     def test_validate_missing_pool(self):
-        zpe = zfs.ZPoolEncryption(None, "luks_keystore", self.keyfile)
+        zpe = zfs.ZPoolEncryption(None, None, "luks_keystore", self.keyfile)
         with self.assertRaises(ValueError):
             zpe.validate()
 
     def test_validate_missing_key(self):
-        zpe = zfs.ZPoolEncryption("pool1", "luks_keystore", None)
+        zpe = zfs.ZPoolEncryption(None, "pool1", "luks_keystore", None)
         with self.assertRaises(ValueError):
             zpe.validate()
 
     def test_validate_missing_key_file(self):
-        zpe = zfs.ZPoolEncryption("pool1", "luks_keystore", "not-exist")
+        zpe = zfs.ZPoolEncryption(None, "pool1", "luks_keystore", "not-exist")
         with self.assertRaises(ValueError):
             zpe.validate()
 
     def test_validate_unencrypted_ok(self):
-        zpe = zfs.ZPoolEncryption("pool1", None, None)
+        zpe = zfs.ZPoolEncryption(None, "pool1", None, None)
         try:
             zpe.validate()
         except Exception:
@@ -653,7 +653,7 @@ class TestZfsKeystore(CiTestCase):
         self.assertFalse(zpe.in_use())
 
     def test_dataset_properties(self):
-        zpe = zfs.ZPoolEncryption("pool1", "luks_keystore", self.keyfile)
+        zpe = zfs.ZPoolEncryption(None, "pool1", "luks_keystore", self.keyfile)
         keyloc = self.random_string()
         with mock.patch.object(zpe, "get_system_key", return_value=keyloc):
             props = zpe.dataset_properties()
diff --git a/tests/unittests/test_commands_block_meta.py b/tests/unittests/test_commands_block_meta.py
index ed58cc1..7237cfc 100644
--- a/tests/unittests/test_commands_block_meta.py
+++ b/tests/unittests/test_commands_block_meta.py
@@ -5,6 +5,7 @@ from collections import OrderedDict
 import copy
 from unittest.mock import (
     call,
+    MagicMock,
     Mock,
     patch,
 )
@@ -685,6 +686,7 @@ class TestBlockMeta(CiTestCase):
         self.m_mp.is_mpath_device.return_value = False
         self.m_mp.is_mpath_member.return_value = False
 
+    @patch('curtin.commands.block_meta.util.FlockEx', new=MagicMock())
     def test_disk_handler_calls_clear_holder(self):
         info = self.storage_config.get('sda')
         disk = info.get('path')
@@ -1789,6 +1791,7 @@ class TestDiskHandler(CiTestCase):
         m_getpath.assert_called_with(info['id'], storage_config)
         m_block.get_part_table_type.assert_called_with(disk_path)
 
+    @patch('curtin.commands.block_meta.util.FlockEx', new=MagicMock())
     @patch('curtin.commands.block_meta.util.subp')
     @patch('curtin.commands.block_meta.clear_holders.get_holders')
     @patch('curtin.commands.block_meta.get_path_to_storage_volume')
diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py
index 96f260f..45d4b79 100644
--- a/tests/unittests/test_util.py
+++ b/tests/unittests/test_util.py
@@ -1,7 +1,9 @@
 # This file is part of curtin. See LICENSE file for copyright and license info.
 
+from pathlib import Path
 from unittest import mock
 import errno
+import fcntl
 import os
 import stat
 from textwrap import dedent
@@ -1406,4 +1408,30 @@ class TestEFIVarFSBug(CiTestCase):
                 util.EFIVarFSBug.apply_workaround_if_affected()
             apply_wa.assert_not_called()
 
+
+class TestFlockEx(CiTestCase):
+    def setUp(self):
+        self.tmpf = self.tmp_path('testfile')
+        Path(self.tmpf).touch()
+
+    def test_basic(self):
+        with open(self.tmpf) as fp:
+            with util.FlockEx(self.tmpf):
+                with self.assertRaises(BlockingIOError):
+                    fcntl.flock(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
+
+            fcntl.flock(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
+            fcntl.flock(fp, fcntl.LOCK_UN)
+
+    def test_timeout(self):
+        with open(self.tmpf) as fp:
+            fcntl.flock(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
+
+            with self.assertRaises(TimeoutError):
+                with util.FlockEx(self.tmpf, timeout=.01):
+                    pass
+
+            fcntl.flock(fp, fcntl.LOCK_UN)
+
+
 # vi: ts=4 expandtab syntax=python

Follow ups