← Back to team overview

curtin-dev team mailing list archive

[Merge] ~ogayot/curtin:retry-wipe-on-o-excl-busy into curtin:master

 

Olivier Gayot has proposed merging ~ogayot/curtin:retry-wipe-on-o-excl-busy into curtin:master.

Commit message:
block: sleep and retry wiping part. if open(O_EXCL) returns EBUSY

Requested reviews:
  curtin developers (curtin-dev)

For more details, see:
https://code.launchpad.net/~ogayot/curtin/+git/curtin/+merge/441894

In https://bugs.launchpad.net/subiquity/+bug/2016919, exclusive_open failed with the following error when attempting to wipe /dev/sda:

            block.wipe_volume(disk, mode='superblock')
          File "/snap/ubuntu-desktop-installer/935/lib/python3.10/site-packages/curtin/block/__init__.py", line 1322, in wipe_volume
            quick_zero(path, partitions=False, exclusive=exclusive)
          File "/snap/ubuntu-desktop-installer/935/lib/python3.10/site-packages/curtin/block/__init__.py", line 1236, in quick_zero
            return zero_file_at_offsets(path, offsets, buflen=buflen, count=count,
          File "/snap/ubuntu-desktop-installer/935/lib/python3.10/site-packages/curtin/block/__init__.py", line 1257, in zero_file_at_offsets
            with exclusive_open(path, exclusive=exclusive) as fp:
          File "/snap/ubuntu-desktop-installer/935/usr/lib/python3.10/contextlib.py", line 135, in __enter__
            return next(self.gen)
          File "/snap/ubuntu-desktop-installer/935/lib/python3.10/site-packages/curtin/block/__init__.py", line 1149, in exclusive_open
            fd = os.open(path, flags)
        OSError: [Errno 16] Device or resource busy: '/dev/sda'
        [Errno 16] Device or resource busy: '/dev/sda'

The output of fuser shows no process running with '/dev/sda' opened in write mode. This most likely mean that a process opened /dev/sda temporarily and closed it immediately after.

We now expect this scenario to be a possibility. If we get EBUSY in exclusive_open, we now sleep for 1 second and try again (only once).
-- 
Your team curtin developers is requested to review the proposed merge of ~ogayot/curtin:retry-wipe-on-o-excl-busy into curtin:master.
diff --git a/curtin/block/__init__.py b/curtin/block/__init__.py
index 1f9bb60..0dd8b2e 100644
--- a/curtin/block/__init__.py
+++ b/curtin/block/__init__.py
@@ -7,12 +7,14 @@ import os
 import stat
 import sys
 import tempfile
+import time
 
 from curtin import util
 from curtin.block import lvm
 from curtin.block import multipath
 from curtin.log import LOG
 from curtin.udev import udevadm_settle, udevadm_info
+from curtin.util import NotExclusiveError
 from curtin import storage_config
 
 
@@ -1159,7 +1161,7 @@ def exclusive_open(path, exclusive=True):
             # python2 leaves fd open if there os.fdopen fails
             if fd_needs_closing and sys.version_info.major == 2:
                 os.close(fd)
-    except OSError:
+    except OSError as exc:
         LOG.error("Failed to exclusively open path: %s", path)
         holders = get_holders(path)
         LOG.error('Device holders with exclusive access: %s', holders)
@@ -1167,7 +1169,10 @@ def exclusive_open(path, exclusive=True):
         LOG.error('Device mounts: %s', mount_points)
         fusers = util.fuser_mount(path)
         LOG.error('Possible users of %s:\n%s', path, fusers)
-        raise
+        if exclusive and exc.errno == errno.EBUSY:
+            raise NotExclusiveError from exc
+        else:
+            raise
 
 
 def wipe_file(path, reader=None, buflen=4 * 1024 * 1024, exclusive=True):
@@ -1233,8 +1238,9 @@ def quick_zero(path, partitions=True, exclusive=True):
         quick_zero(pt, partitions=False)
 
     LOG.debug("wiping 1M on %s at offsets %s", path, offsets)
-    return zero_file_at_offsets(path, offsets, buflen=buflen, count=count,
-                                exclusive=exclusive)
+    util.not_exclusive_retry(
+            zero_file_at_offsets,
+            path, offsets, buflen=buflen, count=count, exclusive=exclusive)
 
 
 def zero_file_at_offsets(path, offsets, buflen=1024, count=1024, strict=False,
diff --git a/curtin/util.py b/curtin/util.py
index aaa6008..623966d 100644
--- a/curtin/util.py
+++ b/curtin/util.py
@@ -2,7 +2,7 @@
 
 import argparse
 import collections
-from contextlib import contextmanager
+from contextlib import contextmanager, suppress
 import errno
 import json
 import os
@@ -62,6 +62,11 @@ _DNS_REDIRECT_IP = None
 BASIC_MATCHER = re.compile(r'\$\{([A-Za-z0-9_.]+)\}|\$([A-Za-z0-9_.]+)')
 
 
+class NotExclusiveError(OSError):
+    ''' Exception to raise when an exclusive open (i.e, O_EXCL fails with
+    EBUSY) '''
+
+
 def _subp(args, data=None, rcs=None, env=None, capture=False,
           combine_capture=False, shell=False, logstring=False,
           decode="replace", target=None, cwd=None, log_captured=False,
@@ -1323,4 +1328,11 @@ def uses_systemd():
 
     return _USES_SYSTEMD
 
+
+def not_exclusive_retry(fun, *args, **kwargs):
+    with suppress(NotExclusiveError):
+        return fun(*args, **kwargs)
+    time.sleep(1)
+    return fun(*args, **kwargs)
+
 # vi: ts=4 expandtab syntax=python
diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py
index a111ea1..195b685 100644
--- a/tests/unittests/test_util.py
+++ b/tests/unittests/test_util.py
@@ -1,6 +1,7 @@
 # This file is part of curtin. See LICENSE file for copyright and license info.
 
 from unittest import skipIf
+import errno
 import mock
 import os
 import stat
@@ -1158,4 +1159,43 @@ class TestSanitizeSource(CiTestCase):
         self.assertEqual(expected, result)
 
 
+class TestNotExclusiveRetry(CiTestCase):
+    @mock.patch('curtin.util.time.sleep')
+    def test_not_exclusive_retry_success(self, sleep):
+        f = mock.Mock(return_value='success')
+
+        self.assertEqual(util.not_exclusive_retry(f, 1, 2, 3), 'success')
+        sleep.assert_not_called()
+
+    @mock.patch('curtin.util.time.sleep')
+    def test_not_exclusive_retry_failed(self, sleep):
+        f = mock.Mock(side_effect=OSError)
+
+        with self.assertRaises(OSError):
+            util.not_exclusive_retry(f, 1, 2, 3)
+        sleep.assert_not_called()
+
+    @mock.patch('curtin.util.time.sleep')
+    def test_not_exclusive_retry_not_exclusive_once_then_success(self, sleep):
+        f = mock.Mock(side_effect=[util.NotExclusiveError, 'success'])
+
+        self.assertEqual(util.not_exclusive_retry(f, 1, 2, 3), 'success')
+        sleep.assert_called_once()
+
+    @mock.patch('curtin.util.time.sleep')
+    def test_not_exclusive_retry_not_exclusive_twice(self, sleep):
+        f = mock.Mock(side_effect=[util.NotExclusiveError] * 2)
+
+        with self.assertRaises(util.NotExclusiveError):
+            util.not_exclusive_retry(f, 1, 2, 3)
+        sleep.assert_called_once()
+
+    @mock.patch('curtin.util.time.sleep')
+    def test_not_exclusive_retry_not_exclusive_once_then_error(self, sleep):
+        f = mock.Mock(side_effect=[util.NotExclusiveError, OSError])
+
+        with self.assertRaises(OSError):
+            util.not_exclusive_retry(f, 1, 2, 3)
+        sleep.assert_called_once()
+
 # vi: ts=4 expandtab syntax=python

Follow ups