← Back to team overview

cloud-init-dev team mailing list archive

Re: [Merge] ~smoser/cloud-init:bug/1686514-azure-reformat-large into cloud-init:master

 


Diff comments:

> diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
> index 5254e18..f7cbb43 100644
> --- a/cloudinit/sources/DataSourceAzure.py
> +++ b/cloudinit/sources/DataSourceAzure.py
> @@ -413,56 +412,71 @@ class DataSourceAzureNet(sources.DataSource):
>          return
>  
>  
> +def _partitions_on_device(devpath, maxnum=16):
> +    # return a list of tuples (ptnum, path) for each part on devpath
> +    for suff in ("-part", "p", ""):
> +        found = []
> +        for pnum in range(1, maxnum):
> +            ppath = devpath + suff + str(pnum)
> +            if os.path.exists(ppath):
> +                found.append((pnum, os.path.realpath(ppath)))
> +        if found:
> +            return found
> +    return []
> +
> +
> +def _has_ntfs_filesystem(devpath):
> +    ntfs_devices = util.find_devs_with("TYPE=ntfs", no_cache=True)
> +    LOG.debug('ntfs_devices found = %s', ntfs_devices)
> +    return os.path.realpath(devpath) in ntfs_devices

This may be confusing when examining the log messages since we check if realpath(device) is ntfs_devices.

Note, AFAICT, all callers of _has_ntfs_filesystem are passing in paths that have already had realpath called on them.

> +
> +
>  def can_dev_be_reformatted(devpath):
> -    # determine if the ephemeral block device path devpath
> -    # is newly formatted after a resize.
> +    """Determine if block device devpath is newly formatted ephemeral.
> +
> +    A newly formatted disk will:
> +      a.) have a partition table (dos or gpt)
> +      b.) have 1 partition that is ntfs formatted, or
> +          have 2 partitions with the second partition ntfs formatted.
> +          (larger instances with >2TB ephemeral disk have gpt, and will
> +           have a microsoft reserved partition as part 1.  LP: #1686514)
> +      c.) the ntfs partition will have no files other than possibly
> +          'dataloss_warning_readme.txt'"""
>      if not os.path.exists(devpath):
>          return False, 'device %s does not exist' % devpath
>  
> -    realpath = os.path.realpath(devpath)
> -    LOG.debug('Resolving realpath of %s -> %s', devpath, realpath)
> -
> -    # it is possible that the block device might exist, but the kernel
> -    # have not yet read the partition table and sent events.  we udevadm settle
> -    # to hope to resolve that.  Better here would probably be to test and see,
> -    # and then settle if we didn't find anything and try again.
> -    if util.which("udevadm"):
> -        util.subp(["udevadm", "settle"])

It appears that you're addressing the "Better here would probably be to test and see.."
Wouldn't udevadm settled --exist-if-exists=os.path.realpath(devpath) then?

It's also worth noting that realpath on symlinks with no targets return the symlink;
It may be worth checking that realpath actually resolved as an indicator of whether
the "real" disk is actually present.

> +    LOG.debug('Resolving realpath of %s -> %s', devpath,
> +              os.path.realpath(devpath))
>  
>      # devpath of /dev/sd[a-z] or /dev/disk/cloud/azure_resource
>      # where partitions are "<devpath>1" or "<devpath>-part1" or "<devpath>p1"
> -    part1path = None
> -    for suff in ("-part", "p", ""):
> -        cand = devpath + suff + "1"
> -        if os.path.exists(cand):
> -            if os.path.exists(devpath + suff + "2"):
> -                msg = ('device %s had more than 1 partition: %s, %s' %
> -                       devpath, cand, devpath + suff + "2")
> -                return False, msg
> -            part1path = cand
> -            break
> -
> -    if part1path is None:
> +    partitions = _partitions_on_device(devpath)
> +    if len(partitions) == 0:
>          return False, 'device %s was not partitioned' % devpath
> +    elif len(partitions) > 2:
> +        msg = ('device %s had 3 or more partitions: %s' %
> +               (devpath, ' '.join([p[1] for p in partitions])))
> +        return False, msg
> +    elif len(partitions) == 2:
> +        cand_part, cand_path = partitions[1]
> +    else:
> +        cand_part, cand_path = partitions[0]
>  
> -    real_part1path = os.path.realpath(part1path)
> -    ntfs_devices = util.find_devs_with("TYPE=ntfs", no_cache=True)
> -    LOG.debug('ntfs_devices found = %s', ntfs_devices)
> -    if real_part1path not in ntfs_devices:
> -        msg = ('partition 1 (%s -> %s) on device %s was not ntfs formatted' %
> -               (part1path, real_part1path, devpath))
> +    if not _has_ntfs_filesystem(cand_path):
> +        msg = ('partition %s (%s) on device %s was not ntfs formatted' %
> +               (cand_part, cand_path, devpath))
>          return False, msg
>  
>      def count_files(mp):
>          ignored = set(['dataloss_warning_readme.txt'])
>          return len([f for f in os.listdir(mp) if f.lower() not in ignored])
>  
> -    bmsg = ('partition 1 (%s -> %s) on device %s was ntfs formatted' %
> -            (part1path, real_part1path, devpath))
> +    bmsg = ('partition %s (%s) on device %s was ntfs formatted' %
> +            (cand_part, cand_path, devpath))
>      try:
> -        file_count = util.mount_cb(part1path, count_files)
> +        file_count = util.mount_cb(cand_path, count_files)
>      except util.MountFailedError as e:
> -        return False, bmsg + ' but mount of %s failed: %s' % (part1path, e)
> +        return False, bmsg + ' but mount of %s failed: %s' % (cand_part, e)
>  
>      if file_count != 0:
>          return False, bmsg + ' but had %d files on it.' % file_count
> diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
> index e6b0dcb..e7562bf 100644
> --- a/tests/unittests/test_datasource/test_azure.py
> +++ b/tests/unittests/test_datasource/test_azure.py
> @@ -701,15 +699,197 @@ class TestAzureBounce(TestCase):
>  class TestReadAzureOvf(TestCase):
>      def test_invalid_xml_raises_non_azure_ds(self):
>          invalid_xml = "<foo>" + construct_valid_ovf_env(data={})
> -        self.assertRaises(DataSourceAzure.BrokenAzureDataSource,
> -                          DataSourceAzure.read_azure_ovf, invalid_xml)
> +        self.assertRaises(dsaz.BrokenAzureDataSource,
> +                          dsaz.read_azure_ovf, invalid_xml)
>  
>      def test_load_with_pubkeys(self):
>          mypklist = [{'fingerprint': 'fp1', 'path': 'path1', 'value': ''}]
>          pubkeys = [(x['fingerprint'], x['path'], x['value']) for x in mypklist]
>          content = construct_valid_ovf_env(pubkeys=pubkeys)
> -        (_md, _ud, cfg) = DataSourceAzure.read_azure_ovf(content)
> +        (_md, _ud, cfg) = dsaz.read_azure_ovf(content)
>          for mypk in mypklist:
>              self.assertIn(mypk, cfg['_pubkeys'])
>  
> +
> +class TestCanDevBeReformatted(CiTestCase):
> +    warning_file = 'dataloss_warning_readme.txt'
> +
> +    def _domock(self, mockpath, sattr=None):
> +        patcher = mock.patch(mockpath)
> +        setattr(self, sattr, patcher.start())
> +        self.addCleanup(patcher.stop)
> +
> +    def setUp(self):
> +        super(TestCanDevBeReformatted, self).setUp()
> +
> +    def patchup(self, devs):
> +        def realpath(d):
> +            return bypath[d].get('realpath', d)
> +
> +        def partitions_on_device(devpath):
> +            parts = bypath.get(devpath, {}).get('partitions', {})
> +            ret = []
> +            for path, data in parts.items():
> +                ret.append((data.get('num'), realpath(path)))
> +            # return sorted by partition number
> +            return sorted(ret, key=lambda d: d[0])
> +
> +        def mount_cb(device, callback):
> +            p = self.tmp_dir()
> +            for f in bypath.get(device).get('files', []):
> +                write_file(os.path.join(p, f), content=f)
> +            return callback(p)
> +
> +        def has_ntfs_fs(device):
> +            return bypath.get(device, {}).get('fs') == 'ntfs'
> +
> +        bypath = {}

I'm missing something, does this assignment make this a class attribute?  it's strange (to me) to have methods 
refer to the bypath dictionary without self;  and further to see it assigned in the self. mock objects.

> +        for path, data in devs.items():
> +            bypath[path] = data
> +            if 'realpath' in data:
> +                bypath[data['realpath']] = data
> +            for ppath, pdata in data.get('partitions', {}).items():
> +                bypath[ppath] = pdata
> +                if 'realpath' in data:
> +                    bypath[pdata['realpath']] = pdata
> +
> +        p = 'cloudinit.sources.DataSourceAzure'
> +        self._domock(p + "._partitions_on_device", 'm_partitions_on_device')
> +        self._domock(p + "._has_ntfs_filesystem", 'm_has_ntfs_filesystem')
> +        self._domock(p + ".util.mount_cb", 'm_mount_cb')
> +        self._domock(p + ".os.path.realpath", 'm_realpath')
> +        self._domock(p + ".os.path.exists", 'm_exists')
> +
> +        self.m_exists.side_effect = lambda p: p in bypath
> +        self.m_realpath.side_effect = lambda p: realpath(p)
> +        self.m_has_ntfs_filesystem.side_effect = has_ntfs_fs
> +        self.m_mount_cb.side_effect = mount_cb
> +        self.m_partitions_on_device.side_effect = partitions_on_device
> +
> +    def test_three_partitions_is_false(self):
> +        self.patchup({
> +            '/dev/sda': {
> +                'partitions': {
> +                    '/dev/sda1': {'num': 1},
> +                    '/dev/sda2': {'num': 2},
> +                    '/dev/sda3': {'num': 3},
> +                }}})
> +        value, msg = dsaz.can_dev_be_reformatted("/dev/sda")
> +        self.assertFalse(False, value)
> +        self.assertIn("3 or more", msg.lower())
> +
> +    def test_no_partitions_is_false(self):
> +        self.patchup({'/dev/sda': {'realpath': '/dev/sda'}})
> +        value, msg = dsaz.can_dev_be_reformatted("/dev/sda")
> +        self.assertEqual(False, value)
> +        self.assertIn("not partitioned", msg.lower())
> +
> +    def test_two_partitions_not_ntfs_false(self):
> +        self.patchup({
> +            '/dev/sda': {
> +                'partitions': {
> +                    '/dev/sda1': {'num': 1},
> +                    '/dev/sda2': {'num': 2, 'fs': 'ext4', 'files': []},
> +                }}})
> +        value, msg = dsaz.can_dev_be_reformatted("/dev/sda")
> +        self.assertFalse(False, value)
> +        self.assertIn("not ntfs", msg.lower())
> +
> +    def test_two_partitions_ntfs_populated_false(self):
> +        self.patchup({
> +            '/dev/sda': {
> +                'partitions': {
> +                    '/dev/sda1': {'num': 1},
> +                    '/dev/sda2': {'num': 2, 'fs': 'ntfs',
> +                                  'files': ['secret.txt']},
> +                }}})
> +        value, msg = dsaz.can_dev_be_reformatted("/dev/sda")
> +        self.assertFalse(False, value)
> +        self.assertIn("files on it", msg.lower())
> +
> +    def test_two_partitions_ntfs_empty_is_true(self):
> +        self.patchup({
> +            '/dev/sda': {
> +                'partitions': {
> +                    '/dev/sda1': {'num': 1},
> +                    '/dev/sda2': {'num': 2, 'fs': 'ntfs', 'files': []},
> +                }}})
> +        value, msg = dsaz.can_dev_be_reformatted("/dev/sda")
> +        self.assertEqual(True, value)
> +        self.assertIn("safe for", msg.lower())
> +
> +    def test_one_partition_not_ntfs_false(self):
> +        self.patchup({
> +            '/dev/sda': {
> +                'partitions': {
> +                    '/dev/sda1': {'num': 1, 'fs': 'zfs'},
> +                }}})
> +        value, msg = dsaz.can_dev_be_reformatted("/dev/sda")
> +        self.assertEqual(False, value)
> +        self.assertIn("not ntfs", msg.lower())
> +
> +    def test_one_partition_ntfs_populated_false(self):
> +        self.patchup({
> +            '/dev/sda': {
> +                'partitions': {
> +                    '/dev/sda1': {'num': 1, 'fs': 'ntfs',
> +                                  'files': ['file1.txt', 'file2.exe']},
> +                }}})
> +        value, msg = dsaz.can_dev_be_reformatted("/dev/sda")
> +        self.assertEqual(False, value)
> +        self.assertIn("files on it", msg.lower())
> +
> +    def test_one_partition_ntfs_empty_is_true(self):
> +        self.patchup({
> +            '/dev/sda': {
> +                'partitions': {
> +                    '/dev/sda1': {'num': 1, 'fs': 'ntfs', 'files': []}
> +                }}})
> +        value, msg = dsaz.can_dev_be_reformatted("/dev/sda")
> +        self.assertEqual(True, value)
> +        self.assertIn("safe for", msg.lower())
> +
> +    def test_one_partition_ntfs_empty_with_dataloss_file_is_true(self):
> +        self.patchup({
> +            '/dev/sda': {
> +                'partitions': {
> +                    '/dev/sda1': {'num': 1, 'fs': 'ntfs',
> +                                  'files': ['dataloss_warning_readme.txt']}
> +                }}})
> +        value, msg = dsaz.can_dev_be_reformatted("/dev/sda")
> +        self.assertEqual(True, value)
> +        self.assertIn("safe for", msg.lower())
> +
> +    def test_one_partition_through_realpath_is_true(self):
> +        epath = '/dev/disk/cloud/azure_resource'
> +        self.patchup({
> +            epath: {
> +                'realpath': '/dev/sdb',
> +                'partitions': {
> +                    epath + '-part1': {
> +                        'num': 1, 'fs': 'ntfs', 'files': [self.warning_file],
> +                        'realpath': '/dev/sdb1'}
> +                }}})
> +        value, msg = dsaz.can_dev_be_reformatted(epath)
> +        self.assertEqual(True, value)
> +        self.assertIn("safe for", msg.lower())
> +
> +    def test_three_partition_through_realpath_is_true(self):
> +        epath = '/dev/disk/cloud/azure_resource'
> +        self.patchup({
> +            epath: {
> +                'realpath': '/dev/sdb',
> +                'partitions': {
> +                    epath + '-part1': {
> +                        'num': 1, 'fs': 'ntfs', 'files': [self.warning_file],
> +                        'realpath': '/dev/sdb1'},
> +                    epath + '-part2': {'num': 2, 'fs': 'ext3',
> +                                       'realpath': '/dev/sdb2'},
> +                    epath + '-part3': {'num': 3, 'fs': 'ext',
> +                                       'realpath': '/dev/sdb3'}
> +                }}})
> +        value, msg = dsaz.can_dev_be_reformatted(epath)
> +        self.assertEqual(False, value)
> +        self.assertIn("3 or more", msg.lower())
> +
>  # vi: ts=4 expandtab


-- 
https://code.launchpad.net/~smoser/cloud-init/+git/cloud-init/+merge/323420
Your team cloud init development team is requested to review the proposed merge of ~smoser/cloud-init:bug/1686514-azure-reformat-large into cloud-init:master.


References