← Back to team overview

cloud-init-dev team mailing list archive

[Merge] ~smoser/cloud-init:feature/ds-init into cloud-init:master

 

Scott Moser has proposed merging ~smoser/cloud-init:feature/ds-init into cloud-init:master.

Requested reviews:
  cloud init development team (cloud-init-dev)
Related bugs:
  Bug #1611074 in cloud-init: "Reformatting of ephemeral drive fails on resize of Azure VM"
  https://bugs.launchpad.net/cloud-init/+bug/1611074

For more details, see:
https://code.launchpad.net/~smoser/cloud-init/+git/cloud-init/+merge/311205
-- 
Your team cloud init development team is requested to review the proposed merge of ~smoser/cloud-init:feature/ds-init into cloud-init:master.
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py
index 83eb02c..fe37075 100644
--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -326,6 +326,9 @@ def main_init(name, args):
         util.logexc(LOG, "Failed to re-adjust output redirection!")
     logging.setupLogging(mods.cfg)
 
+    # give the activated datasource a chance to adjust
+    init.activate_datasource()
+
     # Stage 10
     return (init.datasource, run_module_section(mods, name, name))
 
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index b802b03..10a8b6f 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -19,7 +19,6 @@
 import base64
 import contextlib
 import crypt
-import fnmatch
 from functools import partial
 import os
 import os.path
@@ -28,7 +27,6 @@ from xml.dom import minidom
 import xml.etree.ElementTree as ET
 
 from cloudinit import log as logging
-from cloudinit.settings import PER_ALWAYS
 from cloudinit import sources
 from cloudinit.sources.helpers.azure import get_metadata_from_fabric
 from cloudinit import util
@@ -42,6 +40,9 @@ BOUNCE_COMMAND = [
     'sh', '-xc',
     "i=$interface; x=0; ifdown $i || x=$?; ifup $i || x=$?; exit $x"
 ]
+# azure systems will always have a resource disk, and 66-azure-ephemeral.rules
+# ensures that it gets linked to this path.
+RESOURCE_DISK_PATH = '/dev/disk/cloud/azure_resource'
 
 BUILTIN_DS_CONFIG = {
     'agent_command': AGENT_START,
@@ -53,7 +54,7 @@ BUILTIN_DS_CONFIG = {
         'command': BOUNCE_COMMAND,
         'hostname_command': 'hostname',
     },
-    'disk_aliases': {'ephemeral0': '/dev/sdb'},
+    'disk_aliases': {'ephemeral0': RESOURCE_DISK_PATH},
     'dhclient_lease_file': '/var/lib/dhcp/dhclient.eth0.leases',
 }
 
@@ -245,15 +246,6 @@ class DataSourceAzureNet(sources.DataSource):
         self.metadata['instance-id'] = util.read_dmi_data('system-uuid')
         self.metadata.update(fabric_data)
 
-        found_ephemeral = find_fabric_formatted_ephemeral_disk()
-        if found_ephemeral:
-            self.ds_cfg['disk_aliases']['ephemeral0'] = found_ephemeral
-            LOG.debug("using detected ephemeral0 of %s", found_ephemeral)
-
-        cc_modules_override = support_new_ephemeral(self.sys_cfg)
-        if cc_modules_override:
-            self.cfg['cloud_init_modules'] = cc_modules_override
-
         return True
 
     def device_name_to_device(self, name):
@@ -266,97 +258,92 @@ class DataSourceAzureNet(sources.DataSource):
         # quickly (local check only) if self.instance_id is still valid
         return sources.instance_id_matches_system_uuid(self.get_instance_id())
 
-
-def count_files(mp):
-    return len(fnmatch.filter(os.listdir(mp), '*[!cdrom]*'))
+    def activate(self, cfg, is_new_instance):
+        """Run address_ephemeral_resize for this boot; cfg is not used."""
+        address_ephemeral_resize(is_new_instance=is_new_instance)
 
 
-def find_fabric_formatted_ephemeral_part():
-    """
-    Locate the first fabric formatted ephemeral device.
-    """
-    potential_locations = ['/dev/disk/cloud/azure_resource-part1',
-                           '/dev/disk/azure/resource-part1']
-    device_location = None
-    for potential_location in potential_locations:
-        if os.path.exists(potential_location):
-            device_location = potential_location
+def can_dev_be_reformatted(devpath):
+    """Return (ok, msg): may the ephemeral disk at devpath be reformatted?"""
+    # ok only for a single ntfs partition holding no important files.
+    if not os.path.exists(devpath):  # device nodes are not regular files
+        return False, 'device %s does not exist' % devpath
+
+    # devpath of /dev/sd[a-z] or /dev/disk/cloud/azure_resource
+    # where partitions are "<devpath>1" or "<devpath>-part1" or "<devpath>p1"
+    partpath = None
+    for suff in ("-part", "p", ""):
+        cand = devpath + suff + "1"
+        if os.path.exists(cand):
+            if os.path.exists(devpath + suff + "2"):
+                msg = ('device %s had more than 1 partition: %s, %s' %
+                       (devpath, cand, devpath + suff + "2"))
+                return False, msg
+            partpath = cand
             break
-    if device_location is None:
-        LOG.debug("no azure resource disk partition path found")
-        return None
+
+    if partpath is None:
+        return False, 'device %s was not partitioned' % devpath
+
+    real_partpath = os.path.realpath(partpath)
     ntfs_devices = util.find_devs_with("TYPE=ntfs")
-    real_device = os.path.realpath(device_location)
-    if real_device in ntfs_devices:
-        return device_location
-    LOG.debug("'%s' existed (%s) but was not ntfs formated",
-              device_location, real_device)
-    return None
+    if real_partpath not in ntfs_devices:
+        msg = ('partition 1 (%s -> %s) on device %s was not ntfs formatted' %
+               (partpath, real_partpath, devpath))
+        return False, msg
 
+    def count_files(mp):
+        ignored = {'dataloss_warning_readme.txt'}
+        return len([f for f in os.listdir(mp) if f.lower() not in ignored])
 
-def find_fabric_formatted_ephemeral_disk():
-    """
-    Get the ephemeral disk.
-    """
-    part_dev = find_fabric_formatted_ephemeral_part()
-    if part_dev:
-        return part_dev.split('-')[0]
-    return None
+    bmsg = ('partition 1 (%s -> %s) on device %s was ntfs formatted' %
+            (partpath, real_partpath, devpath))
+    try:
+        file_count = util.mount_cb(partpath, count_files)  # not the disk
+    except util.MountFailedError as e:
+        return False, bmsg + ' but mount failed: %s' % e
 
+    if file_count != 0:
+        return False, bmsg + ' but had %d files on it.' % file_count
 
-def support_new_ephemeral(cfg):
-    """
-    Windows Azure makes ephemeral devices ephemeral to boot; a ephemeral device
-    may be presented as a fresh device, or not.
+    return True, bmsg + ' and had no important files.'
 
-    Since the knowledge of when a disk is supposed to be plowed under is
-    specific to Windows Azure, the logic resides here in the datasource. When a
-    new ephemeral device is detected, cloud-init overrides the default
-    frequency for both disk-setup and mounts for the current boot only.
-    """
-    device = find_fabric_formatted_ephemeral_part()
-    if not device:
-        LOG.debug("no default fabric formated ephemeral0.1 found")
-        return None
-    LOG.debug("fabric formated ephemeral0.1 device at %s", device)
 
-    file_count = 0
-    try:
-        file_count = util.mount_cb(device, count_files)
-    except Exception:
-        return None
-    LOG.debug("fabric prepared ephmeral0.1 has %s files on it", file_count)
-
-    if file_count >= 1:
-        LOG.debug("fabric prepared ephemeral0.1 will be preserved")
-        return None
+def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120,
+                             is_new_instance=False):
+    """Remove disk_setup/mounts markers if the disk may be reformatted.
+
+    Waits (wait_for_files, up to maxwait) for devpath.  Returns None.
+    """
+    missing = wait_for_files([devpath], maxwait=maxwait, naplen=.2)
+    if missing:
+        LOG.warn("ephemeral device '%s' did not appear after %d seconds.",
+                 devpath, maxwait)
+        return
+
+    # a new instance is always eligible; later boots only if the disk is fresh
+    if is_new_instance:
+        result, msg = (True, "First instance boot.")
     else:
-        # if device was already mounted, then we need to unmount it
-        # race conditions could allow for a check-then-unmount
-        # to have a false positive. so just unmount and then check.
-        try:
-            util.subp(['umount', device])
-        except util.ProcessExecutionError as e:
-            if device in util.mounts():
-                LOG.warn("Failed to unmount %s, will not reformat.", device)
-                LOG.debug("Failed umount: %s", e)
-                return None
-
-    LOG.debug("cloud-init will format ephemeral0.1 this boot.")
-    LOG.debug("setting disk_setup and mounts modules 'always' for this boot")
-
-    cc_modules = cfg.get('cloud_init_modules')
-    if not cc_modules:
-        return None
-
-    mod_list = []
-    for mod in cc_modules:
-        if mod in ("disk_setup", "mounts"):
-            mod_list.append([mod, PER_ALWAYS])
-            LOG.debug("set module '%s' to 'always' for this boot", mod)
+        result, msg = can_dev_be_reformatted(devpath)
+
+    LOG.debug("reformattable=%s: %s", result, msg)
+    if not result:
+        return
+
+    # module names as used in the marker files: sem/config_<module>
+    for mod in ['disk_setup', 'mounts']:
+        sempath = '/var/lib/cloud/instance/sem/config_' + mod
+        bmsg = 'Marker "%s" for module "%s"' % (sempath, mod)
+        if os.path.exists(sempath):
+            try:
+                os.unlink(sempath)
+                LOG.debug(bmsg + " removed.")
+            except OSError as e:
+                # FileNotFoundError (python3) is a subclass of OSError
+                LOG.warn(bmsg + ": remove failed! (%s)" % e)
         else:
-            mod_list.append(mod)
-    return mod_list
+            LOG.debug(bmsg + " did not exist.")
 
 
 def perform_hostname_bounce(hostname, cfg, prev_hostname):
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index d139527..13fb7c6 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -261,6 +261,18 @@ class DataSource(object):
     def first_instance_boot(self):
         return
 
+    def activate(self, cfg, is_new_instance):
+        """Hook called before the init_modules are run.
+
+        cfg is the fully up to date config: a merged view of system
+        config, datasource config, user config and vendor config.
+        It should be used rather than the sys_cfg passed to __init__.
+
+        is_new_instance is a boolean indicating if this is a new instance.
+        This base implementation does nothing and returns None.
+        """
+        return
+
 
 def normalize_pubkey_data(pubkey_data):
     keys = []
diff --git a/cloudinit/stages.py b/cloudinit/stages.py
index 47deac6..043e3b8 100644
--- a/cloudinit/stages.py
+++ b/cloudinit/stages.py
@@ -371,6 +371,18 @@ class Init(object):
         self._store_userdata()
         self._store_vendordata()
 
+    def activate_datasource(self):
+        """Call the datasource's activate() hook, then _write_to_cache().
+
+        Raises RuntimeError if self.datasource is None.
+        """
+        if self.datasource is None:
+            raise RuntimeError("Datasource is None, cannot activate.")
+        # the keyword must match DataSource.activate(cfg, is_new_instance)
+        self.datasource.activate(cfg=self.cfg,
+                                 is_new_instance=self.is_new_instance())
+        self._write_to_cache()
+
     def _store_userdata(self):
         raw_ud = self.datasource.get_userdata_raw()
         if raw_ud is None:
diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
index e90e903..0712700 100644
--- a/tests/unittests/test_datasource/test_azure.py
+++ b/tests/unittests/test_datasource/test_azure.py
@@ -349,7 +349,7 @@ class TestAzureDataSource(TestCase):
         cfg = dsrc.get_config_obj()
 
         self.assertEqual(dsrc.device_name_to_device("ephemeral0"),
-                         "/dev/sdb")
+                         DataSourceAzure.RESOURCE_DISK_PATH)
         assert 'disk_setup' in cfg
         assert 'fs_setup' in cfg
         self.assertIsInstance(cfg['disk_setup'], dict)
@@ -462,14 +462,6 @@ class TestAzureBounce(TestCase):
             mock.patch.object(DataSourceAzure, 'list_possible_azure_ds_devs',
                               mock.MagicMock(return_value=[])))
         self.patches.enter_context(
-            mock.patch.object(DataSourceAzure,
-                              'find_fabric_formatted_ephemeral_disk',
-                              mock.MagicMock(return_value=None)))
-        self.patches.enter_context(
-            mock.patch.object(DataSourceAzure,
-                              'find_fabric_formatted_ephemeral_part',
-                              mock.MagicMock(return_value=None)))
-        self.patches.enter_context(
             mock.patch.object(DataSourceAzure, 'get_metadata_from_fabric',
                               mock.MagicMock(return_value={})))
         self.patches.enter_context(

Follow ups