cloud-init-dev team mailing list archive
-
cloud-init-dev team
-
Mailing list archive
-
Message #01406
[Merge] ~smoser/cloud-init:feature/ds-init into cloud-init:master
Scott Moser has proposed merging ~smoser/cloud-init:feature/ds-init into cloud-init:master.
Requested reviews:
cloud init development team (cloud-init-dev)
Related bugs:
Bug #1611074 in cloud-init: "Reformatting of ephemeral drive fails on resize of Azure VM"
https://bugs.launchpad.net/cloud-init/+bug/1611074
For more details, see:
https://code.launchpad.net/~smoser/cloud-init/+git/cloud-init/+merge/311205
--
Your team cloud init development team is requested to review the proposed merge of ~smoser/cloud-init:feature/ds-init into cloud-init:master.
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py
index 83eb02c..fe37075 100644
--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -326,6 +326,9 @@ def main_init(name, args):
util.logexc(LOG, "Failed to re-adjust output redirection!")
logging.setupLogging(mods.cfg)
+ # give the activated datasource a chance to adjust
+ init.activate_datasource()
+
# Stage 10
return (init.datasource, run_module_section(mods, name, name))
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index b802b03..10a8b6f 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -19,7 +19,6 @@
import base64
import contextlib
import crypt
-import fnmatch
from functools import partial
import os
import os.path
@@ -28,7 +27,6 @@ from xml.dom import minidom
import xml.etree.ElementTree as ET
from cloudinit import log as logging
-from cloudinit.settings import PER_ALWAYS
from cloudinit import sources
from cloudinit.sources.helpers.azure import get_metadata_from_fabric
from cloudinit import util
@@ -42,6 +40,9 @@ BOUNCE_COMMAND = [
'sh', '-xc',
"i=$interface; x=0; ifdown $i || x=$?; ifup $i || x=$?; exit $x"
]
+# azure systems will always have a resource disk, and 66-azure-ephemeral.rules
+# ensures that it gets linked to this path.
+RESOURCE_DISK_PATH = '/dev/disk/cloud/azure_resource'
BUILTIN_DS_CONFIG = {
'agent_command': AGENT_START,
@@ -53,7 +54,7 @@ BUILTIN_DS_CONFIG = {
'command': BOUNCE_COMMAND,
'hostname_command': 'hostname',
},
- 'disk_aliases': {'ephemeral0': '/dev/sdb'},
+ 'disk_aliases': {'ephemeral0': RESOURCE_DISK_PATH},
'dhclient_lease_file': '/var/lib/dhcp/dhclient.eth0.leases',
}
@@ -245,15 +246,6 @@ class DataSourceAzureNet(sources.DataSource):
self.metadata['instance-id'] = util.read_dmi_data('system-uuid')
self.metadata.update(fabric_data)
- found_ephemeral = find_fabric_formatted_ephemeral_disk()
- if found_ephemeral:
- self.ds_cfg['disk_aliases']['ephemeral0'] = found_ephemeral
- LOG.debug("using detected ephemeral0 of %s", found_ephemeral)
-
- cc_modules_override = support_new_ephemeral(self.sys_cfg)
- if cc_modules_override:
- self.cfg['cloud_init_modules'] = cc_modules_override
-
return True
def device_name_to_device(self, name):
@@ -266,97 +258,92 @@ class DataSourceAzureNet(sources.DataSource):
# quickly (local check only) if self.instance_id is still valid
return sources.instance_id_matches_system_uuid(self.get_instance_id())
-
-def count_files(mp):
- return len(fnmatch.filter(os.listdir(mp), '*[!cdrom]*'))
+ def activate(self, cfg, is_new_instance):
+ address_ephemeral_resize(is_new_instance=is_new_instance)
+ return
-def find_fabric_formatted_ephemeral_part():
- """
- Locate the first fabric formatted ephemeral device.
- """
- potential_locations = ['/dev/disk/cloud/azure_resource-part1',
- '/dev/disk/azure/resource-part1']
- device_location = None
- for potential_location in potential_locations:
- if os.path.exists(potential_location):
- device_location = potential_location
+def can_dev_be_reformatted(devpath):
+ # determine if the ephemeral block device path devpath
+ # is newly formatted after a resize.
+ if not os.path.isfile(devpath):
+ return False, 'device %s is not a file' % devpath
+
+ # devpath of /dev/sd[a-z] or /dev/disk/cloud/azure_resource
+ # where partitions are "<devpath>1" or "<devpath>-part1" or "<devpath>p1"
+ partpath = None
+ for suff in ("-part", "p", ""):
+ cand = devpath + suff + "1"
+ if os.path.isfile(cand):
+ if os.path.isfile(devpath + suff + "2"):
+ msg = ('device %s had more than 1 partition: %s, %s' %
+ devpath, cand, devpath + suff + "2")
+ return False, msg
+ partpath = cand
break
- if device_location is None:
- LOG.debug("no azure resource disk partition path found")
- return None
+
+ if partpath is None:
+ return False, 'device %s was not partitioned' % devpath
+
+ real_partpath = os.path.realpath(partpath)
ntfs_devices = util.find_devs_with("TYPE=ntfs")
- real_device = os.path.realpath(device_location)
- if real_device in ntfs_devices:
- return device_location
- LOG.debug("'%s' existed (%s) but was not ntfs formated",
- device_location, real_device)
- return None
+ if real_partpath not in ntfs_devices:
+ msg = ('partition 1 (%s -> %s) on device %s was not ntfs formatted' %
+ partpath, real_partpath, devpath)
+ return False, msg
+ def count_files(mp):
+ ignored = {'dataloss_warning_readme.txt'}
+ return len([f for f in os.listdir(mp) if f.lower() not in ignored])
-def find_fabric_formatted_ephemeral_disk():
- """
- Get the ephemeral disk.
- """
- part_dev = find_fabric_formatted_ephemeral_part()
- if part_dev:
- return part_dev.split('-')[0]
- return None
+ bmsg = ('partition 1 (%s -> %s) on device %s was ntfs formatted' %
+ partpath, real_partpath, devpath)
+ try:
+ file_count = util.mount_cb(devpath, count_files)
+ except util.MountFailedError as e:
+ return False, bmsg + ' but mount failed: %s' % e
+ if file_count != 0:
+ return False, bmsg + ' but had %d files on it.' % file_count
-def support_new_ephemeral(cfg):
- """
- Windows Azure makes ephemeral devices ephemeral to boot; a ephemeral device
- may be presented as a fresh device, or not.
+ return True, bmsg + ' and had important files.'
- Since the knowledge of when a disk is supposed to be plowed under is
- specific to Windows Azure, the logic resides here in the datasource. When a
- new ephemeral device is detected, cloud-init overrides the default
- frequency for both disk-setup and mounts for the current boot only.
- """
- device = find_fabric_formatted_ephemeral_part()
- if not device:
- LOG.debug("no default fabric formated ephemeral0.1 found")
- return None
- LOG.debug("fabric formated ephemeral0.1 device at %s", device)
- file_count = 0
- try:
- file_count = util.mount_cb(device, count_files)
- except Exception:
- return None
- LOG.debug("fabric prepared ephmeral0.1 has %s files on it", file_count)
-
- if file_count >= 1:
- LOG.debug("fabric prepared ephemeral0.1 will be preserved")
- return None
+def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120,
+ is_new_instance=False):
+ # wait for ephemeral disk to come up
+ naplen = .2
+ missing = wait_for_files([devpath], maxwait=maxwait, naplen=naplen)
+
+ if missing:
+ LOG.warn("ephemeral device '%s' did not appear after %d seconds.",
+ devpath, maxwait)
+ return
+
+ result = False
+ msg = None
+ if is_new_instance:
+ result, msg = (True, "First instance boot.")
else:
- # if device was already mounted, then we need to unmount it
- # race conditions could allow for a check-then-unmount
- # to have a false positive. so just unmount and then check.
- try:
- util.subp(['umount', device])
- except util.ProcessExecutionError as e:
- if device in util.mounts():
- LOG.warn("Failed to unmount %s, will not reformat.", device)
- LOG.debug("Failed umount: %s", e)
- return None
-
- LOG.debug("cloud-init will format ephemeral0.1 this boot.")
- LOG.debug("setting disk_setup and mounts modules 'always' for this boot")
-
- cc_modules = cfg.get('cloud_init_modules')
- if not cc_modules:
- return None
-
- mod_list = []
- for mod in cc_modules:
- if mod in ("disk_setup", "mounts"):
- mod_list.append([mod, PER_ALWAYS])
- LOG.debug("set module '%s' to 'always' for this boot", mod)
+ result, msg = can_dev_be_reformatted(devpath)
+
+ LOG.debug("reformattable=%s: %s" % (result, msg))
+ if not result:
+ return
+
+ for mod in ['disk_config', 'config_mounts']:
+ sempath = '/var/lib/cloud/instance/sem/config_' + mod
+ bmsg = 'Marker "%s" for module "%s"' % (sempath, mod)
+ if os.path.exists(sempath):
+ try:
+ os.unlink(sempath)
+ LOG.debug(bmsg + " removed.")
+ except Exception as e:
+ # python3 throws FileNotFoundError, python2 throws OSError
+ LOG.warn(bmsg + ": remove failed! (%s)" % e)
else:
- mod_list.append(mod)
- return mod_list
+ LOG.debug(bmsg + " did not exist.")
+ return
def perform_hostname_bounce(hostname, cfg, prev_hostname):
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index d139527..13fb7c6 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -261,6 +261,18 @@ class DataSource(object):
def first_instance_boot(self):
return
+ def activate(self, cfg, is_new_instance):
+ """activate(cfg, is_new_instance)
+
+ This is called before the init_modules are run.
+ The cfg is the fully up-to-date config; it contains a merged view of
+ system config, datasource config, user config, vendor config.
+ It should be used rather than the sys_cfg passed to __init__.
+
+ is_new_instance is a boolean indicating if this is a new instance.
+ """
+ return
+
def normalize_pubkey_data(pubkey_data):
keys = []
diff --git a/cloudinit/stages.py b/cloudinit/stages.py
index 47deac6..043e3b8 100644
--- a/cloudinit/stages.py
+++ b/cloudinit/stages.py
@@ -371,6 +371,13 @@ class Init(object):
self._store_userdata()
self._store_vendordata()
+ def activate_datasource(self):
+ if self.datasource is None:
+ raise RuntimeError("Datasource is None, cannot activate.")
+ self.datasource.activate(cfg=self.cfg,
+ new_instance=self.is_new_instance())
+ self._write_to_cache()
+
def _store_userdata(self):
raw_ud = self.datasource.get_userdata_raw()
if raw_ud is None:
diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
index e90e903..0712700 100644
--- a/tests/unittests/test_datasource/test_azure.py
+++ b/tests/unittests/test_datasource/test_azure.py
@@ -349,7 +349,7 @@ class TestAzureDataSource(TestCase):
cfg = dsrc.get_config_obj()
self.assertEqual(dsrc.device_name_to_device("ephemeral0"),
- "/dev/sdb")
+ DataSourceAzure.RESOURCE_DISK_PATH)
assert 'disk_setup' in cfg
assert 'fs_setup' in cfg
self.assertIsInstance(cfg['disk_setup'], dict)
@@ -462,14 +462,6 @@ class TestAzureBounce(TestCase):
mock.patch.object(DataSourceAzure, 'list_possible_azure_ds_devs',
mock.MagicMock(return_value=[])))
self.patches.enter_context(
- mock.patch.object(DataSourceAzure,
- 'find_fabric_formatted_ephemeral_disk',
- mock.MagicMock(return_value=None)))
- self.patches.enter_context(
- mock.patch.object(DataSourceAzure,
- 'find_fabric_formatted_ephemeral_part',
- mock.MagicMock(return_value=None)))
- self.patches.enter_context(
mock.patch.object(DataSourceAzure, 'get_metadata_from_fabric',
mock.MagicMock(return_value={})))
self.patches.enter_context(
Follow ups