cloud-init-dev team mailing list archive
-
cloud-init-dev team
-
Mailing list archive
-
Message #01071
Re: [Merge] lp:~utlemming/cloud-init/digitalocean into lp:cloud-init
Generally good, and I'd like to have this datasource enabled by default also.
Its great to see cloud's putting instance-id in dmi data.
please see the comments inline.
Diff comments:
> === modified file 'cloudinit/sources/DataSourceDigitalOcean.py'
> --- cloudinit/sources/DataSourceDigitalOcean.py 2016-05-12 17:56:26 +0000
> +++ cloudinit/sources/DataSourceDigitalOcean.py 2016-07-26 14:34:57 +0000
> @@ -14,22 +15,27 @@
> # You should have received a copy of the GNU General Public License
> # along with this program. If not, see <http://www.gnu.org/licenses/>.
>
> -from cloudinit import ec2_utils
> +# DigitalOcean Droplet API:
> +# https://developers.digitalocean.com/documentation/metadata/
> +
> +import json
> +import os
> +
> from cloudinit import log as logging
> from cloudinit import sources
> +from cloudinit import url_helper
> from cloudinit import util
>
> -import functools
> -
> -
> LOG = logging.getLogger(__name__)
>
> BUILTIN_DS_CONFIG = {
> - 'metadata_url': 'http://169.254.169.254/metadata/v1/',
> - 'mirrors_url': 'http://mirrors.digitalocean.com/'
> + 'metadata_url': 'http://169.254.169.254/metadata/v1.json',
> }
> -MD_RETRIES = 0
> -MD_TIMEOUT = 1
> +
> +# Wait for a up to a minute, retrying the meta-data server
> +# every 2 seconds.
> +MD_RETRIES = 30
> +MD_TIMEOUT = 2
there is 'timeout' (how long to allow the url request to take before giving up)
and there is sec_between. you've used MD_TIMEOUT for both. might as well separate these to 2 variables and update your call to readurl.
>
>
> class DataSourceDigitalOcean(sources.DataSource):
> @@ -40,43 +46,67 @@
> util.get_cfg_by_path(sys_cfg, ["datasource", "DigitalOcean"], {}),
> BUILTIN_DS_CONFIG])
> self.metadata_address = self.ds_cfg['metadata_url']
> -
> - if self.ds_cfg.get('retries'):
> - self.retries = self.ds_cfg['retries']
> - else:
> - self.retries = MD_RETRIES
> -
> - if self.ds_cfg.get('timeout'):
> - self.timeout = self.ds_cfg['timeout']
> - else:
> - self.timeout = MD_TIMEOUT
> -
> - def get_data(self):
> - caller = functools.partial(util.read_file_or_url,
> - timeout=self.timeout, retries=self.retries)
> -
> - def mcaller(url):
> - return caller(url).contents
> -
> - md = ec2_utils.MetadataMaterializer(mcaller(self.metadata_address),
> - base_url=self.metadata_address,
> - caller=mcaller)
> -
> - self.metadata = md.materialize()
> -
> - if self.metadata.get('id'):
> - return True
> - else:
> + self.retries = self.ds_cfg.get('retries', MD_RETRIES)
> + self.timeout = self.ds_cfg.get('timeout', MD_TIMEOUT)
> +
> + def _get_sysinfo(self):
> + # DigitalOcean embeds vendor ID and instance/droplet_id in the
> + # SMBIOS information
> +
> + # DigitalOcean only supports i386/x86_64 hardware
> + uname_arch = os.uname()[4]
> + if uname_arch not in ('i386', 'i686', 'x86_64'):
> + return (None, None)
> + LOG.debug("checking if instance is a DigitalOcean droplet")
I'd prefer to just accept anything that says they're DigitalOcean (per the bios), independent of arch.
https://bugs.launchpad.net/qemu/+bug/1243287 is a pain, which is probably better fixed in generic util.read_dmi_data (avoiding running on arm*) than it is right now.
so either
a.) keep the check for arch
b.) fix util.read_dmi-data to not try running dmidecode on arm (see the comment in cloudinit/sources/DataSourceSmartOS.py for that bug and just put a similar work around in util).
> +
> + is_do = False
> + droplet_id = None
> +
> + # Detect if we are on DigitalOcean and return the Droplet's ID
> + vendor_name = util.read_dmi_data("system-manufacturer")
> + if vendor_name == "DigitalOcean":
i'm generally a fan of just getting out of a function early. then you can indent less.
if vendor_name != DigitalOcean:
return (Nonw, Nonw)
> + is_do = True
> + LOG.info("running on DigitalOcean")
> +
> + droplet_id = util.read_dmi_data("system-serial-number")
> +
> + if droplet_id:
> + LOG.debug(("system identified via SMBIOS as DigitalOcean "
> + "Droplet {}").format(droplet_id))
> + else:
> + LOG.critical(("system identified via SMBIOS as a DigitalOcean "
> + "Droplet, but not provide an ID. "
> + "Please file a support ticket at: "
> + "https://cloud.digitalocean.com/support/tickets"
> + "/new"))
> +
> + return (is_do, droplet_id)
> +
> + def get_data(self, apply_filter=False):
> + (is_do, droplet_id) = self._get_sysinfo()
> +
> + # only proceed if we know we are on DigitalOcean
> + if not is_do:
> return False
>
> - def get_userdata_raw(self):
> - return "\n".join(self.metadata['user-data'])
> -
> - def get_vendordata_raw(self):
> - return "\n".join(self.metadata['vendor-data'])
> + LOG.debug("reading metadata from {}".format(self.metadata_address))
> + response = url_helper.readurl(self.metadata_address,
> + timeout=self.timeout,
> + sec_between=self.timeout,
> + retries=self.retries)
> +
> + contents = util.decode_binary(response.contents)
> + decoded = json.loads(contents)
> +
> + self.metadata = decoded
> + self.metadata['instance-id'] = decoded.get('droplet_id', droplet_id)
> + self.metadata['local-hostname'] = decoded.get('hostname', droplet_id)
> + self.vendordata_raw = decoded.get("vendor_data", None)
> + self.userdata_raw = decoded.get("user_data", None)
> + return True
>
> def get_public_ssh_keys(self):
> - public_keys = self.metadata['public-keys']
> + public_keys = self.metadata.get('public_keys', [])
> if isinstance(public_keys, list):
> return public_keys
> else:
> @@ -84,21 +114,17 @@
>
> @property
> def availability_zone(self):
> - return self.metadata['region']
> -
> - def get_instance_id(self):
> - return self.metadata['id']
> -
> - def get_hostname(self, fqdn=False, resolve_ip=False):
> - return self.metadata['hostname']
> -
> - def get_package_mirror_info(self):
> - return self.ds_cfg['mirrors_url']
> + return self.metadata.get('region', 'default')
>
> @property
> def launch_index(self):
> return None
>
> + def check_instance_id(self, sys_cfg):
this is supposed to check if the instance_id is still valid and return true or false.
it will be called on each boot after the first, and if it returns False, a slower path is used, and metadata will end up being re-checked.
So, look at the azure source which does this:
return sources.instance_id_matches_system_uuid(self.get_instance_id())
I think you might want this:
return sources.instance_id_matches_system_uuid(
self.get_instance_id(), 'system-serial-number')
> + (_, droplet_id) = self._get_sysinfo()
> + return droplet_id
> +
> +
> # Used to match classes to dependencies
> datasources = [
> (DataSourceDigitalOcean, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)),
--
https://code.launchpad.net/~utlemming/cloud-init/digitalocean/+merge/301123
Your team cloud init development team is requested to review the proposed merge of lp:~utlemming/cloud-init/digitalocean into lp:cloud-init.
References