
cloud-init-dev team mailing list archive

Re: [Merge] lp:~utlemming/cloud-init/digitalocean into lp:cloud-init

 

Generally good, and I'd like to have this datasource enabled by default also.
It's great to see clouds putting instance-id in DMI data.
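(For reference, enabling by default would roughly mean adding it to the builtin datasource_list, e.g. something like this in cloudinit/settings.py -- list contents elided, placement illustrative only:)

    CFG_BUILTIN = {
        'datasource_list': [
            ...
            'DigitalOcean',
            ...
            'None',
        ],
        ...
    }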

Please see the comments inline.

Diff comments:

> === modified file 'cloudinit/sources/DataSourceDigitalOcean.py'
> --- cloudinit/sources/DataSourceDigitalOcean.py	2016-05-12 17:56:26 +0000
> +++ cloudinit/sources/DataSourceDigitalOcean.py	2016-07-26 14:34:57 +0000
> @@ -14,22 +15,27 @@
>  #    You should have received a copy of the GNU General Public License
>  #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
>  
> -from cloudinit import ec2_utils
> +# DigitalOcean Droplet API:
> +# https://developers.digitalocean.com/documentation/metadata/
> +
> +import json
> +import os
> +
>  from cloudinit import log as logging
>  from cloudinit import sources
> +from cloudinit import url_helper
>  from cloudinit import util
>  
> -import functools
> -
> -
>  LOG = logging.getLogger(__name__)
>  
>  BUILTIN_DS_CONFIG = {
> -    'metadata_url': 'http://169.254.169.254/metadata/v1/',
> -    'mirrors_url': 'http://mirrors.digitalocean.com/'
> +    'metadata_url': 'http://169.254.169.254/metadata/v1.json',
>  }
> -MD_RETRIES = 0
> -MD_TIMEOUT = 1
> +
> +# Wait for a up to a minute, retrying the meta-data server
> +# every 2 seconds.
> +MD_RETRIES = 30
> +MD_TIMEOUT = 2

There is 'timeout' (how long to allow the url request to take before giving up),
and there is 'sec_between' (how long to sleep between retries). You've used MD_TIMEOUT for both. Might as well separate these into two variables and update your call to readurl.
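Something along these lines (MD_SEC_BETWEEN / self.sec_between are illustrative names, not in the current patch):

    MD_RETRIES = 30
    MD_TIMEOUT = 2       # seconds to allow each request
    MD_SEC_BETWEEN = 2   # seconds to sleep between retries

    ...

    self.retries = self.ds_cfg.get('retries', MD_RETRIES)
    self.timeout = self.ds_cfg.get('timeout', MD_TIMEOUT)
    self.sec_between = self.ds_cfg.get('sec_between', MD_SEC_BETWEEN)

    ...

    response = url_helper.readurl(self.metadata_address,
                                  timeout=self.timeout,
                                  sec_between=self.sec_between,
                                  retries=self.retries)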

>  
>  
>  class DataSourceDigitalOcean(sources.DataSource):
> @@ -40,43 +46,67 @@
>              util.get_cfg_by_path(sys_cfg, ["datasource", "DigitalOcean"], {}),
>              BUILTIN_DS_CONFIG])
>          self.metadata_address = self.ds_cfg['metadata_url']
> -
> -        if self.ds_cfg.get('retries'):
> -            self.retries = self.ds_cfg['retries']
> -        else:
> -            self.retries = MD_RETRIES
> -
> -        if self.ds_cfg.get('timeout'):
> -            self.timeout = self.ds_cfg['timeout']
> -        else:
> -            self.timeout = MD_TIMEOUT
> -
> -    def get_data(self):
> -        caller = functools.partial(util.read_file_or_url,
> -                                   timeout=self.timeout, retries=self.retries)
> -
> -        def mcaller(url):
> -            return caller(url).contents
> -
> -        md = ec2_utils.MetadataMaterializer(mcaller(self.metadata_address),
> -                                            base_url=self.metadata_address,
> -                                            caller=mcaller)
> -
> -        self.metadata = md.materialize()
> -
> -        if self.metadata.get('id'):
> -            return True
> -        else:
> +        self.retries = self.ds_cfg.get('retries', MD_RETRIES)
> +        self.timeout = self.ds_cfg.get('timeout', MD_TIMEOUT)
> +
> +    def _get_sysinfo(self):
> +        # DigitalOcean embeds vendor ID and instance/droplet_id in the
> +        # SMBIOS information
> +
> +        # DigitalOcean only supports i386/x86_64 hardware
> +        uname_arch = os.uname()[4]
> +        if uname_arch not in ('i386', 'i686', 'x86_64'):
> +            return (None, None)
> +        LOG.debug("checking if instance is a DigitalOcean droplet")

I'd prefer to just accept anything that says it's DigitalOcean (per the BIOS), independent of arch.
https://bugs.launchpad.net/qemu/+bug/1243287 is a pain, and is probably better fixed in the generic util.read_dmi_data (by not running dmidecode on arm*) than where it is right now.

So, either:
a.) keep the check for arch, or
b.) fix util.read_dmi_data to not try running dmidecode on arm (see the comment in cloudinit/sources/DataSourceSmartOS.py about that bug and put a similar workaround in util); a rough sketch is below.
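Something like this, assuming the same sort of workaround described in DataSourceSmartOS.py (illustrative only, not a final patch):

    # in cloudinit/util.py
    def read_dmi_data(key):
        # per LP: #1243287, running dmidecode on arm is problematic,
        # so don't attempt SMBIOS lookups on non-x86 arches at all.
        uname_arch = os.uname()[4]
        if uname_arch not in ('x86_64', 'i386', 'i586', 'i686'):
            LOG.debug("dmi data is not supported on %s", uname_arch)
            return None
        # ... existing sysfs / dmidecode lookup unchanged ...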

> +
> +        is_do = False
> +        droplet_id = None
> +
> +        # Detect if we are on DigitalOcean and return the Droplet's ID
> +        vendor_name = util.read_dmi_data("system-manufacturer")
> +        if vendor_name == "DigitalOcean":

I'm generally a fan of just getting out of a function early; then you can indent less:

if vendor_name != "DigitalOcean":
    return (None, None)

> +            is_do = True
> +            LOG.info("running on DigitalOcean")
> +
> +            droplet_id = util.read_dmi_data("system-serial-number")
> +
> +            if droplet_id:
> +                LOG.debug(("system identified via SMBIOS as DigitalOcean "
> +                           "Droplet {}").format(droplet_id))
> +            else:
> +                LOG.critical(("system identified via SMBIOS as a DigitalOcean "
> +                              "Droplet, but not provide an ID. "
> +                              "Please file a support ticket at: "
> +                              "https://cloud.digitalocean.com/support/tickets"
> +                              "/new"))
> +
> +        return (is_do, droplet_id)
> +
> +    def get_data(self, apply_filter=False):
> +        (is_do, droplet_id) = self._get_sysinfo()
> +
> +        # only proceed if we know we are on DigitalOcean
> +        if not is_do:
>              return False
>  
> -    def get_userdata_raw(self):
> -        return "\n".join(self.metadata['user-data'])
> -
> -    def get_vendordata_raw(self):
> -        return "\n".join(self.metadata['vendor-data'])
> +        LOG.debug("reading metadata from {}".format(self.metadata_address))
> +        response = url_helper.readurl(self.metadata_address,
> +                                      timeout=self.timeout,
> +                                      sec_between=self.timeout,
> +                                      retries=self.retries)
> +
> +        contents = util.decode_binary(response.contents)
> +        decoded = json.loads(contents)
> +
> +        self.metadata = decoded
> +        self.metadata['instance-id'] = decoded.get('droplet_id', droplet_id)
> +        self.metadata['local-hostname'] = decoded.get('hostname', droplet_id)
> +        self.vendordata_raw = decoded.get("vendor_data", None)
> +        self.userdata_raw = decoded.get("user_data", None)
> +        return True
>  
>      def get_public_ssh_keys(self):
> -        public_keys = self.metadata['public-keys']
> +        public_keys = self.metadata.get('public_keys', [])
>          if isinstance(public_keys, list):
>              return public_keys
>          else:
> @@ -84,21 +114,17 @@
>  
>      @property
>      def availability_zone(self):
> -        return self.metadata['region']
> -
> -    def get_instance_id(self):
> -        return self.metadata['id']
> -
> -    def get_hostname(self, fqdn=False, resolve_ip=False):
> -        return self.metadata['hostname']
> -
> -    def get_package_mirror_info(self):
> -        return self.ds_cfg['mirrors_url']
> +        return self.metadata.get('region', 'default')
>  
>      @property
>      def launch_index(self):
>          return None
>  
> +    def check_instance_id(self, sys_cfg):

This is supposed to check whether the instance_id is still valid and return True or False.
It will be called on each boot after the first, and if it returns False, a slower path is used and the metadata ends up being re-read.

So, look at the Azure source, which does this:
    return sources.instance_id_matches_system_uuid(self.get_instance_id())

I think you might want this:
    return sources.instance_id_matches_system_uuid(
        self.get_instance_id(), 'system-serial-number')

> +        (_, droplet_id) = self._get_sysinfo()
> +        return droplet_id
> +
> +
>  # Used to match classes to dependencies
>  datasources = [
>      (DataSourceDigitalOcean, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)),


-- 
https://code.launchpad.net/~utlemming/cloud-init/digitalocean/+merge/301123
Your team cloud init development team is requested to review the proposed merge of lp:~utlemming/cloud-init/digitalocean into lp:cloud-init.

