cloud-init-dev team mailing list archive
-
cloud-init-dev team
-
Mailing list archive
-
Message #02731
Re: [Merge] ~jcastets/cloud-init:scaleway-datasource into cloud-init:master
over all nice.
some comments.
thank you, Julien.
Diff comments:
> diff --git a/cloudinit/sources/DataSourceScaleway.py b/cloudinit/sources/DataSourceScaleway.py
> new file mode 100644
> index 0000000..ed3fef2
> --- /dev/null
> +++ b/cloudinit/sources/DataSourceScaleway.py
> @@ -0,0 +1,235 @@
> +# vi: ts=4 expandtab
> +#
> +# Author: Julien Castets <castets.j@xxxxxxxxx>
comments to header as before.
> +#
> +# This program is free software: you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License version 3, as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program. If not, see <http://www.gnu.org/licenses/>.
> +
> +import json
> +import os
> +import socket
> +import time
> +
> +import requests
> +
> +# pylint fails to import the two modules below.
> +# pylint: disable=E0401
> +from requests.packages.urllib3.connection import HTTPConnection
> +from requests.packages.urllib3.poolmanager import PoolManager
> +
> +from cloudinit import log as logging
> +from cloudinit import sources
> +from cloudinit import url_helper
> +from cloudinit import util
> +
> +
> +LOG = logging.getLogger(__name__)
> +
maybe just remove the duplication of http://169.254.42.42 ?
DS_BASE_URL = 'http://169.254.169.254'
'metadata_url': DS_BASE_URL + "/conf?format=json",
...
> +BUILTIN_DS_CONFIG = {
> + 'metadata_url': 'http://169.254.42.42/conf?format=json',
> + 'userdata_url': 'http://169.254.42.42/user_data/cloud-init',
> + 'vendordata_url': 'http://169.254.42.42/vendor_data/cloud-init'
> +}
> +
> +DEF_MD_RETRIES = 5
> +DEF_MD_TIMEOUT = 10
> +
> +
> +def on_scaleway():
> + """
> + There are three way to detect if you are on Scaleway:
> +
> + * check DMI data: not yet implemented by Scaleway, but the check is made to
> + be future-proof.
> + * the initrd created the file /var/run/scaleway.
> + * "scaleway" is in the kernel cmdline.
> + """
> + vendor_name = util.read_dmi_data('system-manufacturer')
> + if vendor_name == 'Scaleway':
> + return True
> +
> + if os.path.exists('/var/run/scaleway'):
> + return True
> +
> + cmdline = util.get_cmdline()
> + if 'scaleway' in cmdline:
> + return True
> +
> + return False
> +
> +
> +class SourceAddressAdapter(requests.adapters.HTTPAdapter):
> + """
> + Adapter for requests to choose the local address to bind to.
> + """
> +
> + def __init__(self, source_address, **kwargs):
> + self.source_address = source_address
> + super(SourceAddressAdapter, self).__init__(**kwargs)
> +
> + def init_poolmanager(self, connections, maxsize, block=False):
> + socket_options = HTTPConnection.default_socket_options + [
> + (socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
> + ]
> + self.poolmanager = PoolManager(num_pools=connections,
> + maxsize=maxsize,
> + block=block,
> + source_address=self.source_address,
> + socket_options=socket_options)
> +
> +
> +def _get_type_data(typedata_address, timeout, requests_session):
> + """
> + Retrieve user data or vendor data.
> +
> + Scaleway userdata/vendordata API returns HTTP/404 if user/vendor data is
> + not set.
> +
> + This function calls `url_helper.readurl` but instead of considering
> + HTTP/404 as an error that requires a retry, it considers it as empty
> + user/vendor data.
> +
> + Also, be aware the user data/vendor API requires the source port to be
> + below 1024. If requests raises ConnectionError (EADDRINUSE), the caller
> + should retry to call this function on an other port.
> + """
> + try:
> + resp = url_helper.readurl(
> + typedata_address,
> + data=None,
> + timeout=timeout,
> + # It's the caller's responsability to recall this function in case
> + # of exception. Don't let url_helper.readurl() retry by itself.
> + retries=0,
> + session=requests_session,
> + # If the error is a HTTP/404 or a ConnectionError, go into raise
> + # block below.
> + exception_cb=lambda _, exc: exc.code == 404 or (
> + isinstance(exc.cause, requests.exceptions.ConnectionError) and
> + 'Address already in use' in exc.message
> + )
> + )
> + return util.decode_binary(resp.contents)
> + except url_helper.UrlError as exc:
> + # Empty user data.
> + if exc.code == 404:
> + return None
> + raise
> +
> +
> +class DataSourceScaleway(sources.DataSource):
> +
> + def __init__(self, sys_cfg, distro, paths):
> + LOG.debug('Init Scaleway')
drop debug message.
> +
> + sources.DataSource.__init__(self, sys_cfg, distro, paths)
> +
> + self.metadata = {}
> + self.user_data = {}
> + self.vendor_data = {}
> +
> + self.ds_cfg = util.mergemanydict([
> + util.get_cfg_by_path(sys_cfg, ["datasource", "Scaleway"], {}),
> + BUILTIN_DS_CONFIG
> + ])
> +
> + self.metadata_address = self.ds_cfg['metadata_url']
> + self.userdata_address = self.ds_cfg['userdata_url']
> + self.vendordata_address = self.ds_cfg['vendordata_url']
> +
> + self.retries = self.ds_cfg.get('retries', DEF_MD_RETRIES)
> + self.timeout = self.ds_cfg.get('timeout', DEF_MD_TIMEOUT)
> +
> + def _get_privileged_type(self, type_):
i dont know that i like the trailing '_' in the type_ name.
(nit)
and you can probably move this out of the datasource to make it easier to mock.
it looks like all it reads is 'timeout' from the datasource, just pass that in.
then you can mock it more easily (and its response or error).
> + assert type_ in ('user', 'vendor')
> +
> + type_address = self.userdata_address \
> + if type_ == 'user' else self.vendordata_address
> +
> + # Query user/vendor-data. Try to make a request on the first privileged
> + # port available.
> + for port in range(1, max(int(self.retries), 2)):
> + try:
> + LOG.debug(
> + 'Trying to get %s data (bind on port %d)...', type_, port
> + )
> + requests_session = requests.Session()
> + requests_session.mount(
> + 'http://',
> + SourceAddressAdapter(source_address=('0.0.0.0', port))
> + )
> + data = _get_type_data(
> + type_address,
> + timeout=self.timeout,
> + requests_session=requests_session
> + )
> + LOG.debug('%s-data downloaded', type_)
> + return data
> +
> + except url_helper.UrlError as exc:
> + # Local port already in use or HTTP/429.
> + time.sleep(5)
> + last_exc = exc
> + continue
> +
> + # Max number of retries reached.
> + raise last_exc
> +
> + def get_data(self):
> + if on_scaleway() is False:
i think just:
if not on_scaleway():
return False
> + return False
> +
> + resp = url_helper.readurl(self.metadata_address,
> + timeout=self.timeout,
> + retries=self.retries)
> +
> + self.metadata = json.loads(util.decode_binary(resp.contents))
> + self.user_data = self._get_privileged_type('user')
> + self.vendor_data = self._get_privileged_type('vendor')
i think i'd just set this to vendordata_raw and drop the @get_vendordata_raw() below.
> +
> + return True
> +
> + @property
> + def launch_index(self):
> + return None
> +
> + def get_instance_id(self):
> + return self.metadata['id']
> +
> + def get_public_ssh_keys(self):
> + return [key['key'] for key in self.metadata['ssh_public_keys']]
> +
> + def get_hostname(self, fqdn=False, resolve_ip=False):
> + return self.metadata['hostname']
> +
> + def get_userdata_raw(self):
> + return self.user_data
> +
> + def get_vendordata_raw(self):
> + return self.vendor_data
> +
> + @property
> + def availability_zone(self):
> + return None
> +
> + @property
> + def region(self):
> + return None
> +
> +
> +datasources = [
> + (DataSourceScaleway, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)),
> +]
> +
> +
> +def get_datasource_list(depends):
> + return sources.list_from_depends(depends, datasources)
> diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py
> index d2b92e6..34458ec 100644
> --- a/cloudinit/url_helper.py
> +++ b/cloudinit/url_helper.py
> @@ -231,7 +232,10 @@ def readurl(url, data=None, timeout=None, retries=0, sec_between=1,
> LOG.debug("[%s/%s] open '%s' with %s configuration", i,
> manual_tries, url, filtered_req_args)
>
> - r = requests.request(**req_args)
> + if session is None:
> + session = requests.Session()
this isn't obvious to me if this is equivalent to behavior before if no session is passed in.
it may well be, i just dont know.
> + r = session.request(**req_args)
> +
> if check_status:
> r.raise_for_status()
> LOG.debug("Read from %s (%s, %sb) after %s attempts", url,
> diff --git a/tools/ds-identify b/tools/ds-identify
> index 7c8b144..221a286 100755
> --- a/tools/ds-identify
> +++ b/tools/ds-identify
> @@ -112,7 +112,7 @@ DI_DSNAME=""
> # be searched if there is no setting found in config.
> DI_DSLIST_DEFAULT="MAAS ConfigDrive NoCloud AltCloud Azure Bigstep \
> CloudSigma CloudStack DigitalOcean AliYun Ec2 GCE OpenNebula OpenStack \
> -OVF SmartOS"
> +OVF SmartOS Scaleway"
> DI_DSLIST=""
add a 'dscheck_Scaleway' that implements the same basic logic as in the
python 'on_scaleway'.
I think this is a (untested) reasonable guess:
dscheck_Scaleway() {
if [ "$DI_DMI_SYS_VENDOR" = "Scaleway" ];
return $DS_FOUND
fi
case " $DI_KERNEL_CMDLINE " in
*\ scaleway\ *) return $DS_FOUND;;
esac
if [ -f /var/run/scaleway ]; then
return $DS_FOUND
fi
return $DS_NOT_FOUND
}
There are unit tests for this in tests/unittests/test_ds_identify.py
I think you'll be able to copy other tests and adjust.
> DI_MODE=""
> DI_ON_FOUND=""
--
https://code.launchpad.net/~jcastets/cloud-init/+git/cloud-init/+merge/325740
Your team cloud-init commiters is requested to review the proposed merge of ~jcastets/cloud-init:scaleway-datasource into cloud-init:master.
References