cloud-init-dev team mailing list archive
-
cloud-init-dev team
-
Mailing list archive
-
Message #02728
Re: [Merge] ~jcastets/cloud-init:scaleway-datasource into cloud-init:master
Some things, and some content inline:
* We'll need some unit tests for this. Otherwise it is at increased risk of being inadvertently broken.
* We will need to add knowledge of the datasource to tools/ds-identify (without that your datasource will only ever be considered if it is a single entry in the configured list)
* we really, *REALLY* want a positive non-network identification. Without such a thing, we can't enable the datasource by default, meaning Ubuntu or other images that would work elsewhere wont work on your platform.
Also, give a nicer commit message:
Summary
<blank line>
More information
...
<blank line>
LP: #XXXXXXX
Diff comments:
> diff --git a/cloudinit/sources/DataSourceScaleway.py b/cloudinit/sources/DataSourceScaleway.py
> new file mode 100644
> index 0000000..7ac1b1a
> --- /dev/null
> +++ b/cloudinit/sources/DataSourceScaleway.py
> @@ -0,0 +1,264 @@
> +# vi: ts=4 expandtab
this goes at the bottom.
> +#
> +# Author: Julien Castets <castets.j@xxxxxxxxx>
please copy other headers.
If you want to put 'Author' thats fine, but then after that a blank line single line license info.
so, yours might look like:
# Author: Julien Castets <castets.j@xxxxxxxxx>
#
# This file is part of cloud-init. See LICENSE file for license information.
> +#
> +# This program is free software: you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License version 3, as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program. If not, see <http://www.gnu.org/licenses/>.
> +
> +import json
> +import socket
> +import time
> +
> +import requests
> +
> +# pylint fails to import the two modules below.
> +# pylint: disable=E0401
> +from requests.packages.urllib3.connection import HTTPConnection
> +from requests.packages.urllib3.poolmanager import PoolManager
> +
> +from cloudinit import log as logging
> +from cloudinit import sources
> +from cloudinit import url_helper
> +from cloudinit import util
> +
> +
> +LOG = logging.getLogger(__name__)
> +
> +BUILTIN_DS_CONFIG = {
> + 'metadata_url': 'http://169.254.42.42/conf?format=json',
> + 'userdata_url': 'http://169.254.42.42/user_data/cloud-init',
> + 'vendordata_url': 'http://169.254.42.42/vendor_data/cloud-init'
> +}
> +
> +DEF_MD_RETRIES = 5
> +DEF_MD_TIMEOUT = 10
> +
> +
> +def on_scaleway(user_data_url, retries):
> + """
> + Check if we are on Scaleway.
> +
> + The only backward-compatible way to check if you are on a Scaleway instance
> + is to query the metadata API. For security reasons, the special endpoint
> + /user_data/ can only be accessed from a privileged TCP source port (ie.
> + below 1024), otherwise the API returns a HTTP/403.
> +
> + In other words, on Scaleway:
> +
> + #> curl http://169.254.42.42/user_data
> + [...] 403 error
> +
> + #> curl --local-port 1-1024 http://169.254.42.42/user_data
> + [...] 200 OK
> +
> + This function queries the endpoint /user_data/ and returns True if a
> + HTTP/403 is returned.
> + """
> + tries = max(int(retries) + 1, 1)
> + for ntry in range(tries):
> + try:
> + code = requests.head(user_data_url).status_code
> + if code == 403:
> + return True
> +
> + # If we are rate limited or if there is a server error, we might
> + # not be talking to the Scaleway metadata API and we need to try
> + # again.
> + if code != 429 and code < 500:
> + return False
> +
> + # Couldn't query the API.
> + except (requests.exceptions.ConnectionError,
> + requests.exceptions.Timeout):
> + return False
> +
> + # Be nice, and wait a bit before retrying.
> + time.sleep(5)
> +
> + return False
> +
> +
> +class SourceAddressAdapter(requests.adapters.HTTPAdapter):
> + """
> + Adapter for requests to choose the local address to bind to.
> + """
> +
> + def __init__(self, source_address, **kwargs):
> + self.source_address = source_address
> + super(SourceAddressAdapter, self).__init__(**kwargs)
> +
> + def init_poolmanager(self, connections, maxsize, block=False):
> + socket_options = HTTPConnection.default_socket_options + [
> + (socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
> + ]
> + self.poolmanager = PoolManager(num_pools=connections,
> + maxsize=maxsize,
> + block=block,
> + source_address=self.source_address,
> + socket_options=socket_options)
> +
> +
> +def _get_type_data(typedata_address, timeout, requests_session):
> + """
> + Retrieve user data or vendor data.
> +
> + Scaleway userdata/vendordata API returns HTTP/404 if user/vendor data is
> + not set.
> +
> + This function calls `url_helper.readurl` but instead of considering
> + HTTP/404 as an error that requires a retry, it considers it as empty
> + user/vendor data.
> +
> + Also, be aware the user data/vendor API requires the source port to be
> + below 1024. If requests raises ConnectionError (EADDRINUSE), the caller
> + should retry to call this function on an other port.
> + """
> + try:
> + resp = url_helper.readurl(
> + typedata_address,
> + data=None,
> + timeout=timeout,
> + # It's the caller's responsability to recall this function in case
> + # of exception. Don't let url_helper.readurl() retry by itself.
> + retries=0,
> + session=requests_session,
> + # If the error is a HTTP/404 or a ConnectionError, go into raise
> + # block below.
> + exception_cb=lambda _, exc: exc.code == 404 or (
> + isinstance(exc.cause, requests.exceptions.ConnectionError) and
> + 'Address already in use' in exc.message
> + )
> + )
> + return util.decode_binary(resp.contents)
> + except url_helper.UrlError as exc:
> + # Empty user data.
> + if exc.code == 404:
> + return None
> + raise
> +
> +
> +class DataSourceScaleway(sources.DataSource):
> +
> + def __init__(self, sys_cfg, distro, paths):
> + LOG.debug('Init Scaleway')
> +
> + sources.DataSource.__init__(self, sys_cfg, distro, paths)
> +
> + self.metadata = {}
> + self.ds_cfg = util.mergemanydict([
> + util.get_cfg_by_path(sys_cfg, ["datasource", "Scaleway"], {}),
> + BUILTIN_DS_CONFIG
> + ])
> +
> + self.metadata_address = self.ds_cfg['metadata_url']
> + self.userdata_address = self.ds_cfg['userdata_url']
> + self.vendordata_address = self.ds_cfg['vendordata_url']
> +
> + self.retries = self.ds_cfg.get('retries', DEF_MD_RETRIES)
> + self.timeout = self.ds_cfg.get('timeout', DEF_MD_TIMEOUT)
> +
> + def _get_privileged_type(self, type_):
> + assert type_ in ('user', 'vendor')
> +
> + type_address = self.userdata_address \
> + if type_ == 'user' else self.vendordata_address
> +
> + # Query user/vendor-data. Try to make a request on the first privileged
> + # port available.
Wonder why? The other end cares about the source port?
> + for port in range(1, max(int(self.retries), 2)):
> + try:
> + LOG.debug(
> + 'Trying to get %s data (bind on port %d)...', type_, port
> + )
> + requests_session = requests.Session()
> + requests_session.mount(
> + 'http://',
> + SourceAddressAdapter(source_address=('0.0.0.0', port))
> + )
> + data = _get_type_data(
> + type_address,
> + timeout=self.timeout,
> + requests_session=requests_session
> + )
> + LOG.debug('%s-data downloaded', type_)
> + return data
> +
> + except url_helper.UrlError as exc:
> + # Local port already in use or HTTP/429.
> + time.sleep(5)
> + last_exc = exc
> + continue
> +
> + # Max number of retries reached.
> + raise last_exc
> +
> + def _get_metadata(self):
> + resp = url_helper.readurl(self.metadata_address,
> + timeout=self.timeout,
> + retries=self.retries)
> +
> + metadata = json.loads(util.decode_binary(resp.contents))
> +
> + metadata['user-data'] = self._get_privileged_type('user')
> + metadata['vendor-data'] = self._get_privileged_type('vendor')
> +
> + return metadata
> +
> + def get_data(self):
> + if on_scaleway(self.ds_cfg['userdata_url'], self.retries) is False:
> + return False
> +
> + metadata = self._get_metadata()
> +
> + self.metadata = {
> + 'id': metadata['id'],
> + 'hostname': metadata['hostname'],
> + 'user-data': metadata['user-data'],
> + 'vendor-data': metadata['vendor-data'],
> + 'ssh_public_keys': [
> + key['key'] for key in metadata['ssh_public_keys']
> + ]
> + }
> + return True
> +
> + @property
> + def launch_index(self):
> + return None
> +
> + def get_instance_id(self):
> + return self.metadata['id']
> +
> + def get_public_ssh_keys(self):
> + return self.metadata['ssh_public_keys']
> +
> + def get_hostname(self, fqdn=False, resolve_ip=False):
> + return self.metadata['hostname']
> +
> + def get_userdata_raw(self):
> + return self.metadata['user-data']
> +
> + @property
> + def availability_zone(self):
> + return None
> +
> + @property
> + def region(self):
> + return None
> +
> +
> +datasources = [
> + (DataSourceScaleway, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)),
> +]
> +
> +
> +def get_datasource_list(depends):
> + return sources.list_from_depends(depends, datasources)
--
https://code.launchpad.net/~jcastets/cloud-init/+git/cloud-init/+merge/325740
Your team cloud-init commiters is requested to review the proposed merge of ~jcastets/cloud-init:scaleway-datasource into cloud-init:master.
References