← Back to team overview

cloud-init-dev team mailing list archive

Re: [Merge] ~jcastets/cloud-init:scaleway-datasource into cloud-init:master

 

Some things, and some content inline:
 * We'll need some unit tests for this.  Otherwise it is at increased risk of being inadvertently broken.
 * We will need to add knowledge of the datasource to tools/ds-identify (without that your datasource will only ever be considered if it is a single entry in the configured list)
 * we really, *REALLY* want a positive non-network identification.  Without such a thing, we can't enable the datasource by default, meaning Ubuntu or other images that would work elsewhere wont work on your platform.


Also, give a nicer commit message:
  Summary
  <blank line>
  More information
  ...
  <blank line>
  LP: #XXXXXXX


Diff comments:

> diff --git a/cloudinit/sources/DataSourceScaleway.py b/cloudinit/sources/DataSourceScaleway.py
> new file mode 100644
> index 0000000..7ac1b1a
> --- /dev/null
> +++ b/cloudinit/sources/DataSourceScaleway.py
> @@ -0,0 +1,264 @@
> +# vi: ts=4 expandtab

this goes at the bottom.

> +#
> +#    Author: Julien Castets <castets.j@xxxxxxxxx>

please copy other headers.
If you want to put 'Author' thats fine, but then after that a blank line single line license info.

so, yours might look like:
# Author: Julien Castets <castets.j@xxxxxxxxx>
#
# This file is part of cloud-init. See LICENSE file for license information.

> +#
> +#    This program is free software: you can redistribute it and/or modify
> +#    it under the terms of the GNU General Public License version 3, as
> +#    published by the Free Software Foundation.
> +#
> +#    This program is distributed in the hope that it will be useful,
> +#    but WITHOUT ANY WARRANTY; without even the implied warranty of
> +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +#    GNU General Public License for more details.
> +#
> +#    You should have received a copy of the GNU General Public License
> +#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
> +
> +import json
> +import socket
> +import time
> +
> +import requests
> +
> +# pylint fails to import the two modules below.
> +# pylint: disable=E0401
> +from requests.packages.urllib3.connection import HTTPConnection
> +from requests.packages.urllib3.poolmanager import PoolManager
> +
> +from cloudinit import log as logging
> +from cloudinit import sources
> +from cloudinit import url_helper
> +from cloudinit import util
> +
> +
> +LOG = logging.getLogger(__name__)
> +
> +BUILTIN_DS_CONFIG = {
> +    'metadata_url': 'http://169.254.42.42/conf?format=json',
> +    'userdata_url': 'http://169.254.42.42/user_data/cloud-init',
> +    'vendordata_url': 'http://169.254.42.42/vendor_data/cloud-init'
> +}
> +
> +DEF_MD_RETRIES = 5
> +DEF_MD_TIMEOUT = 10
> +
> +
> +def on_scaleway(user_data_url, retries):
> +    """
> +    Check if we are on Scaleway.
> +
> +    The only backward-compatible way to check if you are on a Scaleway instance
> +    is to query the metadata API. For security reasons, the special endpoint
> +    /user_data/ can only be accessed from a privileged TCP source port (ie.
> +    below 1024), otherwise the API returns a HTTP/403.
> +
> +    In other words, on Scaleway:
> +
> +    #> curl http://169.254.42.42/user_data
> +    [...] 403 error
> +
> +    #> curl --local-port 1-1024 http://169.254.42.42/user_data
> +    [...] 200 OK
> +
> +    This function queries the endpoint /user_data/ and returns True if a
> +    HTTP/403 is returned.
> +    """
> +    tries = max(int(retries) + 1, 1)
> +    for ntry in range(tries):
> +        try:
> +            code = requests.head(user_data_url).status_code
> +            if code == 403:
> +                return True
> +
> +            # If we are rate limited or if there is a server error, we might
> +            # not be talking to the Scaleway metadata API and we need to try
> +            # again.
> +            if code != 429 and code < 500:
> +                return False
> +
> +        # Couldn't query the API.
> +        except (requests.exceptions.ConnectionError,
> +                requests.exceptions.Timeout):
> +            return False
> +
> +        # Be nice, and wait a bit before retrying.
> +        time.sleep(5)
> +
> +    return False
> +
> +
> +class SourceAddressAdapter(requests.adapters.HTTPAdapter):
> +    """
> +    Adapter for requests to choose the local address to bind to.
> +    """
> +
> +    def __init__(self, source_address, **kwargs):
> +        self.source_address = source_address
> +        super(SourceAddressAdapter, self).__init__(**kwargs)
> +
> +    def init_poolmanager(self, connections, maxsize, block=False):
> +        socket_options = HTTPConnection.default_socket_options + [
> +            (socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
> +        ]
> +        self.poolmanager = PoolManager(num_pools=connections,
> +                                       maxsize=maxsize,
> +                                       block=block,
> +                                       source_address=self.source_address,
> +                                       socket_options=socket_options)
> +
> +
> +def _get_type_data(typedata_address, timeout, requests_session):
> +    """
> +    Retrieve user data or vendor data.
> +
> +    Scaleway userdata/vendordata API returns HTTP/404 if user/vendor data is
> +    not set.
> +
> +    This function calls `url_helper.readurl` but instead of considering
> +    HTTP/404 as an error that requires a retry, it considers it as empty
> +    user/vendor data.
> +
> +    Also, be aware the user data/vendor API requires the source port to be
> +    below 1024. If requests raises ConnectionError (EADDRINUSE), the caller
> +    should retry to call this function on an other port.
> +    """
> +    try:
> +        resp = url_helper.readurl(
> +            typedata_address,
> +            data=None,
> +            timeout=timeout,
> +            # It's the caller's responsability to recall this function in case
> +            # of exception. Don't let url_helper.readurl() retry by itself.
> +            retries=0,
> +            session=requests_session,
> +            # If the error is a HTTP/404 or a ConnectionError, go into raise
> +            # block below.
> +            exception_cb=lambda _, exc: exc.code == 404 or (
> +                isinstance(exc.cause, requests.exceptions.ConnectionError) and
> +                'Address already in use' in exc.message
> +            )
> +        )
> +        return util.decode_binary(resp.contents)
> +    except url_helper.UrlError as exc:
> +        # Empty user data.
> +        if exc.code == 404:
> +            return None
> +        raise
> +
> +
> +class DataSourceScaleway(sources.DataSource):
> +
> +    def __init__(self, sys_cfg, distro, paths):
> +        LOG.debug('Init Scaleway')
> +
> +        sources.DataSource.__init__(self, sys_cfg, distro, paths)
> +
> +        self.metadata = {}
> +        self.ds_cfg = util.mergemanydict([
> +            util.get_cfg_by_path(sys_cfg, ["datasource", "Scaleway"], {}),
> +            BUILTIN_DS_CONFIG
> +        ])
> +
> +        self.metadata_address = self.ds_cfg['metadata_url']
> +        self.userdata_address = self.ds_cfg['userdata_url']
> +        self.vendordata_address = self.ds_cfg['vendordata_url']
> +
> +        self.retries = self.ds_cfg.get('retries', DEF_MD_RETRIES)
> +        self.timeout = self.ds_cfg.get('timeout', DEF_MD_TIMEOUT)
> +
> +    def _get_privileged_type(self, type_):
> +        assert type_ in ('user', 'vendor')
> +
> +        type_address = self.userdata_address \
> +            if type_ == 'user' else self.vendordata_address
> +
> +        # Query user/vendor-data. Try to make a request on the first privileged
> +        # port available.

Wonder why?  The other end cares about the source port?

> +        for port in range(1, max(int(self.retries), 2)):
> +            try:
> +                LOG.debug(
> +                    'Trying to get %s data (bind on port %d)...', type_, port
> +                )
> +                requests_session = requests.Session()
> +                requests_session.mount(
> +                    'http://',
> +                    SourceAddressAdapter(source_address=('0.0.0.0', port))
> +                )
> +                data = _get_type_data(
> +                    type_address,
> +                    timeout=self.timeout,
> +                    requests_session=requests_session
> +                )
> +                LOG.debug('%s-data downloaded', type_)
> +                return data
> +
> +            except url_helper.UrlError as exc:
> +                # Local port already in use or HTTP/429.
> +                time.sleep(5)
> +                last_exc = exc
> +                continue
> +
> +        # Max number of retries reached.
> +        raise last_exc
> +
> +    def _get_metadata(self):
> +        resp = url_helper.readurl(self.metadata_address,
> +                                  timeout=self.timeout,
> +                                  retries=self.retries)
> +
> +        metadata = json.loads(util.decode_binary(resp.contents))
> +
> +        metadata['user-data'] = self._get_privileged_type('user')
> +        metadata['vendor-data'] = self._get_privileged_type('vendor')
> +
> +        return metadata
> +
> +    def get_data(self):
> +        if on_scaleway(self.ds_cfg['userdata_url'], self.retries) is False:
> +            return False
> +
> +        metadata = self._get_metadata()
> +
> +        self.metadata = {
> +            'id': metadata['id'],
> +            'hostname': metadata['hostname'],
> +            'user-data': metadata['user-data'],
> +            'vendor-data': metadata['vendor-data'],
> +            'ssh_public_keys': [
> +                key['key'] for key in metadata['ssh_public_keys']
> +            ]
> +        }
> +        return True
> +
> +    @property
> +    def launch_index(self):
> +        return None
> +
> +    def get_instance_id(self):
> +        return self.metadata['id']
> +
> +    def get_public_ssh_keys(self):
> +        return self.metadata['ssh_public_keys']
> +
> +    def get_hostname(self, fqdn=False, resolve_ip=False):
> +        return self.metadata['hostname']
> +
> +    def get_userdata_raw(self):
> +        return self.metadata['user-data']
> +
> +    @property
> +    def availability_zone(self):
> +        return None
> +
> +    @property
> +    def region(self):
> +        return None
> +
> +
> +datasources = [
> +    (DataSourceScaleway, (sources.DEP_FILESYSTEM, sources.DEP_NETWORK)),
> +]
> +
> +
> +def get_datasource_list(depends):
> +    return sources.list_from_depends(depends, datasources)


-- 
https://code.launchpad.net/~jcastets/cloud-init/+git/cloud-init/+merge/325740
Your team cloud-init commiters is requested to review the proposed merge of ~jcastets/cloud-init:scaleway-datasource into cloud-init:master.


References