cloud-init-dev team mailing list archive

Thread
Date
Re: [Merge] ~smoser/cloud-init:feature/oracle-datasource into cloud-init:master

To: Scott Moser <ssmoser2+ubuntu@xxxxxxxxx>
From: Ryan Harper <ryan.harper@xxxxxxxxxxxxx>
Date: Tue, 14 Aug 2018 16:27:32 -0000
In-reply-to: <153393453114.26437.15597806329598750716.codereview@gac.canonical.com>
Reply-to: mp+352921@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx
Generally looks good to go.  A couple questions in line.
How do you see the upgrade from previous DS to the new DS is going to work?

Do we have a unittest that would cover the transition of one DS to another?

Diff comments:

> diff --git a/cloudinit/sources/DataSourceOracle.py b/cloudinit/sources/DataSourceOracle.py
> new file mode 100644
> index 0000000..34dbb6b
> --- /dev/null
> +++ b/cloudinit/sources/DataSourceOracle.py
> @@ -0,0 +1,202 @@
> +# This file is part of cloud-init. See LICENSE file for license information.
> +"""Datasource for Oracle (OCI/Oracle Cloud Infrastructure)
> +
> +OCI provdes a OpenStack like metadata service which provides only
> +'2013-10-17' and 'latest' versions..
> +
> +Notes:
> + * This datasource does not support the OCI-Classic. OCI-Classic
> +   provides an EC2 lookalike metadata service.
> + * The uuid provided in DMI data is not the same as the meta-data provided
> +   instance-id, but has an equivalent lifespan.
> + * We do need to support upgrade from an instance that cloud-init
> +   identified as OpenStack.
> + * Both bare-metal and vms use iscsi root
> + * Both bare-metal and vms provide chassis-asset-tag of OracleCloud.com
> +"""
> +
> +from cloudinit.url_helper import combine_url, readurl, UrlError
> +from cloudinit import sources
> +from cloudinit import util
> +from cloudinit.net import cmdline
> +from cloudinit import log as logging
> +
> +import json
> +import re
> +
> +LOG = logging.getLogger(__name__)
> +
> +CHASSIS_ASSET_TAG = "OracleCloud.com"
> +METADATA_ENDPOINT = "http://169.254.169.254/openstack/2013-10-17/";
> +
> +
> +class DataSourceOracle(sources.DataSource):
> +
> +    dsname = 'Oracle'
> +    system_uuid = None
> +    vendordata_pure = None
> +    _network_config = None
> +
> +    def _is_platform_viable(self):
> +        """Check platform environment to report if this datasource may run."""
> +        return _is_platform_viable()
> +
> +    def _get_data(self):
> +        if not self._is_platform_viable():
> +            return False
> +
> +        data = self.crawl_metadata()
> +        self._crawled_metadata = data
> +
> +        self.userdata_raw = data.get('user_data')
> +        self.system_uuid = data['system_uuid']
> +
> +        vd = data.get('vendor_data')
> +        if vd:
> +            self.vendordata_pure = vd
> +            try:
> +                self.vendordata_raw = sources.convert_vendordata(vd)
> +            except ValueError as e:
> +                LOG.warning("Invalid content in vendor-data: %s", e)
> +                self.vendordata_raw = None

Does it make sense to preserve the existing vendordata_raw in case of parse error? Or do we always to the state to match the state given the most recent call to _get_data()?

> +
> +        mdcopies = ('public_keys',)
> +        md = dict([(k, data['meta_data'].get(k))
> +                   for k in mdcopies if k in data['meta_data']])
> +
> +        mdtrans = (
> +            ('availability_zone', 'availability-zone'),
> +            ('hostname', 'local-hostname'),
> +            ('launch_index', 'launch-index'),
> +            ('uuid', 'instance-id'),

Is this mapping to the common instance metadata fields?  Is there a defined list in say DataSource base class?

> +        )
> +        for dsname, ciname in mdtrans:
> +            if dsname in data['meta_data']:
> +                md[ciname] = data['meta_data'][dsname]
> +
> +        self.metadata = md
> +        return True
> +
> +    def crawl_metadata(self):
> +        return read_metadata()
> +
> +    def check_instance_id(self, sys_cfg):
> +        """quickly check (local only) if self.instance_id is still valid
> +
> +        On Oracle, the dmi-provided system uuid differs from the instance-id
> +        but has the same life-span."""
> +        return sources.instance_id_matches_system_uuid(self.system_uuid)
> +
> +    def get_public_ssh_keys(self):
> +        return sources.normalize_pubkey_data(self.metadata.get('public_keys'))
> +
> +    @property
> +    def network_config(self):
> +        """Network config is read from initramfs provides files."""
> +        if self._network_config == sources.UNSET:
> +            self._network_config = cmdline.read_kernel_cmdline_config()
> +        return self._network_config
> +
> +
> +def _read_system_uuid():
> +    sys_uuid = util.read_dmi_data('system-uuid')
> +    return None if sys_uuid is None else sys_uuid.lower()
> +
> +
> +def _is_platform_viable():
> +    asset_tag = util.read_dmi_data('chassis-asset-tag')
> +    return asset_tag == CHASSIS_ASSET_TAG
> +
> +
> +def _load_index(content):
> +    """Return a list entries parsed from content.

Should this be added to cloudinit/url_helper.py ?

> +
> +    OpenStack's metadata service returns a newline delimited list
> +    of items.  Oracle's implementation has html formatted list of links.
> +    The parser here just grabs targets from <a href="target">
> +    and throws away "../".
> +
> +    Oracle has accepted that to be buggy and may fix in the future
> +    to instead return a '\n' delimited plain text list.  This function
> +    will continue to work if that change is made."""
> +    if not content.lower().startswith("<html>"):
> +        return content.splitlines()
> +    items = re.findall(
> +        r'href="(?P<target>[^"]*)"', content, re.MULTILINE | re.IGNORECASE)
> +    return [i for i in items if not i.startswith(".")]
> +
> +
> +def read_metadata(endpoint=METADATA_ENDPOINT, sys_uuid=None):
> +    """Read metadata, return a dictionary.
> +
> +    Each path listed in the index will be represented in the dictionary.
> +    If the path ends in .json, then the content will be decoded and
> +    populated into the dictionary.
> +
> +    The system uuid (/sys/class/dmi/id/product_uuid) is also populated.
> +    Example: paths = ('user_data', 'meta_data.json')
> +    return {'user_data': b'blob', 'meta_data': json.loads(blob.decode())

In this diff, this looked like code not comment. Maybe returns? or put the """ on the line below?

> +            'system_uuid': '3b54f2e0-3ab2-458d-b770-af9926eee3b2'}"""
> +    if sys_uuid is None:
> +        sys_uuid = _read_system_uuid()
> +    if not sys_uuid:
> +        raise sources.BrokenMetadata("Failed to read system uuid.")
> +
> +    try:
> +        resp = readurl(endpoint)
> +        if not resp.ok():
> +            raise sources.BrokenMetadata(
> +                "Bad response from %s: %s" % (endpoint, resp.code))
> +    except UrlError as e:
> +        raise sources.BrokenMetadata(
> +            "Failed to read index at %s: %s" % (endpoint, e))
> +
> +    entries = _load_index(resp.contents.decode('utf-8'))
> +    LOG.debug("index url %s contained: %s", endpoint, entries)
> +
> +    # meta_data.json is required.
> +    mdj = 'meta_data.json'
> +    if mdj not in entries:
> +        raise sources.BrokenMetadata(
> +            "Required field '%s' missing in index at %s" % (mdj, endpoint))
> +
> +    ret = {'system_uuid': sys_uuid}
> +    for path in entries:
> +        response = readurl(combine_url(endpoint, path))
> +        if path.endswith(".json"):
> +            ret[path.rpartition(".")[0]] = (
> +                json.loads(response.contents.decode('utf-8')))
> +        else:
> +            ret[path] = response.contents
> +
> +    return ret
> +
> +
> +# Used to match classes to dependencies
> +datasources = [
> +    (DataSourceOracle, (sources.DEP_FILESYSTEM,)),
> +]
> +
> +
> +# Return a list of data sources that match this set of dependencies
> +def get_datasource_list(depends):
> +    return sources.list_from_depends(depends, datasources)
> +
> +
> +if __name__ == "__main__":
> +    import argparse
> +    import os
> +
> +    parser = argparse.ArgumentParser(description='Query Oracle Cloud Metadata')
> +    args = parser.parse_args()
> +    parser.add_argument("--endpoint", metavar="URL",
> +                        help="The url of the metadata service.",
> +                        default=METADATA_ENDPOINT)
> +    args = parser.parse_args()
> +    sys_uuid = "uuid-not-available-not-root" if os.geteuid() != 0 else None
> +
> +    data = read_metadata(endpoint=METADATA_ENDPOINT, sys_uuid=sys_uuid)
> +    data['is_platform_viable'] = _is_platform_viable()
> +    print(util.json_dumps(data))
> +
> +# vi: ts=4 expandtab
> diff --git a/cloudinit/sources/tests/test_oracle.py b/cloudinit/sources/tests/test_oracle.py
> new file mode 100644
> index 0000000..c33f93e
> --- /dev/null
> +++ b/cloudinit/sources/tests/test_oracle.py
> @@ -0,0 +1,206 @@
> +# This file is part of cloud-init. See LICENSE file for license information.
> +
> +from cloudinit.sources import DataSourceOracle as oracle
> +from cloudinit.sources import BrokenMetadata
> +from cloudinit import helpers
> +
> +from cloudinit.tests import helpers as test_helpers
> +
> +from textwrap import dedent
> +import httpretty
> +import mock
> +import os
> +import uuid
> +
> +DS_PATH = "cloudinit.sources.DataSourceOracle"
> +
> +
> +class TestDataSourceOracle(test_helpers.CiTestCase):
> +    """Test the datasource."""
> +
> +    ds_class = oracle.DataSourceOracle
> +
> +    my_uuid = str(uuid.uuid4())

is it useful to generate a different UUID for each run or can we just use a static value?

> +    my_md = {"uuid": "ocid1.instance.oc1.phx.abyhqlj",
> +             "name": "ci-vm1", "availability_zone": "phx-ad-3",
> +             "hostname": "ci-vm1hostname",
> +             "launch_index": 0, "files": [],
> +             "public_keys": {"0": "ssh-rsa AAAAB3N...== user@host"},
> +             "meta": {}}
> +
> +    def _get_ds(self, sys_cfg=None, distro=None, paths=None, ud_proc=None,
> +                patches=None):
> +        if sys_cfg is None:
> +            sys_cfg = {}
> +        if patches is None:
> +            patches = {}
> +        if paths is None:
> +            tmpd = self.tmp_dir()
> +            dirs = {'cloud_dir': self.tmp_path('cloud_dir', tmpd),
> +                    'run_dir': self.tmp_path('run_dir')}
> +            for d in dirs.values():
> +                os.mkdir(d)
> +            paths = helpers.Paths(dirs)
> +
> +        ds = self.ds_class(sys_cfg=sys_cfg, distro=distro,
> +                           paths=paths, ud_proc=ud_proc)
> +
> +        # do not bother with cleanup as instance is somewhat transient.
> +        for name, mmock in patches.items():
> +            if not hasattr(ds, name):
> +                raise RuntimeError(
> +                    "Datasource %s does not have attr %s" % (ds, name))
> +            setattr(ds, name, mmock)
> +        return ds
> +
> +    def test_platform_not_viable_returns_false(self):
> +        patches = {
> +            '_is_platform_viable': mock.MagicMock(return_value=False)}
> +        ds = self._get_ds(patches=patches)
> +        self.assertFalse(ds._get_data())
> +        patches['_is_platform_viable'].asssert_called_once_with(ds)
> +
> +    def test_without_userdata(self):
> +        patches = {
> +            '_is_platform_viable': mock.MagicMock(return_value=True),
> +            'crawl_metadata': mock.MagicMock(
> +                return_value={'system_uuid': self.my_uuid,
> +                              'meta_data': self.my_md})}
> +        ds = self._get_ds(patches=patches)
> +        patches['_is_platform_viable'].asssert_called_once_with(ds)
> +        patches['crawl_metadata'].asssert_called_once_with(ds)
> +        self.assertTrue(ds._get_data())
> +        self.assertEqual(self.my_uuid, ds.system_uuid)
> +        self.assertEqual(self.my_md['availability_zone'], ds.availability_zone)
> +        self.assertIn(self.my_md["public_keys"]["0"], ds.get_public_ssh_keys())
> +        self.assertEqual(self.my_md['uuid'], ds.get_instance_id())
> +        self.assertIsNone(ds.userdata_raw)
> +
> +    def test_with_userdata(self):
> +        my_userdata = b'abcdefg'
> +        patches = {
> +            '_is_platform_viable': mock.MagicMock(return_value=True),
> +            'crawl_metadata': mock.MagicMock(
> +                return_value={'system_uuid': self.my_uuid,
> +                              'meta_data': self.my_md,
> +                              'user_data': my_userdata})}
> +        ds = self._get_ds(patches=patches)
> +        patches['_is_platform_viable'].asssert_called_once_with(ds)
> +        patches['crawl_metadata'].asssert_called_once_with(ds)
> +        self.assertTrue(ds._get_data())
> +        self.assertEqual(self.my_uuid, ds.system_uuid)
> +        self.assertIn(self.my_md["public_keys"]["0"], ds.get_public_ssh_keys())
> +        self.assertEqual(self.my_md['uuid'], ds.get_instance_id())
> +        self.assertEqual(my_userdata, ds.userdata_raw)
> +
> +
> +@mock.patch(DS_PATH + "._read_system_uuid", return_value=str(uuid.uuid4()))
> +class TestReadMetaData(test_helpers.HttprettyTestCase):
> +    """Test the read_metadata which interacts with http metadata service."""
> +
> +    mdurl = oracle.METADATA_ENDPOINT
> +
> +    def test_broken_no_sys_uuid(self, m_read_system_uuid):
> +        """Datasource requires ability to read system_uuid and true return."""
> +        m_read_system_uuid.return_value = None
> +        self.assertRaises(BrokenMetadata, oracle.read_metadata)
> +
> +    def test_broken_no_metadata_json(self, m_read_system_uuid):
> +        """Datasource requires meta_data.json."""
> +        httpretty.register_uri(
> +            httpretty.GET, self.mdurl, '\n'.join(['user_data']))
> +        with self.assertRaises(BrokenMetadata) as cm:
> +            oracle.read_metadata()
> +        self.assertIn("Required field 'meta_data.json' missing",
> +                      str(cm.exception))
> +
> +
> +class TestIsPlatformViable(test_helpers.CiTestCase):
> +    @mock.patch(DS_PATH + ".util.read_dmi_data",
> +                return_value=oracle.CHASSIS_ASSET_TAG)
> +    def test_expected_viable(self, m_read_dmi_data):
> +        """System with known chassis tag is viable."""
> +        self.assertTrue(oracle._is_platform_viable())
> +        m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')])
> +
> +    @mock.patch(DS_PATH + ".util.read_dmi_data", return_value=None)
> +    def test_expected_not_viable_dmi_data_none(self, m_read_dmi_data):
> +        """System without known chassis tag is not viable."""
> +        self.assertFalse(oracle._is_platform_viable())
> +        m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')])
> +
> +    @mock.patch(DS_PATH + ".util.read_dmi_data", return_value="LetsGoCubs")
> +    def test_expected_not_viable_other(self, m_read_dmi_data):
> +        """System with unnown chassis tag is not viable."""
> +        self.assertFalse(oracle._is_platform_viable())
> +        m_read_dmi_data.assert_has_calls([mock.call('chassis-asset-tag')])
> +
> +
> +class TestLoadIndex(test_helpers.CiTestCase):
> +    """_load_index handles parsing of an index into a proper list.
> +    The tests here guarantee correct parsing of html version or
> +    a fixed version.  See the function docstring for more doc."""
> +
> +    _known_html_api_versions = dedent("""\
> +        <html>
> +        <head><title>Index of /openstack/</title></head>
> +        <body bgcolor="white">
> +        <h1>Index of /openstack/</h1><hr><pre><a href="../">../</a>
> +        <a href="2013-10-17/">2013-10-17/</a>   27-Jun-2018 12:22  -
> +        <a href="latest/">latest/</a>           27-Jun-2018 12:22  -
> +        </pre><hr></body>
> +        </html>""")
> +
> +    _known_html_contents = dedent("""\
> +        <html>
> +        <head><title>Index of /openstack/2013-10-17/</title></head>
> +        <body bgcolor="white">
> +        <h1>Index of /openstack/2013-10-17/</h1><hr><pre><a href="../">../</a>
> +        <a href="meta_data.json">meta_data.json</a>  27-Jun-2018 12:22  679
> +        <a href="user_data">user_data</a>            27-Jun-2018 12:22  146
> +        </pre><hr></body>
> +        </html>""")
> +
> +    def test_parse_html(self):
> +        """Test parsing of lower case html."""
> +        self.assertEqual(
> +            ['2013-10-17/', 'latest/'],
> +            oracle._load_index(self._known_html_api_versions))
> +        self.assertEqual(
> +            ['meta_data.json', 'user_data'],
> +            oracle._load_index(self._known_html_contents))
> +
> +    def test_parse_html_upper(self):
> +        """Test parsing of upper case html, although known content is lower."""
> +        def _toupper(data):
> +            return data.replace("<a", "<A").replace("html>", "HTML>")
> +
> +        self.assertEqual(
> +            ['2013-10-17/', 'latest/'],
> +            oracle._load_index(_toupper(self._known_html_api_versions)))
> +        self.assertEqual(
> +            ['meta_data.json', 'user_data'],
> +            oracle._load_index(_toupper(self._known_html_contents)))
> +
> +    def test_parse_newline_list_with_endl(self):
> +        """Test parsing of newline separated list with ending newline."""
> +        self.assertEqual(
> +            ['2013-10-17/', 'latest/'],
> +            oracle._load_index("\n".join(["2013-10-17/", "latest/", ""])))
> +        self.assertEqual(
> +            ['meta_data.json', 'user_data'],
> +            oracle._load_index("\n".join(["meta_data.json", "user_data", ""])))
> +
> +    def test_parse_newline_list_without_endl(self):
> +        """Test parsing of newline separated list with no ending newline.
> +
> +        Actual openstack implementation does not include trailing newline."""
> +        self.assertEqual(
> +            ['2013-10-17/', 'latest/'],
> +            oracle._load_index("\n".join(["2013-10-17/", "latest/"])))
> +        self.assertEqual(
> +            ['meta_data.json', 'user_data'],
> +            oracle._load_index("\n".join(["meta_data.json", "user_data"])))
> +
> +
> +# vi: ts=4 expandtab


-- 
https://code.launchpad.net/~smoser/cloud-init/+git/cloud-init/+merge/352921
Your team cloud-init commiters is requested to review the proposed merge of ~smoser/cloud-init:feature/oracle-datasource into cloud-init:master.
References

Re: [Merge] ~smoser/cloud-init:feature/oracle-datasource into cloud-init:master
From: Scott Moser, 2018-08-10