← Back to team overview

cloud-init-dev team mailing list archive

Re: [Merge] ~powersj/cloud-init:cii-enable-ec2 into cloud-init:master

 

Some in-line comments and questions.

Diff comments:

> diff --git a/tests/cloud_tests/platforms.yaml b/tests/cloud_tests/platforms.yaml
> index fa4f845..458ab81 100644
> --- a/tests/cloud_tests/platforms.yaml
> +++ b/tests/cloud_tests/platforms.yaml
> @@ -6,8 +6,13 @@ default_platform_config:
>      get_image_timeout: 300
>      # maximum time to create instance (before waiting for cloud-init)
>      create_instance_timeout: 60
> -
> +    private_key: id_rsa
> +    public_key: id_rsa.pub

We recently got a request to rename these to indicate they are throwaway of some sort.

>  platforms:
> +    ec2:
> +        enabled: true
> +        instance-type: t2.micro
> +        tag: cloud-init-testing
>      lxd:
>          enabled: true
>          # overrides for image templates
> diff --git a/tests/cloud_tests/platforms/ec2/image.py b/tests/cloud_tests/platforms/ec2/image.py
> new file mode 100644
> index 0000000..456dda2
> --- /dev/null
> +++ b/tests/cloud_tests/platforms/ec2/image.py
> @@ -0,0 +1,105 @@
> +# This file is part of cloud-init. See LICENSE file for license information.
> +
> +"""EC2 Image Base Class."""
> +
> +from ..images import Image
> +from .snapshot import EC2Snapshot
> +
> +
> +class EC2Image(Image):
> +    """EC2 backed image."""
> +
> +    platform_name = 'ec2'
> +
> +    def __init__(self, platform, config, image_ami):
> +        """Set up image.
> +
> +        @param platform: platform object
> +        @param config: image configuration
> +        @param image_ami: string of image ami ID
> +        """
> +        super(EC2Image, self).__init__(platform, config)
> +        self._img_instance = None
> +        self.image_ami = image_ami
> +        self.image_ami_edited = False
> +
> +    @property
> +    def _instance(self):
> +        """Internal use only, returns a running instance"""
> +        if not self._img_instance:
> +            self._img_instance = self.platform.create_instance(
> +                self.properties, self.config, self.features,
> +                self.image_ami, b'')

I'm not familiar with the signature of create_instance, what's the  b'' doing here?
maybe a local variable name and assign it the b'' value when passing?

> +            self._img_instance.start(wait=True, wait_for_cloud_init=True)
> +            self.modified = True
> +        return self._img_instance
> +
> +    @property
> +    def properties(self):
> +        """Dictionary containing: 'arch', 'os', 'version', 'release'."""
> +        return {
> +            'arch': self.config['arch'],
> +            'os': self.config['family'],
> +            'release': self.config['release'],
> +            'version': self.config['version'],
> +        }
> +
> +    def destroy(self):
> +        """Unset path to signal image is no longer used.
> +
> +        The removal of the images and all other items is handled by the
> +        framework. In some cases we want to keep the images, so let the
> +        framework decide whether to keep or destroy everything.
> +        """
> +        if self.image_ami_edited:
> +            self.platform.ec2_client.deregister_image(
> +                ImageId=self.image_ami_edited
> +            )
> +
> +        super(EC2Image, self).destroy()

I see that the edited image is deregistered, where is it destroyed?
And what about the base image, does that also need deregistered before being destroyed?

Also, in the case that the image is to be kept, what about the edited image? or are those always destroyed?

> +
> +    def _execute(self, *args, **kwargs):
> +        """Execute command in image, modifying image."""
> +        return self._instance._execute(*args, **kwargs)
> +
> +    def push_file(self, local_path, remote_path):
> +        """Copy file at 'local_path' to instance at 'remote_path'."""
> +        return self._instance.push_file(local_path, remote_path)
> +
> +    def run_script(self, *args, **kwargs):
> +        """Run script in image, modifying image.
> +
> +        @return_value: script output
> +        """
> +        return self._instance.run_script(*args, **kwargs)
> +
> +    def snapshot(self):
> +        """Create snapshot of image, block until done.
> +
> +        Will return base image_ami if no instance has been booted, otherwise
> +        will run the clean script, shutdown the instance, create a custom
> +        AMI, and use that AMI once available.
> +        """
> +        if not self._img_instance:
> +            return EC2Snapshot(self.platform, self.properties, self.config,
> +                               self.features, self.image_ami)
> +
> +        if self.config.get('boot_clean_script'):
> +            self._img_instance.run_script(self.config.get('boot_clean_script'))
> +
> +        self._img_instance.shutdown(wait=True)
> +
> +        name = '%s-%s-%s' % (
> +            self.platform.tag, self.image_ami, self.platform.uuid
> +        )
> +        response = self.platform.ec2_client.create_image(
> +            Name=name, InstanceId=self._img_instance.instance.instance_id
> +        )
> +        self.image_ami_edited = response['ImageId']
> +        image = self.platform.ec2_resource.Image(self.image_ami_edited)
> +        self.platform.wait_for_state(image, 'available')
> +
> +        return EC2Snapshot(self.platform, self.properties, self.config,
> +                           self.features, self.image_ami_edited)
> +
> +# vi: ts=4 expandtab
> diff --git a/tests/cloud_tests/platforms/ec2/instance.py b/tests/cloud_tests/platforms/ec2/instance.py
> new file mode 100644
> index 0000000..72bafee
> --- /dev/null
> +++ b/tests/cloud_tests/platforms/ec2/instance.py
> @@ -0,0 +1,105 @@
> +# This file is part of cloud-init. See LICENSE file for license information.
> +
> +"""Base EC2 instance."""
> +import os
> +
> +import botocore
> +
> +from ..instances import Instance
> +from tests.cloud_tests import util
> +
> +
> +class EC2Instance(Instance):
> +    """EC2 backed instance."""
> +
> +    platform_name = "ec2"
> +    _ssh_client = None
> +
> +    def __init__(self, platform, properties, config, features,
> +                 image_ami, user_data):
> +        """Set up instance.
> +
> +        @param platform: platform object
> +        @param properties: dictionary of properties
> +        @param config: dictionary of configuration values
> +        @param features: dictionary of supported feature flags
> +        @param image_ami: AWS AMI ID for image to use
> +        @param user_data: test user-data to pass to instance
> +        """
> +        super(EC2Instance, self).__init__(
> +            platform, image_ami, properties, config, features)
> +
> +        self.image_ami = image_ami
> +        self.instance = None
> +        self.user_data = user_data
> +        self.ssh_ip = None
> +        self.ssh_port = 22
> +        self.ssh_key_file = os.path.join(
> +            platform.config['data_dir'], platform.config['private_key'])
> +        self.ssh_pubkey_file = os.path.join(
> +            platform.config['data_dir'], platform.config['public_key'])
> +
> +    def destroy(self):
> +        """Clean up instance."""
> +        if self._ssh_client:
> +            self._ssh_client.close()
> +            self._ssh_client = None
> +
> +        if self.instance:
> +            self.instance.terminate()
> +
> +        super(EC2Instance, self).destroy()
> +
> +    def _execute(self, command, stdin=None, env=None):
> +        """Execute command on instance."""
> +        env_args = []
> +        if env:
> +            env_args = ['env'] + ["%s=%s" for k, v in env.items()]
> +
> +        return self._ssh(['sudo'] + env_args + list(command), stdin=stdin)
> +
> +    def start(self, wait=True, wait_for_cloud_init=False):
> +        """Start instance on EC2 with the platfrom's VPC."""
> +        name_tag = {
> +            'ResourceType': 'instance',
> +            'Tags': [
> +                {
> +                    'Key': 'Name',
> +                    'Value': self.platform.tag,
> +                },
> +            ]
> +        }
> +
> +        try:
> +            instances = self.platform.ec2_resource.create_instances(
> +                ImageId=self.image_ami,
> +                InstanceType=self.platform.instance_type,
> +                KeyName=self.platform.key_name,
> +                MaxCount=1,
> +                MinCount=1,
> +                SecurityGroupIds=[self.platform.vpc_security_group.id],
> +                SubnetId=self.platform.vpc_subnet.id,
> +                TagSpecifications=[name_tag],
> +                UserData=self.user_data,
> +            )
> +        except botocore.exceptions.ClientError as error:
> +            error_msg = error.response['Error']['Message']
> +            raise util.InTargetExecuteError(b'', b'', 1, '', 'start',

What are the throw-away values here?

> +                                            reason=error_msg)
> +
> +        self.instance = instances[0]
> +
> +        if wait:
> +            # Need to give AWS some time to process the launch
> +            self.platform.wait_for_state(self.instance, 'running')
> +            self.ssh_ip = self.instance.public_ip_address
> +            self._wait_for_system(wait_for_cloud_init)
> +
> +    def shutdown(self, wait=True):
> +        """Shutdown instance."""
> +        self.instance.stop()
> +
> +        if wait:
> +            self.platform.wait_for_state(self.instance, 'stopped')
> +
> +# vi: ts=4 expandtab
> diff --git a/tests/cloud_tests/platforms/ec2/platform.py b/tests/cloud_tests/platforms/ec2/platform.py
> new file mode 100644
> index 0000000..8ff2716
> --- /dev/null
> +++ b/tests/cloud_tests/platforms/ec2/platform.py
> @@ -0,0 +1,255 @@
> +# This file is part of cloud-init. See LICENSE file for license information.
> +
> +"""Base EC2 platform."""
> +import os
> +import time
> +import uuid
> +
> +import boto3
> +import botocore
> +
> +from ..platforms import Platform
> +from .image import EC2Image
> +from .instance import EC2Instance
> +from tests.cloud_tests import util
> +
> +
> +class EC2Platform(Platform):
> +    """EC2 test platform."""
> +
> +    platform_name = 'ec2'
> +    ipv4_cidr = '192.168.1.0/20'
> +
> +    def __init__(self, config):
> +        """Set up platform."""
> +        super(EC2Platform, self).__init__(config)
> +        # Used with SSH key and custom AMI generation naming with EC2
> +        self.uuid = str(uuid.uuid1())[0:8]

Is there a reason we're taking on the first part of the uuid?
Seems dubious to call it a uuid when it's short quite a bit of it's total value.

> +        self.tag = config['tag']
> +
> +        self.ec2_client = boto3.client('ec2')
> +        self.ec2_resource = boto3.resource('ec2')
> +        self.instance_type = config['instance-type']
> +        self.key_name = self._upload_public_key(config)
> +        self.vpc = self._setup_vpc()
> +        self.vpc_security_group = self._get_security_group()
> +        self.vpc_subnet = self._get_subnet()
> +
> +    def create_instance(self, properties, config, features,
> +                        image_ami, user_data):
> +        """Create an instance
> +
> +        @param src_img_path: image path to launch from
> +        @param properties: image properties
> +        @param config: image configuration
> +        @param features: image features
> +        @param image_ami: string of image ami ID
> +        @param user_data: test user-data to pass to instance
> +        @return_value: cloud_tests.instances instance
> +        """
> +        return EC2Instance(self, properties, config, features,
> +                           image_ami, user_data)
> +
> +    def destroy(self):
> +        """Delete platform."""
> +        if self.key_name:
> +            self.ec2_client.delete_key_pair(KeyName=self.key_name)
> +
> +    def get_image(self, img_conf):
> +        """Get image using specified image configuration.
> +
> +        @param img_conf: configuration for image
> +        @return_value: cloud_tests.images instance
> +        """
> +        if img_conf['root-store'] == 'ebs':
> +            image_type = 'hvm-ssd'
> +        elif img_conf['root-store'] == 'instance-store':
> +            image_type = 'hvm-instance'
> +        else:
> +            raise RuntimeError('Unknown root-store type: %s' %
> +                               (img_conf['root-store']))
> +
> +        image_filter = {
> +            'Name': 'name',
> +            'Values': ['ubuntu/images-testing/%s/ubuntu-%s-daily-%s-server-*'
> +                       % (image_type, img_conf['release'], img_conf['arch'])]
> +        }
> +        response = self.ec2_client.describe_images(Filters=[image_filter])
> +        images = sorted(response['Images'], key=lambda k: k['CreationDate'])
> +
> +        try:
> +            image_ami = images[-1]['ImageId']
> +        except IndexError:
> +            raise RuntimeError('No images found for %s!' % img_conf['release'])
> +
> +        image = EC2Image(self, img_conf, image_ami)
> +        return image
> +
> +    @staticmethod
> +    def wait_for_state(resource, goal_state):

it might make sense to allow the caller to specify the total time to wait;
this could allow the user configuration to specify an specific wait time that
can be applied generally.

> +        """Wait up to 5 minutes (5 * 60) for a specific state.
> +
> +        @param: resource to check with resource.state
> +        @param: string goal state to be in
> +        """
> +        attempts = 0
> +
> +        while attempts < 60:
> +            try:
> +                resource.reload()
> +                current_state = resource.state
> +                # Sometimes get {'Code': 0, 'Name': 'pending'}
> +                if isinstance(current_state, dict):
> +                    current_state = current_state['Name']
> +
> +                if current_state == goal_state:
> +                    return
> +            # will occur if instance is not up yet
> +            except botocore.exception.BotoSeverError:
> +                pass
> +
> +            time.sleep(5)
> +            attempts += 1
> +
> +        raise util.InTargetExecuteError(
> +            b'', b'', 1, goal_state, 'wait_for_state',
> +            reason='%s: took too long to get to %s' % (resource, goal_state)
> +        )
> +
> +    def _create_internet_gateway(self, vpc):
> +        """Create Internet Gateway and assign to VPC.
> +
> +        @param vpc: VPC to create internet gateway on
> +        """
> +        internet_gateway = self.ec2_resource.create_internet_gateway()
> +        internet_gateway.attach_to_vpc(VpcId=vpc.vpc_id)
> +        self._tag_resource(internet_gateway)
> +        return internet_gateway
> +
> +    def _create_subnet(self, vpc):
> +        """Generate IPv4 and IPv6 subnets for use.
> +
> +        @param vpc: VPC to create subnets on
> +        """
> +        ipv6_block = vpc.ipv6_cidr_block_association_set[0]['Ipv6CidrBlock']
> +        ipv6_cidr = ipv6_block[:-2] + '64'
> +
> +        subnet = vpc.create_subnet(CidrBlock=self.ipv4_cidr,
> +                                   Ipv6CidrBlock=ipv6_cidr)
> +        modify_subnet = subnet.meta.client.modify_subnet_attribute
> +        modify_subnet(SubnetId=subnet.id,
> +                      MapPublicIpOnLaunch={'Value': True})
> +        self._tag_resource(subnet)
> +
> +    def _get_security_group(self):
> +        """Return default security group from VPC.
> +
> +        For now there should only be one.
> +
> +        @return_value: Return first security group object
> +        """
> +        return list(self.vpc.security_groups.all())[0]

use pop() to avoid rendering the entire list

> +
> +    def _get_subnet(self):
> +        """Return first subnet from VPC.
> +
> +        For now there should only be one.
> +
> +        @return_value: Return subnet object
> +        """
> +        return list(self.vpc.subnets.all())[0]

pop()

> +
> +    def _setup_vpc(self):
> +        """Setup AWS EC2 VPC or return existing VPC."""
> +        for vpc in self.ec2_resource.vpcs.all():
> +            if not vpc.tags:
> +                continue
> +            for tag in vpc.tags:
> +                if tag['Value'] == self.tag:
> +                    return vpc

existing = [vpc for vpc in self.ec2_resource.vpcs.all() if self.tag in vpc.tags]
if existing:
   return existing.pop()

> +
> +        vpc = self.ec2_resource.create_vpc(
> +            CidrBlock=self.ipv4_cidr,
> +            AmazonProvidedIpv6CidrBlock=True
> +        )
> +        vpc.wait_until_available()
> +        self._tag_resource(vpc)
> +
> +        internet_gateway = self._create_internet_gateway(vpc)
> +        self._create_subnet(vpc)
> +        self._update_routing_table(vpc, internet_gateway.id)
> +        self._update_security_group(vpc)
> +
> +        return vpc
> +

You might move this section into a def _create_vpc()

Are we only going to have one vpc?

It may be useful to model this on a @property
where if it's unset call _create_vpc() and assign it
and then return it.

That would avoid crawling ec2_resources for a match.

> +    def _tag_resource(self, resource):
> +        """Tag a resouce with the specified tag.
> +
> +        This makes finding and deleting resources specific to this testing
> +        much easier to find.
> +
> +        @param resource: resource to tag"""
> +        tag = {
> +            'Key': 'Name',
> +            'Value': self.tag
> +        }
> +        resource.create_tags(Tags=[tag])
> +
> +    def _update_routing_table(self, vpc, internet_gateway_id):
> +        """Update default routing table with internet gateway.
> +
> +        This sets up internet access between the VPC via the internet gateway
> +        by configuring routing tables for IPv4 and IPv6.
> +
> +        @param vpc: VPC containing routing table to update
> +        @param internet_gateway_id: gateway ID to use for routing
> +        """
> +        route_table = list(vpc.route_tables.all())[0]

How do we know that the first entry is the right one?

> +        route_table.create_route(DestinationCidrBlock='0.0.0.0/0',
> +                                 GatewayId=internet_gateway_id)
> +        route_table.create_route(DestinationIpv6CidrBlock='::/0',
> +                                 GatewayId=internet_gateway_id)
> +        self._tag_resource(route_table)
> +
> +    def _update_security_group(self, vpc):
> +        """Allow only SSH inbound to default VPC security group.
> +
> +        This revokes the initial accept all permissions and only allows
> +        the SSH inbound.
> +
> +        @param vpc: VPC containing security group to update
> +        """
> +        ssh = {
> +            'IpProtocol': 'TCP',
> +            'FromPort': 22,
> +            'ToPort': 22,
> +            'IpRanges': [{'CidrIp': '0.0.0.0/0'}],
> +            'Ipv6Ranges': [{'CidrIpv6': '::/0'}]
> +        }
> +
> +        security_group = list(vpc.security_groups.all())[0]
> +        security_group.revoke_ingress(
> +            IpPermissions=security_group.ip_permissions
> +        )
> +        security_group.authorize_ingress(
> +            IpPermissions=[ssh]
> +        )
> +        self._tag_resource(security_group)
> +
> +    def _upload_public_key(self, config):
> +        """Generate random name and upload SSH key with that name.
> +
> +        @param config: platform config
> +        @return: string of ssh key name
> +        """
> +        key_file = os.path.join(config['data_dir'], config['public_key'])
> +        with open(key_file, 'r') as file:
> +            public_key = file.read().strip('\n')
> +
> +        name = '%s-%s' % (self.tag, self.uuid)
> +        self.ec2_client.import_key_pair(KeyName=name,
> +                                        PublicKeyMaterial=public_key)
> +
> +        return name
> +
> +# vi: ts=4 expandtab
> diff --git a/tests/cloud_tests/platforms/instances.py b/tests/cloud_tests/platforms/instances.py
> index 8c59d62..6918003 100644
> --- a/tests/cloud_tests/platforms/instances.py
> +++ b/tests/cloud_tests/platforms/instances.py
> @@ -49,6 +61,58 @@ class Instance(TargetBase):
>          """Clean up instance."""
>          pass
>  
> +    def _ssh(self, command, stdin=None):
> +        """Run a command via SSH."""
> +        client = self._ssh_connect()
> +
> +        cmd = util.shell_pack(command)
> +        try:
> +            fp_in, fp_out, fp_err = client.exec_command(cmd)
> +            channel = fp_in.channel
> +
> +            if stdin is not None:
> +                fp_in.write(stdin)
> +                fp_in.close()
> +
> +            channel.shutdown_write()
> +            rc = channel.recv_exit_status()
> +        except SSHException as e:
> +            raise util.InTargetExecuteError(b'', b'', 1, command, 'ssh',
> +                                            reason=e)
> +
> +        return (fp_out.read(), fp_err.read(), rc)
> +
> +    def _ssh_connect(self):
> +        """Connect via SSH."""
> +        if self._ssh_client:
> +            return self._ssh_client
> +
> +        if not self.ssh_ip or not self.ssh_port:
> +            raise ValueError
> +
> +        client = paramiko.SSHClient()
> +        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
> +        private_key = paramiko.RSAKey.from_private_key_file(self.ssh_key_file)
> +
> +        retries = 30
> +        while retries:
> +            try:
> +                client.connect(username='ubuntu', hostname=self.ssh_ip,

should this username come from some config?  This would only work on ubuntu images, also user-data could change the default username

> +                               port=self.ssh_port, pkey=private_key,
> +                               banner_timeout=30)
> +                self._ssh_client = client
> +                return client
> +            except (ConnectionRefusedError, AuthenticationException,
> +                    BadHostKeyException, ConnectionResetError, SSHException,
> +                    OSError) as e:
> +                retries -= 1
> +                time.sleep(10)
> +
> +        ssh_cmd = 'Failed command to: ubuntu@%s:%s after 300 seconds' % (
> +            self.ssh_ip, self.ssh_port
> +        )
> +        raise util.InTargetExecuteError(b'', b'', 1, ssh_cmd, 'ssh')
> +
>      def _wait_for_system(self, wait_for_cloud_init):
>          """Wait until system has fully booted and cloud-init has finished.
>  
> diff --git a/tests/cloud_tests/platforms/platforms.py b/tests/cloud_tests/platforms/platforms.py
> index 2897536..93d6b88 100644
> --- a/tests/cloud_tests/platforms/platforms.py
> +++ b/tests/cloud_tests/platforms/platforms.py
> @@ -24,4 +28,16 @@ class Platform(object):
>          """Clean up platform data."""
>          pass
>  
> +    def _generate_ssh_keys(self, data_dir):
> +        """Generate SSH keys to be used with image."""
> +        filename = os.path.join(data_dir, 'id_rsa')

throwaway

> +
> +        if os.path.exists(filename):
> +            c_util.del_file(filename)
> +
> +        c_util.subp(['ssh-keygen', '-t', 'rsa', '-b', '4096',
> +                     '-f', filename, '-P', '',
> +                     '-C', 'ubuntu@cloud_test'],
> +                    capture=True)
> +
>  # vi: ts=4 expandtab


-- 
https://code.launchpad.net/~powersj/cloud-init/+git/cloud-init/+merge/335186
Your team cloud-init commiters is requested to review the proposed merge of ~powersj/cloud-init:cii-enable-ec2 into cloud-init:master.


References