cloud-init-dev team mailing list archive
-
cloud-init-dev team
-
Mailing list archive
-
Message #03978
Re: [Merge] ~powersj/cloud-init:cii-enable-ec2 into cloud-init:master
Added commit for using stream image data and comments with more questions from initial reviews of Scott and Ryan. Thank you both!
Diff comments:
> diff --git a/tests/cloud_tests/platforms.yaml b/tests/cloud_tests/platforms.yaml
> index fa4f845..458ab81 100644
> --- a/tests/cloud_tests/platforms.yaml
> +++ b/tests/cloud_tests/platforms.yaml
> @@ -6,8 +6,13 @@ default_platform_config:
> get_image_timeout: 300
> # maximum time to create instance (before waiting for cloud-init)
> create_instance_timeout: 60
> -
> + private_key: id_rsa
> + public_key: id_rsa.pub
I will do that in a follow-on merge. I have to update the generate_ssh_keys method to write to a different file name and not the hardcoded id_rsa as well.
> platforms:
> + ec2:
> + enabled: true
> + instance-type: t2.micro
> + tag: cloud-init-testing
> lxd:
> enabled: true
> # overrides for image templates
> diff --git a/tests/cloud_tests/platforms/ec2/image.py b/tests/cloud_tests/platforms/ec2/image.py
> new file mode 100644
> index 0000000..456dda2
> --- /dev/null
> +++ b/tests/cloud_tests/platforms/ec2/image.py
> @@ -0,0 +1,105 @@
> +# This file is part of cloud-init. See LICENSE file for license information.
> +
> +"""EC2 Image Base Class."""
> +
> +from ..images import Image
> +from .snapshot import EC2Snapshot
> +
> +
> +class EC2Image(Image):
> + """EC2 backed image."""
> +
> + platform_name = 'ec2'
> +
> + def __init__(self, platform, config, image_ami):
> + """Set up image.
> +
> + @param platform: platform object
> + @param config: image configuration
> + @param image_ami: string of image ami ID
> + """
> + super(EC2Image, self).__init__(platform, config)
> + self._img_instance = None
> + self.image_ami = image_ami
> + self.image_ami_edited = False
> +
> + @property
> + def _instance(self):
> + """Internal use only, returns a running instance"""
> + if not self._img_instance:
> + self._img_instance = self.platform.create_instance(
> + self.properties, self.config, self.features,
> + self.image_ami, b'')
That is fair and I will fix this. It tripped me up too, so I should have known better :) It is user-data that needs to be passed in, and in this case, since we are customizing the image, we don't need any custom user-data.
> + self._img_instance.start(wait=True, wait_for_cloud_init=True)
> + self.modified = True
> + return self._img_instance
> +
> + @property
> + def properties(self):
> + """Dictionary containing: 'arch', 'os', 'version', 'release'."""
> + return {
> + 'arch': self.config['arch'],
> + 'os': self.config['family'],
> + 'release': self.config['release'],
> + 'version': self.config['version'],
> + }
> +
> + def destroy(self):
> + """Unset path to signal image is no longer used.
Thanks ;)
> +
> + The removal of the images and all other items is handled by the
> + framework. In some cases we want to keep the images, so let the
> + framework decide whether to keep or destroy everything.
> + """
> + if self.image_ami_edited:
> + self.platform.ec2_client.deregister_image(
> + ImageId=self.image_ami_edited
> + )
> +
> + super(EC2Image, self).destroy()
As stated on IRC deregister is AWS's lingo for destroyed/deleted. The base image is official Ubuntu image and cannot/should not be deleted. The base instance is however deleted.
There is no mechanism in this model to keep images. After a run is complete or exited on trace, any custom AMIs are deregistered.
> +
> + def _execute(self, *args, **kwargs):
> + """Execute command in image, modifying image."""
> + return self._instance._execute(*args, **kwargs)
> +
> + def push_file(self, local_path, remote_path):
> + """Copy file at 'local_path' to instance at 'remote_path'."""
> + return self._instance.push_file(local_path, remote_path)
> +
> + def run_script(self, *args, **kwargs):
> + """Run script in image, modifying image.
> +
> + @return_value: script output
> + """
> + return self._instance.run_script(*args, **kwargs)
> +
> + def snapshot(self):
> + """Create snapshot of image, block until done.
> +
> + Will return base image_ami if no instance has been booted, otherwise
> + will run the clean script, shutdown the instance, create a custom
> + AMI, and use that AMI once available.
> + """
> + if not self._img_instance:
> + return EC2Snapshot(self.platform, self.properties, self.config,
> + self.features, self.image_ami)
> +
> + if self.config.get('boot_clean_script'):
> + self._img_instance.run_script(self.config.get('boot_clean_script'))
> +
> + self._img_instance.shutdown(wait=True)
> +
> + name = '%s-%s-%s' % (
> + self.platform.tag, self.image_ami, self.platform.uuid
> + )
> + response = self.platform.ec2_client.create_image(
> + Name=name, InstanceId=self._img_instance.instance.instance_id
> + )
> + self.image_ami_edited = response['ImageId']
> + image = self.platform.ec2_resource.Image(self.image_ami_edited)
> + self.platform.wait_for_state(image, 'available')
> +
> + return EC2Snapshot(self.platform, self.properties, self.config,
> + self.features, self.image_ami_edited)
> +
> +# vi: ts=4 expandtab
> diff --git a/tests/cloud_tests/platforms/ec2/instance.py b/tests/cloud_tests/platforms/ec2/instance.py
> new file mode 100644
> index 0000000..72bafee
> --- /dev/null
> +++ b/tests/cloud_tests/platforms/ec2/instance.py
> @@ -0,0 +1,105 @@
> +# This file is part of cloud-init. See LICENSE file for license information.
> +
> +"""Base EC2 instance."""
> +import os
> +
> +import botocore
> +
> +from ..instances import Instance
> +from tests.cloud_tests import util
> +
> +
> +class EC2Instance(Instance):
> + """EC2 backed instance."""
> +
> + platform_name = "ec2"
> + _ssh_client = None
> +
> + def __init__(self, platform, properties, config, features,
> + image_ami, user_data):
> + """Set up instance.
> +
> + @param platform: platform object
> + @param properties: dictionary of properties
> + @param config: dictionary of configuration values
> + @param features: dictionary of supported feature flags
> + @param image_ami: AWS AMI ID for image to use
> + @param user_data: test user-data to pass to instance
> + """
> + super(EC2Instance, self).__init__(
> + platform, image_ami, properties, config, features)
> +
> + self.image_ami = image_ami
> + self.instance = None
> + self.user_data = user_data
> + self.ssh_ip = None
> + self.ssh_port = 22
> + self.ssh_key_file = os.path.join(
> + platform.config['data_dir'], platform.config['private_key'])
> + self.ssh_pubkey_file = os.path.join(
> + platform.config['data_dir'], platform.config['public_key'])
> +
> + def destroy(self):
> + """Clean up instance."""
> + if self._ssh_client:
> + self._ssh_client.close()
> + self._ssh_client = None
> +
> + if self.instance:
> + self.instance.terminate()
> +
> + super(EC2Instance, self).destroy()
> +
> + def _execute(self, command, stdin=None, env=None):
> + """Execute command on instance."""
> + env_args = []
> + if env:
> + env_args = ['env'] + ["%s=%s" for k, v in env.items()]
> +
> + return self._ssh(['sudo'] + env_args + list(command), stdin=stdin)
> +
> + def start(self, wait=True, wait_for_cloud_init=False):
> +        """Start instance on EC2 with the platform's VPC."""
> + name_tag = {
> + 'ResourceType': 'instance',
> + 'Tags': [
> + {
> + 'Key': 'Name',
> + 'Value': self.platform.tag,
> + },
> + ]
> + }
> +
> + try:
> + instances = self.platform.ec2_resource.create_instances(
> + ImageId=self.image_ami,
> + InstanceType=self.platform.instance_type,
> + KeyName=self.platform.key_name,
> + MaxCount=1,
> + MinCount=1,
> + SecurityGroupIds=[self.platform.vpc_security_group.id],
> + SubnetId=self.platform.vpc_subnet.id,
> + TagSpecifications=[name_tag],
> + UserData=self.user_data,
> + )
> + except botocore.exceptions.ClientError as error:
> + error_msg = error.response['Error']['Message']
> + raise util.InTargetExecuteError(b'', b'', 1, '', 'start',
InTargetExecuteError(stdout, stderr, return code, command, description, reason)
Given that I am ignoring/not really using the first 4, maybe I need a different exception? Maybe something like SetupExecuteError? or PlatformError? The more I look at this, the more I feel I should not be using that exception here...
> + reason=error_msg)
> +
> + self.instance = instances[0]
> +
> + if wait:
> + # Need to give AWS some time to process the launch
> + self.platform.wait_for_state(self.instance, 'running')
> + self.ssh_ip = self.instance.public_ip_address
> + self._wait_for_system(wait_for_cloud_init)
> +
> + def shutdown(self, wait=True):
> + """Shutdown instance."""
> + self.instance.stop()
> +
> + if wait:
> + self.platform.wait_for_state(self.instance, 'stopped')
> +
> +# vi: ts=4 expandtab
> diff --git a/tests/cloud_tests/platforms/ec2/platform.py b/tests/cloud_tests/platforms/ec2/platform.py
> new file mode 100644
> index 0000000..8ff2716
> --- /dev/null
> +++ b/tests/cloud_tests/platforms/ec2/platform.py
> @@ -0,0 +1,255 @@
> +# This file is part of cloud-init. See LICENSE file for license information.
> +
> +"""Base EC2 platform."""
> +import os
> +import time
> +import uuid
> +
> +import boto3
> +import botocore
> +
> +from ..platforms import Platform
> +from .image import EC2Image
> +from .instance import EC2Instance
> +from tests.cloud_tests import util
> +
> +
> +class EC2Platform(Platform):
> + """EC2 test platform."""
> +
> + platform_name = 'ec2'
> + ipv4_cidr = '192.168.1.0/20'
> +
> + def __init__(self, config):
> + """Set up platform."""
> + super(EC2Platform, self).__init__(config)
> + # Used with SSH key and custom AMI generation naming with EC2
> + self.uuid = str(uuid.uuid1())[0:8]
I needed a random string to tack on to the name of things like the SSH key and custom AMI name. Shall I call it 'prefix'? Is there a better solution here?
> + self.tag = config['tag']
> +
> + self.ec2_client = boto3.client('ec2')
> + self.ec2_resource = boto3.resource('ec2')
> + self.instance_type = config['instance-type']
> + self.key_name = self._upload_public_key(config)
> + self.vpc = self._setup_vpc()
> + self.vpc_security_group = self._get_security_group()
> + self.vpc_subnet = self._get_subnet()
> +
> + def create_instance(self, properties, config, features,
> + image_ami, user_data):
> + """Create an instance
> +
> + @param src_img_path: image path to launch from
> + @param properties: image properties
> + @param config: image configuration
> + @param features: image features
> + @param image_ami: string of image ami ID
> + @param user_data: test user-data to pass to instance
> + @return_value: cloud_tests.instances instance
> + """
> + return EC2Instance(self, properties, config, features,
> + image_ami, user_data)
> +
> + def destroy(self):
> + """Delete platform."""
> + if self.key_name:
> + self.ec2_client.delete_key_pair(KeyName=self.key_name)
> +
> + def get_image(self, img_conf):
> + """Get image using specified image configuration.
> +
> + @param img_conf: configuration for image
> + @return_value: cloud_tests.images instance
> + """
> + if img_conf['root-store'] == 'ebs':
> + image_type = 'hvm-ssd'
> + elif img_conf['root-store'] == 'instance-store':
> + image_type = 'hvm-instance'
> + else:
> + raise RuntimeError('Unknown root-store type: %s' %
> + (img_conf['root-store']))
> +
> + image_filter = {
> + 'Name': 'name',
> + 'Values': ['ubuntu/images-testing/%s/ubuntu-%s-daily-%s-server-*'
> + % (image_type, img_conf['release'], img_conf['arch'])]
> + }
Thanks, I have pushed a 2nd commit that utilizes stream data. Take a look.
> + response = self.ec2_client.describe_images(Filters=[image_filter])
> + images = sorted(response['Images'], key=lambda k: k['CreationDate'])
> +
> + try:
> + image_ami = images[-1]['ImageId']
> + except IndexError:
> + raise RuntimeError('No images found for %s!' % img_conf['release'])
> +
> + image = EC2Image(self, img_conf, image_ami)
> + return image
> +
> + @staticmethod
> + def wait_for_state(resource, goal_state):
ok! so have an argument like: total_wait_time = 300 and then:
sleep_time = 5
attempts = int(total_wait_time / sleep_time)
Thoughts?
> + """Wait up to 5 minutes (5 * 60) for a specific state.
> +
> + @param: resource to check with resource.state
> + @param: string goal state to be in
> + """
> + attempts = 0
> +
> + while attempts < 60:
> + try:
> + resource.reload()
> + current_state = resource.state
> + # Sometimes get {'Code': 0, 'Name': 'pending'}
> + if isinstance(current_state, dict):
> + current_state = current_state['Name']
> +
> + if current_state == goal_state:
> + return
> + # will occur if instance is not up yet
> + except botocore.exception.BotoSeverError:
> + pass
> +
> + time.sleep(5)
> + attempts += 1
> +
> + raise util.InTargetExecuteError(
> + b'', b'', 1, goal_state, 'wait_for_state',
> + reason='%s: took too long to get to %s' % (resource, goal_state)
> + )
> +
> + def _create_internet_gateway(self, vpc):
> + """Create Internet Gateway and assign to VPC.
> +
> + @param vpc: VPC to create internet gateway on
> + """
> + internet_gateway = self.ec2_resource.create_internet_gateway()
> + internet_gateway.attach_to_vpc(VpcId=vpc.vpc_id)
> + self._tag_resource(internet_gateway)
> + return internet_gateway
> +
> + def _create_subnet(self, vpc):
> + """Generate IPv4 and IPv6 subnets for use.
> +
> + @param vpc: VPC to create subnets on
> + """
> + ipv6_block = vpc.ipv6_cidr_block_association_set[0]['Ipv6CidrBlock']
> + ipv6_cidr = ipv6_block[:-2] + '64'
> +
> + subnet = vpc.create_subnet(CidrBlock=self.ipv4_cidr,
> + Ipv6CidrBlock=ipv6_cidr)
> + modify_subnet = subnet.meta.client.modify_subnet_attribute
> + modify_subnet(SubnetId=subnet.id,
> + MapPublicIpOnLaunch={'Value': True})
> + self._tag_resource(subnet)
> +
> + def _get_security_group(self):
> + """Return default security group from VPC.
> +
> + For now there should only be one.
> +
> + @return_value: Return first security group object
> + """
> + return list(self.vpc.security_groups.all())[0]
Should I use limit instead:
>>> import boto3
>>> ec2_resource = boto3.resource('ec2')
>>> vpc = list(ec2_resource.vpcs.all())[0]
>>> vpc.subnets.all()
ec2.Vpc.subnetsCollection(ec2.Vpc(id='vpc-9bf381ff'), ec2.Subnet)
>>> vpc.subnets.next()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'ec2.Vpc.subnetsCollectionManager' object has no attribute 'next'
>>> vpc.subnets.
vpc.subnets.all( vpc.subnets.iterator( vpc.subnets.page_size(
vpc.subnets.filter( vpc.subnets.limit( vpc.subnets.pages(
> +
> + def _get_subnet(self):
> + """Return first subnet from VPC.
> +
> + For now there should only be one.
> +
> + @return_value: Return subnet object
> + """
> + return list(self.vpc.subnets.all())[0]
> +
> + def _setup_vpc(self):
> + """Setup AWS EC2 VPC or return existing VPC."""
> + for vpc in self.ec2_resource.vpcs.all():
> + if not vpc.tags:
> + continue
> + for tag in vpc.tags:
> + if tag['Value'] == self.tag:
> + return vpc
> +
> + vpc = self.ec2_resource.create_vpc(
> + CidrBlock=self.ipv4_cidr,
> + AmazonProvidedIpv6CidrBlock=True
> + )
> + vpc.wait_until_available()
> + self._tag_resource(vpc)
> +
> + internet_gateway = self._create_internet_gateway(vpc)
> + self._create_subnet(vpc)
> + self._update_routing_table(vpc, internet_gateway.id)
> + self._update_security_group(vpc)
> +
> + return vpc
> +
I was only going to create a single VPC to isolate our testing. However, stepping back, if we make changes to the VPC config down the road, the VPC would not get re-created; someone would have to delete it manually, or we would have to add logic to re-create it.
Like the other resources, I think it may be better to create the VPC each time and delete it each time. I think I will change this to create and delete each time.
> + def _tag_resource(self, resource):
> +        """Tag a resource with the specified tag.
> +
> + This makes finding and deleting resources specific to this testing
> + much easier to find.
> +
> + @param resource: resource to tag"""
> + tag = {
> + 'Key': 'Name',
> + 'Value': self.tag
> + }
> + resource.create_tags(Tags=[tag])
> +
> + def _update_routing_table(self, vpc, internet_gateway_id):
> + """Update default routing table with internet gateway.
> +
> + This sets up internet access between the VPC via the internet gateway
> + by configuring routing tables for IPv4 and IPv6.
> +
> + @param vpc: VPC containing routing table to update
> + @param internet_gateway_id: gateway ID to use for routing
> + """
> + route_table = list(vpc.route_tables.all())[0]
Since we create the VPC it will only have the default routing table.
We could create a new one as well, but would need to associate the route_table with the subnets as well via:
route_table.associate_with_subnet(SubnetId=subnet.id)
> + route_table.create_route(DestinationCidrBlock='0.0.0.0/0',
> + GatewayId=internet_gateway_id)
> + route_table.create_route(DestinationIpv6CidrBlock='::/0',
> + GatewayId=internet_gateway_id)
> + self._tag_resource(route_table)
> +
> + def _update_security_group(self, vpc):
> + """Allow only SSH inbound to default VPC security group.
> +
> + This revokes the initial accept all permissions and only allows
> + the SSH inbound.
> +
> + @param vpc: VPC containing security group to update
> + """
> + ssh = {
> + 'IpProtocol': 'TCP',
> + 'FromPort': 22,
> + 'ToPort': 22,
> + 'IpRanges': [{'CidrIp': '0.0.0.0/0'}],
> + 'Ipv6Ranges': [{'CidrIpv6': '::/0'}]
> + }
> +
> + security_group = list(vpc.security_groups.all())[0]
> + security_group.revoke_ingress(
> + IpPermissions=security_group.ip_permissions
> + )
> + security_group.authorize_ingress(
> + IpPermissions=[ssh]
> + )
> + self._tag_resource(security_group)
> +
> + def _upload_public_key(self, config):
> + """Generate random name and upload SSH key with that name.
> +
> + @param config: platform config
> + @return: string of ssh key name
> + """
> + key_file = os.path.join(config['data_dir'], config['public_key'])
> + with open(key_file, 'r') as file:
> + public_key = file.read().strip('\n')
> +
> + name = '%s-%s' % (self.tag, self.uuid)
> + self.ec2_client.import_key_pair(KeyName=name,
> + PublicKeyMaterial=public_key)
> +
> + return name
> +
> +# vi: ts=4 expandtab
> diff --git a/tests/cloud_tests/platforms/instances.py b/tests/cloud_tests/platforms/instances.py
> index 8c59d62..6918003 100644
> --- a/tests/cloud_tests/platforms/instances.py
> +++ b/tests/cloud_tests/platforms/instances.py
> @@ -49,6 +61,58 @@ class Instance(TargetBase):
> """Clean up instance."""
> pass
>
> + def _ssh(self, command, stdin=None):
> + """Run a command via SSH."""
> + client = self._ssh_connect()
> +
> + cmd = util.shell_pack(command)
> + try:
> + fp_in, fp_out, fp_err = client.exec_command(cmd)
> + channel = fp_in.channel
> +
> + if stdin is not None:
> + fp_in.write(stdin)
> + fp_in.close()
> +
> + channel.shutdown_write()
> + rc = channel.recv_exit_status()
> + except SSHException as e:
> + raise util.InTargetExecuteError(b'', b'', 1, command, 'ssh',
> + reason=e)
> +
> + return (fp_out.read(), fp_err.read(), rc)
> +
> + def _ssh_connect(self):
> + """Connect via SSH."""
> + if self._ssh_client:
> + return self._ssh_client
> +
> + if not self.ssh_ip or not self.ssh_port:
> + raise ValueError
> +
> + client = paramiko.SSHClient()
> + client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
> + private_key = paramiko.RSAKey.from_private_key_file(self.ssh_key_file)
> +
> + retries = 30
> + while retries:
> + try:
> + client.connect(username='ubuntu', hostname=self.ssh_ip,
Agreed — previously, username was a default argument that could never be customized. I think moving it to a class variable that the platforms can override is the best option for now.
We can set things in the releases.yaml, however that may also change platform to platform. Thoughts?
> + port=self.ssh_port, pkey=private_key,
> + banner_timeout=30)
> + self._ssh_client = client
> + return client
> + except (ConnectionRefusedError, AuthenticationException,
> + BadHostKeyException, ConnectionResetError, SSHException,
> + OSError) as e:
> + retries -= 1
> + time.sleep(10)
> +
> + ssh_cmd = 'Failed command to: ubuntu@%s:%s after 300 seconds' % (
> + self.ssh_ip, self.ssh_port
> + )
> + raise util.InTargetExecuteError(b'', b'', 1, ssh_cmd, 'ssh')
> +
> def _wait_for_system(self, wait_for_cloud_init):
> """Wait until system has fully booted and cloud-init has finished.
>
--
https://code.launchpad.net/~powersj/cloud-init/+git/cloud-init/+merge/335186
Your team cloud-init commiters is requested to review the proposed merge of ~powersj/cloud-init:cii-enable-ec2 into cloud-init:master.
References