← Back to team overview

cloud-init-dev team mailing list archive

Re: [Merge] ~powersj/cloud-init:cii-restructure-ssh into cloud-init:master

 

Couple nits on this approach Josh, then I think it looks good.

Diff comments:

> diff --git a/tests/cloud_tests/platforms/instances.py b/tests/cloud_tests/platforms/instances.py
> index 3bad021..3b2fe1c 100644
> --- a/tests/cloud_tests/platforms/instances.py
> +++ b/tests/cloud_tests/platforms/instances.py
> @@ -98,21 +103,21 @@ class Instance(TargetBase):
>          client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
>          private_key = paramiko.RSAKey.from_private_key_file(self.ssh_key_file)
>  
> -        retries = 30
> +        retries = 3
>          while retries:
>              try:
>                  client.connect(username=self.ssh_username,
>                                 hostname=self.ssh_ip, port=self.ssh_port,
> -                               pkey=private_key, banner_timeout=30)
> +                               pkey=private_key)
>                  self._ssh_client = client
>                  return client
>              except (ConnectionRefusedError, AuthenticationException,
>                      BadHostKeyException, ConnectionResetError, SSHException,
> -                    OSError) as e:
> +                    OSError):
>                  retries -= 1
> -                time.sleep(10)
> +                time.sleep(3)

Can we add a LOG.debug('Retrying ssh connection on connect failure') here?

>  
> -        ssh_cmd = 'Failed ssh connection to %s@%s:%s after 300 seconds' % (
> +        ssh_cmd = 'Failed ssh connection to %s@%s:%s after 3 retries' % (
>              self.ssh_username, self.ssh_ip, self.ssh_port
>          )
>          raise util.InTargetExecuteError(b'', b'', 1, ssh_cmd, 'ssh')
> @@ -128,18 +133,29 @@ class Instance(TargetBase):
>              return ' '.join(l for l in test.strip().splitlines()
>                              if not l.lstrip().startswith('#'))
>  
> -        time = self.config['boot_timeout']
> +        boot_timeout = self.config['boot_timeout']
>          tests = [self.config['system_ready_script']]
>          if wait_for_cloud_init:
>              tests.append(self.config['cloud_init_ready_script'])
>  
>          formatted_tests = ' && '.join(clean_test(t) for t in tests)
>          cmd = ('i=0; while [ $i -lt {time} ] && i=$(($i+1)); do {test} && '
> -               'exit 0; sleep 1; done; exit 1').format(time=time,
> +               'exit 0; sleep 1; done; exit 1').format(time=boot_timeout,
>                                                         test=formatted_tests)
>  
> -        if self.execute(cmd, rcs=(0, 1))[-1] != 0:
> -            raise OSError('timeout: after {}s system not started'.format(time))
> -
> +        end_time = time.time() + boot_timeout
> +        while True:
> +            try:
> +                if self.execute(cmd, rcs=(0, 1),
> +                                description='wait for instance start'):
> +                    break

I think you actually want to be checking the last element of the tuple returned by execute for exit code of 0 before breaking here, otherwise you break out of the while loop even on error conditions (such as hitting the 300 second boot_timeout). You won't raise the error in that case.

> +            except util.InTargetExecuteError:
> +                LOG.warning("failed to connect via SSH")
> +
> +            if time.time() < end_time:
> +                time.sleep(3)
> +            else:
> +                raise util.PlatformError('ssh', 'after %ss instance is not '
> +                                         'reachable' % boot_timeout)
>  
>  # vi: ts=4 expandtab


-- 
https://code.launchpad.net/~powersj/cloud-init/+git/cloud-init/+merge/342010
Your team cloud-init commiters is requested to review the proposed merge of ~powersj/cloud-init:cii-restructure-ssh into cloud-init:master.


References