cloud-init-dev team mailing list archive
-
cloud-init-dev team
-
Mailing list archive
-
Message #04483
Re: [Merge] ~powersj/cloud-init:cii-restructure-ssh into cloud-init:master
Diff comments:
> diff --git a/tests/cloud_tests/platforms/instances.py b/tests/cloud_tests/platforms/instances.py
> index 3bad021..fa392b7 100644
> --- a/tests/cloud_tests/platforms/instances.py
> +++ b/tests/cloud_tests/platforms/instances.py
> @@ -98,21 +103,21 @@ class Instance(TargetBase):
> client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
> private_key = paramiko.RSAKey.from_private_key_file(self.ssh_key_file)
>
> - retries = 30
> + retries = 3
> while retries:
> try:
> client.connect(username=self.ssh_username,
> hostname=self.ssh_ip, port=self.ssh_port,
> - pkey=private_key, banner_timeout=30)
If we don't have a timeout on the client.connect method, then is this test now unbounded? Or is there a default timeout for failed connections?
> + pkey=private_key)
> self._ssh_client = client
> return client
> except (ConnectionRefusedError, AuthenticationException,
> BadHostKeyException, ConnectionResetError, SSHException,
> - OSError) as e:
> + OSError):
> retries -= 1
> - time.sleep(10)
> + time.sleep(3)
This seems like an arbitrary change; I think it would be good to know the upper bound on how long we want to wait
before we call _ssh_connect() a bust, and then work backwards w.r.t the number of tries and how long to sleep
within the total time we plan to wait.
>
> - ssh_cmd = 'Failed ssh connection to %s@%s:%s after 300 seconds' % (
> + ssh_cmd = 'Failed ssh connection to %s@%s:%s after 3 retries' % (
> self.ssh_username, self.ssh_ip, self.ssh_port
> )
> raise util.InTargetExecuteError(b'', b'', 1, ssh_cmd, 'ssh')
> @@ -120,6 +125,10 @@ class Instance(TargetBase):
> def _wait_for_system(self, wait_for_cloud_init):
> """Wait until system has fully booted and cloud-init has finished.
>
> + Attempt to connect to the instance for the duration of the
> + boot_timeout value. This means we will retry SSH connections
> + untill we connect or untill the timeout is reached.
s/untill/until
That comment doesn't seem to mach the ssh logic above which will only retry 3 times.
> +
> @param wait_time: maximum time to wait
> @return_value: None, may raise OSError if wait_time exceeded
> """
> @@ -128,18 +137,26 @@ class Instance(TargetBase):
> return ' '.join(l for l in test.strip().splitlines()
> if not l.lstrip().startswith('#'))
>
> - time = self.config['boot_timeout']
> + boot_timeout = self.config['boot_timeout']
> tests = [self.config['system_ready_script']]
> if wait_for_cloud_init:
> tests.append(self.config['cloud_init_ready_script'])
>
> formatted_tests = ' && '.join(clean_test(t) for t in tests)
> cmd = ('i=0; while [ $i -lt {time} ] && i=$(($i+1)); do {test} && '
> - 'exit 0; sleep 1; done; exit 1').format(time=time,
> + 'exit 0; sleep 1; done; exit 1').format(time=boot_timeout,
> test=formatted_tests)
>
> - if self.execute(cmd, rcs=(0, 1))[-1] != 0:
> - raise OSError('timeout: after {}s system not started'.format(time))
> -
> + end_time = time.time() + boot_timeout
> + while time.time() < end_time:
I would suggest a slightly different structure to ensure that we always execute the test at least once (note that if for some reason the system sleeps between end_time and the next time.time() we'll never run self.execute.
end_time = time.time() + boot_timeout
while True:
try:
if self.execute()[-1] == 0:
break # success, we can exit the while loop
else:
raise OSError
except util.InTargetExecuteError as e:
log.error("Encountered error: %s", e)
if time.time() < end_time:
time.sleep(3)
else:
raise OSError
Hrm, looking at that, I'm not sure why we'd raise and OSerror with "timeout", can't execute return 1 and not be related to timing out?
> + try:
> + if self.execute(cmd, rcs=(0, 1),
> + description='wait for instance')[-1] != 0:
> + raise OSError('timeout: after %ss system not started' %
> + boot_timeout)
> + else:
> + break
> + except util.InTargetExecuteError:
> + time.sleep(3)
>
> # vi: ts=4 expandtab
--
https://code.launchpad.net/~powersj/cloud-init/+git/cloud-init/+merge/342010
Your team cloud-init commiters is requested to review the proposed merge of ~powersj/cloud-init:cii-restructure-ssh into cloud-init:master.
References