canonical-ubuntu-qa team mailing list archive
-
canonical-ubuntu-qa team
-
Mailing list archive
-
Message #05819
[Merge] ~andersson123/autopkgtest-cloud:copy-security-group-more-robust into autopkgtest-cloud:master
Tim Andersson has proposed merging ~andersson123/autopkgtest-cloud:copy-security-group-more-robust into autopkgtest-cloud:master.
Requested reviews:
Canonical's Ubuntu QA (canonical-ubuntu-qa)
For more details, see:
https://code.launchpad.net/~andersson123/autopkgtest-cloud/+git/autopkgtest-cloud/+merge/477569
Adds a timeout, plus retries with a sleep between attempts, to make calls to nova more robust, in order to avoid having autopkgtest@*.service units end up in an inactive/dead state.
--
Your team Canonical's Ubuntu QA is requested to review the proposed merge of ~andersson123/autopkgtest-cloud:copy-security-group-more-robust into autopkgtest-cloud:master.
diff --git a/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/copy-security-group b/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/copy-security-group
index 3de9a98..f1df12a 100755
--- a/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/copy-security-group
+++ b/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/copy-security-group
@@ -9,11 +9,37 @@ If --delete-only is given, it only deletes existing groups called NAME.
import argparse
import os
+import signal
+import time
+from contextlib import contextmanager
from keystoneauth1 import session
+from keystoneauth1.exceptions import InternalServerError
from keystoneauth1.identity import v2, v3
from neutronclient.v2_0 import client
+
+class TimeOutException(Exception):
+ pass
+
+
+@contextmanager
+def raise_timeout(timeout):
+ def _handler(signum, frame):
+ raise TimeOutException
+
+ signal.signal(signal.SIGALRM, _handler)
+ signal.alarm(timeout)
+
+ try:
+ yield
+ except TimeOutException:
+ print(f"Timeout after {timeout} seconds")
+ raise
+ finally:
+ signal.alarm(0)
+
+
# Members in a security group rule that cannot be copied.
RULE_MEMBERS_IGNORE = [
"id",
@@ -27,6 +53,10 @@ RULE_MEMBERS_IGNORE = [
"normalized_cidr",
]
+# keystoneauth1.exceptions.http.InternalServerError
+NOVA_TIMEOUT = 600
+NOVA_RETRY_SLEEP_DURATION = 30
+
def main():
parser = argparse.ArgumentParser(description="Copy security groups")
@@ -68,43 +98,87 @@ def main():
region_name=os.environ["OS_REGION_NAME"],
)
- # Find the source group - crashes if it does not exists
- source = [
- g
- for g in neutron.list_security_groups()["security_groups"]
- if g["name"] == args.source
- ][0]
+ security_groups = None
+ with raise_timeout(NOVA_TIMEOUT):
+ while not security_groups:
+ try:
+ security_groups = neutron.list_security_groups()[
+ "security_groups"
+ ]
+ except InternalServerError as e:
+ print(f"Listing security groups failed with {e}")
+ time.sleep(NOVA_RETRY_SLEEP_DURATION)
+
+ source = [g for g in security_groups if g["name"] == args.source][0]
description = "copy {} of {} ({})".format(
args.name, args.source, source["description"]
)
# Delete any existing group with the same name
- existing_groups = [
- g
- for g in neutron.list_security_groups()["security_groups"]
- if g["name"] == args.name
- ]
- existing_ports = neutron.list_ports()["ports"]
+ existing_groups = [g for g in security_groups if g["name"] == args.name]
+
+ existing_ports = None
+ with raise_timeout(NOVA_TIMEOUT):
+ while not existing_ports:
+ try:
+ existing_ports = neutron.list_ports()["ports"]
+ except InternalServerError as e:
+ print(f"Listing ports failed with {e}")
+ time.sleep(NOVA_RETRY_SLEEP_DURATION)
+
for target in existing_groups:
print("Deleting existing group", target)
for port in existing_ports:
if target["id"] in port["security_groups"]:
print("Deleting port in group:", target["id"])
+ deleted = False
+ with raise_timeout(NOVA_TIMEOUT):
+ while not deleted:
+ try:
+ neutron.delete_port(port["id"])
+ deleted = True
+ except Exception as e:
+ print(f"Could not delete port: {e}")
+ time.sleep(NOVA_RETRY_SLEEP_DURATION)
+
+ with raise_timeout(NOVA_TIMEOUT):
+ deleted = False
+ while not deleted:
try:
- neutron.delete_port(port["id"])
+ neutron.delete_security_group(target["id"])
except Exception as e:
- print("Could not delete port:", e)
- neutron.delete_security_group(target["id"])
+ print(f"Could not delete security group: {e}")
+ time.sleep(NOVA_RETRY_SLEEP_DURATION)
if not args.delete_only:
print("Creating", description)
- target = neutron.create_security_group(
- {"security_group": {"name": args.name, "description": description}}
- )["security_group"]
+ with raise_timeout(NOVA_TIMEOUT):
+ target = None
+ while not target:
+ try:
+ target = neutron.create_security_group(
+ {
+ "security_group": {
+ "name": args.name,
+ "description": description,
+ }
+ }
+ )["security_group"]
+ except Exception as e:
+ print(f"Failed to create security group: {e}")
+ time.sleep(NOVA_RETRY_SLEEP_DURATION)
for rule in target["security_group_rules"]:
- neutron.delete_security_group_rule(rule["id"])
+ with raise_timeout(NOVA_TIMEOUT):
+ deleted = False
+ while not deleted:
+ try:
+ neutron.delete_security_group_rule(rule["id"])
+ deleted = True
+ except Exception as e:
+ print(f"Failed to delete security group with: {e}")
+ time.sleep(NOVA_RETRY_SLEEP_DURATION)
for rule in source["security_group_rules"]:
rule = {
@@ -116,7 +190,19 @@ def main():
rule["security_group_id"] = target["id"]
print("Copying rule", rule)
- neutron.create_security_group_rule({"security_group_rule": rule})
+ with raise_timeout(NOVA_TIMEOUT):
+ created = False
+ while not created:
+ try:
+ neutron.create_security_group_rule(
+ {"security_group_rule": rule}
+ )
+ created = True
+ except Exception as e:
+ print(
+ f"Failed to create security group rule with: {e}"
+ )
+ time.sleep(NOVA_RETRY_SLEEP_DURATION)
if __name__ == "__main__":