canonical-ubuntu-qa team mailing list archive
-
canonical-ubuntu-qa team
-
Mailing list archive
-
Message #03855
[Merge] ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master
Tim Andersson has proposed merging ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master.
Requested reviews:
Canonical's Ubuntu QA (canonical-ubuntu-qa)
For more details, see:
https://code.launchpad.net/~andersson123/autopkgtest-cloud/+git/autopkgtest-cloud/+merge/461654
--
Your team Canonical's Ubuntu QA is requested to review the proposed merge of ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master.
diff --git a/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/test-killer b/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/test-killer
new file mode 100755
index 0000000..bfec858
--- /dev/null
+++ b/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/test-killer
@@ -0,0 +1,294 @@
+#!/usr/bin/python3
+"""Kills running tests."""
+
+import configparser
+import json
+import logging
+import pathlib
+import socket
+import subprocess
+import time
+from typing import List, Optional
+
+import amqplib.client_0_8 as amqp
+import requests
+
+# Name of the fanout exchange that kill requests are published to.
+WRITER_EXCHANGE_NAME = "stop-running.fanout"
+# Path of the credentials file parsed into RABBIT_CFG below.
+RABBIT_CREDS = "/home/ubuntu/rabbitmq.cred"
+# Keys that check_message() expects to find in a kill request.
+MSG_ONLY_KEYS = [
+ "uuid",
+ "not-running-on",
+]
+
+# The creds file is read without a section header of its own, so a synthetic
+# "[rabbit]" header is prepended to make it parseable by configparser; every
+# double-quote character is removed from the file content before parsing.
+RABBIT_CFG = configparser.ConfigParser()
+with open(RABBIT_CREDS, "r") as f:
+ RABBIT_CFG.read_string("[rabbit]\n" + f.read().replace('"', ""))
+
+
+def amqp_connect() -> amqp.Connection:
+ """
+ Creates an amqp.Connection object from the relevant creds
+ """
+ amqp_con = amqp.Connection(
+ RABBIT_CFG["rabbit"]["RABBIT_HOST"],
+ userid=RABBIT_CFG["rabbit"]["RABBIT_USER"],
+ password=RABBIT_CFG["rabbit"]["RABBIT_PASSWORD"],
+ confirm_publish=True,
+ )
+ return amqp_con
+
+
+def check_message(msg: dict) -> bool:
+ """
+ Checks the "kill-request" message sent has only the desired keys
+
+ :param msg: the amqp message converted from bytes to dictionary
+ """
+ return list(msg.keys()) == MSG_ONLY_KEYS
+
+
+def get_test_pid(uuid: str) -> int:
+ """
+ Parses the output of ps aux and finds the pid of a running test
+ with a given uuid
+
+ :param uuid: The given test uuid that is desired to be killed
+ """
+ try:
+ # get list of running processes
+ ps_aux_run = subprocess.run(
+ ["ps", "aux"],
+ stdout=subprocess.PIPE,
+ check=True,
+ )
+ # Filter the list for only 'runner' processes
+ runner_run = subprocess.run(
+ ["grep", "runner"],
+ input=ps_aux_run.stdout,
+ stdout=subprocess.PIPE,
+ check=True,
+ )
+ # Check all runner processes for the given uuid
+ # If this one fails, the test isn't running on this worker
+ uuid_run = subprocess.run(
+ ["grep", uuid],
+ input=runner_run.stdout,
+ capture_output=True,
+ check=True,
+ )
+ except subprocess.CalledProcessError as _:
+ # We hit this exception if the test with the given uuid
+ # isn't running on this cloud worker
+ return None
+ search_for_test_output = uuid_run.stdout
+ search_me = search_for_test_output.splitlines()
+ # We have to assert the length is 1 otherwise we'll only kill
+ # the first one in the list - which may be the incorrect one
+ # if there's two processes with same uuid - something is wrong!
+ assert len(search_me) == 1
+ line = search_me[0].decode("utf-8")
+ if uuid in line:
+ line = line.split(" ")
+ line = [x for x in line if x]
+ pid = line[1]
+ return int(pid)
+
+
+def place_message_in_queue(info: dict, amqp_con: amqp.Connection):
+ """
+ Places a given dictionary into amqp as an amqp.Message object
+ into the queue with the WRITER_EXCHANGE_NAME exchange
+
+ :param info: dictionary that'll be converted to an amqp message
+ :param amqp_con: the amqp connection that test-killer is using
+ """
+ complete_amqp = amqp_con.channel()
+ complete_amqp.access_request(
+ "/complete", active=True, read=False, write=True
+ )
+ complete_amqp.exchange_declare(
+ WRITER_EXCHANGE_NAME, "fanout", durable=True, auto_delete=False
+ )
+ complete_amqp.basic_publish(
+ amqp.Message(json.dumps(info), delivery_mode=2),
+ WRITER_EXCHANGE_NAME,
+ "",
+ )
+
+
+def kill_process(pid: int, uuid: str) -> bool:
+ """
+ Sends SIGUSR1 to worker.
+ This causes the worker to go into the fallback failure mode,
+ in which the worker then exits the test and kills the
+ openstack server. The worker goes on to the next test in the
+ queue
+
+ :param pid: pid of autopkgtest process to kill
+ :param uuid: The given test uuid that is desired to be killed
+ """
+ kill_cmd = "kill -USR1 %i" % pid
+ try:
+ _ = subprocess.run(
+ kill_cmd.split(" "),
+ check=True,
+ )
+ while get_test_pid(uuid) is not None:
+ time.sleep(1)
+ return True
+ except subprocess.CalledProcessError as _:
+ return False
+
+
+def test_is_queued(uuid: str) -> bool:
+ """
+ Checks autopkgtest-web's queued.json file for the presence of a test
+ with the given uuid. If the uuid is found, we know the test is currently
+ queued, and not running on any unit.
+
+ :param uuid: The given test uuid that is desired to be killed
+ """
+ net_name_path = pathlib.Path("/home/ubuntu/net-name.rc")
+ if not net_name_path.exists():
+ logging.warning(
+ "No net-name.rc! Cannot check queues. Presuming test is not queued."
+ )
+ return False
+ if "net_prod-proposed-migration" in net_name_path.read_text():
+ autopkgtest_url = "https://autopkgtest.ubuntu.com"
+ elif "net_stg-proposed-migration" in net_name_path.read_text():
+ autopkgtest_url = "https://autopkgtest.staging.ubuntu.com"
+ else:
+ logging.warning(
+ "Couldn't check mojo stage, presuming test isn't queued."
+ )
+ return False
+ queue_req = requests.get(autopkgtest_url + "/queued.json")
+ if uuid in queue_req.content.decode("utf-8"):
+ return True
+ return False
+
+
+def already_checked_this_host(hostnames: List[str]) -> bool:
+ """
+ Checks if the hostname of the worker this is running on is in
+ the list of hostnames provided
+
+ :param hostnames: List of hostnames that have already checked for the test to be killed
+ """
+ return socket.getfqdn() in hostnames
+
+
+def get_num_workers() -> int:
+ """
+ Retrieves the value for the juju config autopkgtest-cloud-worker option "num_workers"
+ Falls back to a safe default if parsing fails
+ """
+ worker_conf_files = [
+ "/home/ubuntu/worker-lxd-armhf.conf",
+ "/home/ubuntu/worker-lcy02.conf",
+ ]
+ worker_conf = configparser.ConfigParser()
+ for file in worker_conf_files:
+ if pathlib.Path(file).exists():
+ worker_conf.read_file(file)
+ try:
+ return int(worker_conf["autopkgtest"]["num_workers"])
+ except Exception as _:
+ logging.info(
+ "Parsing worker conf for num_workers failed, falling back to default of 3"
+ )
+ return 3
+
+
+def process_message(msg: amqp.Message, amqp_con: amqp.Connection):
+ """
+ Callback function processing the amqp message
+
+ :param msg: The amqp message with the values required to kill a specified test
+ Should look like this when coming directly from the test_manager app:
+ {"uuid": "uuid-of-test", "not-running-on": []}
+ And will look like this after two units have failed to find the test
+ with the specified uuid:
+ {"uuid": "uuid-of-test", "not-running-on": [
+ "hostname1",
+ "hostname2",
+ ]
+ }
+ :param amqp_con: the amqp connection that test-killer will be using
+ """
+ body = msg.body
+ if isinstance(body, bytes):
+ body = body.decode("UTF-8", errors="replace")
+ info = json.loads(body)
+ num_workers = get_num_workers()
+ logging.info("Received request to kill test: %s" % json.dumps(info))
+ if not check_message(info):
+ logging.error(
+ "Message %s is invalid. Ignoring.", json.dumps(info, indent=2)
+ )
+ # Remove the message from the queue
+ msg.channel.basic_ack(msg.delivery_tag)
+ return
+ if len(info["not-running-on"]) == num_workers:
+ # If the test hasn't been found on any of the workers, we reach this
+ # Check if the test is currently queued - this could happen in the case
+ # of infinite looping.
+ if test_is_queued(info["uuid"]):
+ msg.channel.basic_ack(msg.delivery_tag)
+ info["not-running-on"] = []
+ place_message_in_queue(info, amqp_con)
+ else:
+ msg.channel.basic_ack(msg.delivery_tag)
+ return
+
+ if already_checked_this_host(info["not-running-on"]):
+ # We check to see if we've already checked for the job on this cloud worker unit.
+ msg.channel.basic_ack(msg.delivery_tag)
+ logging.info(
+ "Test already found to not be running on this host, placing back into queue."
+ )
+ place_message_in_queue(info, amqp_con)
+ return
+ # get the test pid
+ pid = get_test_pid(info["uuid"])
+ if pid is None:
+ # The test isn't running on this unit
+ # append this hostname to not-running-on
+ msg.channel.basic_ack(msg.delivery_tag)
+ if len(info["not-running-on"]) == (num_workers - 1):
+ logging.info(
+ "Job %s not found on any workers, not re-queueing."
+ % json.dumps(info)
+ )
+ return
+ info["not-running-on"].append(socket.getfqdn())
+ place_message_in_queue(info, amqp_con)
+ return
+ # Kill the process
+ if kill_process(pid, info["uuid"]):
+ logging.info("Job %s has been killed." % json.dumps(info))
+ else:
+ logging.error(
+ "Job %s couldn't be killed! Ignoring." % json.dumps(info)
+ )
+ msg.channel.basic_ack(msg.delivery_tag)
+
+
+if __name__ == "__main__":
+ logging.basicConfig(level=logging.INFO)
+ amqp_con = amqp_connect()
+ status_ch = amqp_con.channel()
+ status_ch.access_request("/complete", active=True, read=True, write=True)
+ status_ch.exchange_declare(
+ WRITER_EXCHANGE_NAME, "fanout", durable=True, auto_delete=False
+ )
+ queue_name = "tests-to-kill"
+ status_ch.queue_declare(queue_name, durable=True, auto_delete=False)
+ status_ch.queue_bind(queue_name, WRITER_EXCHANGE_NAME, queue_name)
+ logging.info("Listening to requests on %s", queue_name)
+ status_ch.basic_consume(
+ "", callback=lambda msg: process_message(msg, amqp_con)
+ )
+ while status_ch.callbacks:
+ status_ch.wait()
diff --git a/charms/focal/autopkgtest-cloud-worker/config.yaml b/charms/focal/autopkgtest-cloud-worker/config.yaml
index 3719fad..f374d48 100644
--- a/charms/focal/autopkgtest-cloud-worker/config.yaml
+++ b/charms/focal/autopkgtest-cloud-worker/config.yaml
@@ -119,3 +119,7 @@ options:
description: Percentage of workers that'll accept upstream tests.
This is useful to prioritise certain tests.
type: int
+ num-workers:
+ default: 3
+ description: The combined total of the number of cloud and lxd workers
+ type: int
diff --git a/charms/focal/autopkgtest-cloud-worker/reactive/autopkgtest_cloud_worker.py b/charms/focal/autopkgtest-cloud-worker/reactive/autopkgtest_cloud_worker.py
index c8874c3..4011346 100644
--- a/charms/focal/autopkgtest-cloud-worker/reactive/autopkgtest_cloud_worker.py
+++ b/charms/focal/autopkgtest-cloud-worker/reactive/autopkgtest_cloud_worker.py
@@ -506,6 +506,7 @@ def write_swift_config():
"config.changed.mirror",
"config.changed.net-name",
"config.changed.worker-upstream-percentage",
+ "config.changed.num-workers",
)
@when_any("config.set.nova-rcs", "config.set.lxd-remotes")
def write_worker_config():
@@ -533,6 +534,7 @@ def write_worker_config():
"worker_upstream_percentage": config().get(
"worker-upstream-percentage"
),
+ "num_workers": config().get("num-workers"),
},
"virt": {
"package_size_default": config().get("worker-default-flavor"),
diff --git a/charms/focal/autopkgtest-cloud-worker/units/test-killer.service b/charms/focal/autopkgtest-cloud-worker/units/test-killer.service
new file mode 100644
index 0000000..dcdf846
--- /dev/null
+++ b/charms/focal/autopkgtest-cloud-worker/units/test-killer.service
@@ -0,0 +1,13 @@
+[Unit]
+Description=Test killer
+StartLimitIntervalSec=60s
+StartLimitBurst=10
+
+[Service]
+User=ubuntu
+ExecStart=/home/ubuntu/autopkgtest-cloud/tools/test-killer
+Restart=on-failure
+RestartSec=1s
+
+[Install]
+WantedBy=autopkgtest.target
diff --git a/charms/focal/autopkgtest-web/config.yaml b/charms/focal/autopkgtest-web/config.yaml
index a60637a..ced70a9 100644
--- a/charms/focal/autopkgtest-web/config.yaml
+++ b/charms/focal/autopkgtest-web/config.yaml
@@ -57,3 +57,7 @@ options:
type: string
default: ~
description: "List of teams that are allowed to request autopkgtest tests."
+ admin-nicks:
+ type: string
+ default:
+ description: "Comma separated list of admin nicknames."
diff --git a/charms/focal/autopkgtest-web/reactive/autopkgtest_web.py b/charms/focal/autopkgtest-web/reactive/autopkgtest_web.py
index 1cc391d..fbe82da 100644
--- a/charms/focal/autopkgtest-web/reactive/autopkgtest_web.py
+++ b/charms/focal/autopkgtest-web/reactive/autopkgtest_web.py
@@ -31,6 +31,7 @@ CONFIG_DIR = pathlib.Path("/home/ubuntu/.config/autopkgtest-web/")
for parent in reversed(CONFIG_DIR.parents):
parent.mkdir(mode=0o770, exist_ok=True)
ALLOWED_REQUESTOR_TEAMS_PATH = CONFIG_DIR / "allowed-requestor-teams"
+ADMIN_NICKS_PATH = CONFIG_DIR / "admin-nicks"
PUBLIC_SWIFT_CREDS_PATH = os.path.expanduser("~ubuntu/public-swift-creds")
@@ -256,6 +257,7 @@ def set_up_web_config(apache):
# webcontrol CGI scripts
ScriptAlias /request.cgi {webcontrol_dir}/request.cgi/
+ ScriptAlias /test-manager.cgi {webcontrol_dir}/test-manager.cgi/
ScriptAlias /login {webcontrol_dir}/request.cgi/login
ScriptAlias /logout {webcontrol_dir}/request.cgi/logout
ScriptAlias /private-results {webcontrol_dir}/private-results.cgi/
@@ -284,6 +286,15 @@ def write_allowed_teams():
allowed_teams_path.write_text(allowed_requestor_teams, encoding="utf-8")
+@when_all(
+ "config.changed.admin-nicks",
+ "config.set.admin-nicks",
+)
+def write_admin_nicks():
+ admin_nicks = config().get("admin-nicks")
+ ADMIN_NICKS_PATH.write_text(admin_nicks, encoding="utf-8")
+
+
@when_all("config.changed.github-secrets", "config.set.github-secrets")
def write_github_secrets():
status.maintenance("Writing github secrets")
diff --git a/charms/focal/autopkgtest-web/webcontrol/browse.cgi b/charms/focal/autopkgtest-web/webcontrol/browse.cgi
index 309fb82..a1c88c3 100755
--- a/charms/focal/autopkgtest-web/webcontrol/browse.cgi
+++ b/charms/focal/autopkgtest-web/webcontrol/browse.cgi
@@ -13,27 +13,17 @@ import flask
from helpers.admin import select_abnormally_long_jobs
from helpers.exceptions import RunningJSONNotFound
from helpers.utils import (
+ get_admin_nicks,
get_all_releases,
get_autopkgtest_cloud_conf,
get_supported_releases,
- setup_key,
+ initialise_app,
)
-from werkzeug.middleware.proxy_fix import ProxyFix
# Initialize app
-PATH = os.path.join(
- os.path.sep, os.getenv("XDG_RUNTIME_DIR", "/run"), "autopkgtest_webcontrol"
-)
-os.makedirs(PATH, exist_ok=True)
-app = flask.Flask("browse")
-# we don't want a long cache, as we only serve files that are regularly updated
+PATH, app, secret_path, _ = initialise_app("browse")
app.config["SEND_FILE_MAX_AGE_DEFAULT"] = 60
-app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1)
-
-secret_path = os.path.join(PATH, "secret_key")
-setup_key(app, secret_path)
-
db_con = None
swift_container_url = None
@@ -326,6 +316,7 @@ def package_overview(package, _=None):
}
}
}
+ show_stop = flask.session.get("nickname", "") in get_admin_nicks()
return render(
"browse-package.html",
@@ -341,6 +332,7 @@ def package_overview(package, _=None):
title_suffix="- %s" % package,
running=running_info,
queues_info=queues_info,
+ show_stop=show_stop,
)
@@ -491,6 +483,7 @@ def package_release_arch(package, release, arch, _=None):
),
)
+ show_stop = flask.session.get("nickname", "") in get_admin_nicks()
return render(
"browse-results.html",
package=package,
@@ -498,6 +491,7 @@ def package_release_arch(package, release, arch, _=None):
arch=arch,
package_results=results,
title_suffix="- %s/%s/%s" % (package, release, arch),
+ show_stop=show_stop,
)
@@ -573,6 +567,7 @@ def running():
running_count = 0
for pkg in packages:
running_count += len(running_info[pkg].keys())
+ show_stop = flask.session.get("nickname", "") in get_admin_nicks()
return render(
"browse-running.html",
@@ -582,6 +577,7 @@ def running():
queues_lengths=queues_lengths,
running=running_info,
running_count=running_count,
+ show_stop=show_stop,
)
@@ -591,9 +587,11 @@ def admin():
pruned_running_info = select_abnormally_long_jobs(
running_info, get_test_id=get_test_id, db_con=db_con
)
+ show_stop = flask.session.get("nickname", "") in get_admin_nicks()
return render(
"browse-admin.html",
running=pruned_running_info,
+ show_stop=show_stop,
)
diff --git a/charms/focal/autopkgtest-web/webcontrol/helpers/utils.py b/charms/focal/autopkgtest-web/webcontrol/helpers/utils.py
index 4e26eb8..aed7730 100644
--- a/charms/focal/autopkgtest-web/webcontrol/helpers/utils.py
+++ b/charms/focal/autopkgtest-web/webcontrol/helpers/utils.py
@@ -14,12 +14,45 @@ import typing
# introduced in python3.7, we use 3.8
from dataclasses import dataclass
+from html import escape as _escape
import distro_info
+from flask import Flask
+from flask_openid import OpenID
+from werkzeug.middleware.proxy_fix import ProxyFix
sqlite3.paramstyle = "named"
+def get_admin_nicks() -> typing.List[str]:
+ admin_nicks_path = "/home/ubuntu/.config/autopkgtest-web/admin-nicks"
+ try:
+ return pathlib.Path(admin_nicks_path).read_text().split(",")
+ except FileNotFoundError as _:
+ return []
+
+
+def initialise_app(app_name: str) -> tuple[str, Flask, str, OpenID]:
+ PATH = os.path.join(
+ os.path.sep,
+ os.getenv("XDG_RUNTIME_DIR", "/run"),
+ "autopkgtest_webcontrol",
+ )
+ os.makedirs(PATH, exist_ok=True)
+ app = Flask(app_name)
+ app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1)
+ # keep secret persistent between CGI invocations
+ secret_path = os.path.join(PATH, "secret_key")
+ setup_key(app, secret_path)
+ oid = OpenID(app, os.path.join(PATH, "openid"), safe_roots=[])
+ return PATH, app, secret_path, oid
+
+
+def maybe_escape(value: str) -> str:
+ """Escape the value if it is True-ish"""
+ return _escape(value) if value else value
+
+
@dataclass
class SqliteWriterConfig:
writer_exchange_name = "sqlite-write-me.fanout"
@@ -220,3 +253,16 @@ def get_test_id(db_con, release, arch, src):
get_test_id._cache = {}
+
+# Minimal HTML page skeleton; the single "{}" placeholder inside <body> is
+# filled in by callers through HTML.format(...).
+HTML = """
+<!doctype html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Autopkgtest Test Request</title>
+</head>
+<body>
+{}
+</body>
+</html>
+"""
diff --git a/charms/focal/autopkgtest-web/webcontrol/request/app.py b/charms/focal/autopkgtest-web/webcontrol/request/app.py
index 4fca679..8ee33d4 100644
--- a/charms/focal/autopkgtest-web/webcontrol/request/app.py
+++ b/charms/focal/autopkgtest-web/webcontrol/request/app.py
@@ -5,33 +5,17 @@ import logging
import os
import pathlib
from collections import ChainMap
-from html import escape as _escape
-from flask import Flask, redirect, request, session
-from flask_openid import OpenID
+from flask import redirect, request, session
from helpers.exceptions import WebControlException
-from helpers.utils import setup_key
+from helpers.utils import HTML, initialise_app, maybe_escape
from request.submit import Submit
-from werkzeug.middleware.proxy_fix import ProxyFix
# map multiple GET vars to AMQP JSON request parameter list
MULTI_ARGS = {"trigger": "triggers", "ppa": "ppas", "env": "env"}
EMPTY = ""
-HTML = """
-<!doctype html>
-<html>
-<head>
-<meta charset="utf-8">
-<title>Autopkgtest Test Request</title>
-</head>
-<body>
-{}
-</body>
-</html>
-"""
-
LOGIN = """
<form action="/login" method="post">
<input type="submit" value="Log in with Ubuntu SSO">
@@ -106,11 +90,6 @@ def invalid(inv_exception, code=400):
return HTML.format(html), code
-def maybe_escape(value):
- """Escape the value if it is True-ish"""
- return _escape(value) if value else value
-
-
def get_api_keys():
"""
API keys is a json file like this:
@@ -132,17 +111,7 @@ def get_api_keys():
# Initialize app
-PATH = os.path.join(
- os.path.sep, os.getenv("XDG_RUNTIME_DIR", "/run"), "autopkgtest_webcontrol"
-)
-os.makedirs(PATH, exist_ok=True)
-app = Flask("request")
-app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1)
-# keep secret persistent between CGI invocations
-secret_path = os.path.join(PATH, "secret_key")
-setup_key(app, secret_path)
-oid = OpenID(app, os.path.join(PATH, "openid"), safe_roots=[])
-
+PATH, app, secret_path, oid = initialise_app("request")
#
# Flask routes
diff --git a/charms/focal/autopkgtest-web/webcontrol/request/tests/test_app.py b/charms/focal/autopkgtest-web/webcontrol/request/tests/test_app.py
index 6fbcef5..aa0c407 100644
--- a/charms/focal/autopkgtest-web/webcontrol/request/tests/test_app.py
+++ b/charms/focal/autopkgtest-web/webcontrol/request/tests/test_app.py
@@ -7,6 +7,7 @@ from unittest.mock import mock_open, patch
import request.app
from helpers.exceptions import WebControlException
+from helpers.utils import setup_key
from request.submit import Submit
@@ -33,7 +34,7 @@ class DistroRequestTests(AppTestBase):
"""Secret key gets saved and loaded between app restarts."""
orig_key = request.app.app.secret_key
- request.app.setup_key(request.app, request.app.secret_path)
+ setup_key(request.app, request.app.secret_path)
self.assertEqual(request.app.app.secret_key, orig_key)
@patch("request.app.Submit")
diff --git a/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html b/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html
index 72d5d5b..07c5ff2 100644
--- a/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html
+++ b/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html
@@ -9,7 +9,7 @@
<!-- Running tests -->
{% for p, info in running.items()|sort %}
<h2 id="pkg-{{ p }}"><a href="/packages/{{ p }}">{{ p }}</a></h2>
- {{ macros.display_running_job(p, info) }}
+ {{ macros.display_running_job(p, info, show_stop) }}
{% endfor %}
{% endblock %}
diff --git a/charms/focal/autopkgtest-web/webcontrol/templates/browse-package.html b/charms/focal/autopkgtest-web/webcontrol/templates/browse-package.html
index 165cfd8..eff4667 100644
--- a/charms/focal/autopkgtest-web/webcontrol/templates/browse-package.html
+++ b/charms/focal/autopkgtest-web/webcontrol/templates/browse-package.html
@@ -32,7 +32,7 @@
<h3>Running tests</h3>
{% for p, info in running.items()|sort %}
- {{ macros.display_running_job(p, info) }}
+ {{ macros.display_running_job(p, info, show_stop) }}
{% endfor %}
<h3>Queued tests</h3>
diff --git a/charms/focal/autopkgtest-web/webcontrol/templates/browse-results.html b/charms/focal/autopkgtest-web/webcontrol/templates/browse-results.html
index fadff6d..57af00f 100644
--- a/charms/focal/autopkgtest-web/webcontrol/templates/browse-results.html
+++ b/charms/focal/autopkgtest-web/webcontrol/templates/browse-results.html
@@ -58,6 +58,11 @@
{% endif %}
{% endif %}
</td>
+ <td class="nowrap">
+ {% if show_stop and row[6] == "running" and row[10] not in ["-", ""] %}
+ <a href="{{ base_url }}test-manager.cgi?uuid={{ row[10] }}">☠</a> <!-- Displays skull and crossbones-->
+ {% endif %}
+ </td>
</tr>
{% endfor %}
</table>
diff --git a/charms/focal/autopkgtest-web/webcontrol/templates/browse-running.html b/charms/focal/autopkgtest-web/webcontrol/templates/browse-running.html
index 48080ce..091b52a 100644
--- a/charms/focal/autopkgtest-web/webcontrol/templates/browse-running.html
+++ b/charms/focal/autopkgtest-web/webcontrol/templates/browse-running.html
@@ -45,7 +45,7 @@
<!-- Running tests -->
{% for p, info in running.items()|sort %}
<h2 id="pkg-{{ p }}"><a href="/packages/{{ p }}">{{ p }}</a></h2>
- {{ macros.display_running_job(p, info) }}
+ {{ macros.display_running_job(p, info, show_stop) }}
{% endfor %}
<!-- queue contents -->
diff --git a/charms/focal/autopkgtest-web/webcontrol/templates/macros.html b/charms/focal/autopkgtest-web/webcontrol/templates/macros.html
index 941dc77..f75ebcb 100644
--- a/charms/focal/autopkgtest-web/webcontrol/templates/macros.html
+++ b/charms/focal/autopkgtest-web/webcontrol/templates/macros.html
@@ -1,4 +1,4 @@
-{% macro display_running_job(package, info) -%}
+{% macro display_running_job(package, info, show_stop) -%}
{% for runhash, relinfo in info.items() %}
{% for release, archinfo in relinfo.items() %}
{% for arch, (params, duration, logtail) in archinfo.items() %}
@@ -15,6 +15,11 @@
{% endif %}
{% endfor %}
<tr><th>Running for:</th><td>{{ duration//3600 }}h {{ duration % 3600//60 }}m {{ duration % 60 }}s ({{ duration }}s)</td></tr>
+ {% if show_stop %}
+ {% if "uuid" in params.keys() %}
+ <tr><td><a href="{{ base_url }}test-manager.cgi?uuid={{ params.get("uuid") }}">Stop this test</a></td></tr>
+ {% endif %}
+ {% endif %}
</table>
<pre>
{{ logtail }}
diff --git a/charms/focal/autopkgtest-web/webcontrol/test-manager.cgi b/charms/focal/autopkgtest-web/webcontrol/test-manager.cgi
new file mode 100755
index 0000000..9df3db3
--- /dev/null
+++ b/charms/focal/autopkgtest-web/webcontrol/test-manager.cgi
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+
+"""Run test-manager app as CGI script """
+
+from wsgiref.handlers import CGIHandler
+
+from test_manager.app import app
+
+if __name__ == "__main__":
+ # NOTE(review): DEBUG is switched on unconditionally here - confirm that
+ # exposing Flask debug output through this endpoint is intended.
+ app.config["DEBUG"] = True
+ # Run the Flask app through wsgiref's CGI handler.
+ CGIHandler().run(app)
diff --git a/charms/focal/autopkgtest-web/webcontrol/test_manager/__init__.py b/charms/focal/autopkgtest-web/webcontrol/test_manager/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/charms/focal/autopkgtest-web/webcontrol/test_manager/__init__.py
diff --git a/charms/focal/autopkgtest-web/webcontrol/test_manager/app.py b/charms/focal/autopkgtest-web/webcontrol/test_manager/app.py
new file mode 100644
index 0000000..8a62aae
--- /dev/null
+++ b/charms/focal/autopkgtest-web/webcontrol/test_manager/app.py
@@ -0,0 +1,135 @@
+"""
+test-manager is an app for autopkgtest-web which sends kill requests to
+the worker units, detailing the test uuid.
+
+The worker units then kill the test with the matching uuid.
+
+On the running page, admins will have a hyperlink under running jobs which, when clicked,
+will send the kill request.
+
+Before sending the kill request, test_manager checks that the uuid is indeed in running.json
+
+After sending the kill request, the request is picked up by a systemd unit named "test-killer"
+on the cloud worker units.
+
+This unit, through all the cloud worker units, will find which unit the test is running on,
+and kill the test, removing the test request from the queue and making the worker unit move
+on to the next test in the queue.
+"""
+
+
+import configparser
+import json
+import logging
+import os
+import pathlib
+import urllib
+
+import amqplib.client_0_8 as amqp
+from flask import request, session
+from helpers.exceptions import RunningJSONNotFound
+from helpers.utils import (
+ HTML,
+ get_admin_nicks,
+ get_all_releases,
+ initialise_app,
+ maybe_escape,
+)
+
+# NOTE(review): not referenced anywhere in the visible part of this module -
+# confirm whether it is still needed.
+ALL_UBUNTU_RELEASES = get_all_releases()
+
+# NOTE(review): same path as RUNNING_FILE below, as a plain string; only
+# RUNNING_FILE is used in the visible part of this module.
+RUNNING_FP = "/run/amqp-status-collector/running.json"
+RUNNING_FILE = pathlib.Path("/run/amqp-status-collector/running.json")
+# Name of the fanout exchange that kill requests are published to.
+WRITER_EXCHANGE_NAME = "stop-running.fanout"
+
+
+def submit_to_queue(message: dict):
+ """
+ Submits a dictionary as an amqp message to the WRITER_EXCHANGE_NAME exchange
+
+ :param message: Dictionary to be converted to an amqp.Message and placed into the queue
+ """
+ amqp_con = amqp_connect()
+ complete_amqp = amqp_con.channel()
+ complete_amqp.access_request(
+ "/complete", active=True, read=False, write=True
+ )
+ complete_amqp.exchange_declare(
+ WRITER_EXCHANGE_NAME, "fanout", durable=True, auto_delete=False
+ )
+ complete_amqp.basic_publish(
+ amqp.Message(json.dumps(message), delivery_mode=2),
+ WRITER_EXCHANGE_NAME,
+ "",
+ )
+
+
+# THIS CAN BE REFACTORED AFTER THE AUTO-QUEUE-CLEANUP MP IS MERGED!!!
+# AMQP_CONNECT IS NOW SHARED FUNCTION IN HELPERS/UTILS.PY
+def amqp_connect() -> amqp.Connection:
+ """Connect to AMQP server"""
+ cp = configparser.ConfigParser()
+ cp.read(os.path.expanduser("~ubuntu/autopkgtest-cloud.conf"))
+ amqp_uri = cp["amqp"]["uri"]
+ parts = urllib.parse.urlsplit(amqp_uri, allow_fragments=False)
+ amqp_con = amqp.Connection(
+ parts.hostname, userid=parts.username, password=parts.password
+ )
+ logging.info(
+ "Connected to AMQP server at %s@%s" % (parts.username, parts.hostname)
+ )
+ return amqp_con
+
+
+PATH, app, secret_path, oid = initialise_app("test_manager")
+
+
+@app.route("/", methods=["GET", "POST"])
+def index_root():
+ """Handle stop test requests"""
+ session.permanent = True
+ nick = maybe_escape(session.get("nickname"))
+ if nick not in get_admin_nicks():
+ return (
+ HTML.format(
+ (
+ "<p>You are not an admin. You are not "
+ "allowed to use this endpoint.</p>"
+ )
+ ),
+ 200,
+ )
+ params = {
+ maybe_escape(k): maybe_escape(v) for k, v in request.args.items()
+ }
+ if list(params.keys()) != ["uuid"]:
+ return (
+ HTML.format(
+ "<p>You have passed %s, please only pass the uuid</p>"
+ % ",".join(params.keys())
+ ),
+ 200,
+ )
+ if not RUNNING_FILE.is_file():
+ raise RunningJSONNotFound
+ running_data = json.loads(RUNNING_FILE.read_text())
+ if params["uuid"] not in json.dumps(running_data):
+ return (
+ HTML.format(
+ "<p>uuid %s not found in running jobs</p>" % params["uuid"]
+ ),
+ 200,
+ )
+ queue_message = {
+ "uuid": params["uuid"],
+ "not-running-on": [],
+ }
+ submit_to_queue(queue_message)
+ while params["uuid"] in RUNNING_FILE.read_text():
+ pass
+ return (
+ HTML.format(
+ "<p>Test with uuid %s has been killed.</p>" % params["uuid"]
+ ),
+ 200,
+ )
diff --git a/mojo/service-bundle b/mojo/service-bundle
index 6da0b03..02c95e7 100644
--- a/mojo/service-bundle
+++ b/mojo/service-bundle
@@ -1,11 +1,15 @@
{%- if stage_name == "production" %}
{%- set releases = "trusty xenial bionic focal jammy mantic noble" %}
{%- set channel = "latest/stable" %}
+ {%- set num_cloud = 2 %}
{%- elif stage_name == "staging" or stage_name == "devel" %}
{%- set releases = "focal jammy mantic noble" %}
{%- set channel = "latest/edge" %}
+ {%- set num_cloud = 1 %}
{%- endif %}
+{%- set num_lxd = 1 %}
+
{%- if stage_name == "production" %}
{%- set hostname = "autopkgtest.ubuntu.com" %}
{%- elif stage_name == "staging" %}
@@ -24,17 +28,14 @@ applications:
autopkgtest-cloud-worker:
charm: ubuntu-release-autopkgtest-cloud-worker
channel: {{ channel }}
-{%- if stage_name == "production" or stage_name == "staging" %}
- num_units: 2
-{%- else %}
- num_units: 1
-{%- endif %}
+ num_units: {{ num_cloud }}
constraints: mem=16G cores=8 root-disk=40G
{%- if stage_name == "production" or stage_name == "staging" %}
storage:
tmp: 350G
{%- endif %}
options: &common-options
+ num-workers: {{ num_cloud + num_lxd }}
swift-password: include-file://{{local_dir}}/swift_password
releases: {{ releases }}
influxdb-hostname: include-file://{{ local_dir }}/influx-hostname.txt
@@ -132,7 +133,7 @@ applications:
autopkgtest-lxd-worker:
charm: ubuntu-release-autopkgtest-cloud-worker
channel: {{ channel }}
- num_units: 1
+ num_units: {{ num_lxd }}
constraints: mem=16G cores=8 root-disk=40G
{%- if stage_name == "production" or stage_name == "staging" %}
storage:
@@ -141,6 +142,7 @@ applications:
options:
<<: *common-options
worker-args: lxd -r $LXD_REMOTE $LXD_REMOTE:autopkgtest/ubuntu/$RELEASE/$ARCHITECTURE
+ num-workers: {{ num_cloud + num_lxd }}
{%- if stage_name == "production" or stage_name == "staging" %}
worker-setup-command2: ln -s /dev/null /etc/systemd/system/bluetooth.service; printf "http_proxy={{ http_proxy }}\nhttps_proxy={{ https_proxy }}\nno_proxy={{ no_proxy }}\n" >> /etc/environment
{%- endif %}
@@ -206,6 +208,7 @@ applications:
canonical-security
canonical-server
canonical-ubuntu-qa
+ admin-nicks: andersson123,brian-murray,hyask,paride,sil2000,vorlon
{%- if stage_name == "production" %}
{%- set storage_host_internal = "objectstorage.prodstack5.canonical.com:443" %}
{%- set storage_path_internal = "/swift/v1/AUTH_0f9aae918d5b4744bf7b827671c86842" %}
Follow ups
-
Re: [Merge] ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master
From: Tim Andersson, 2024-04-29
-
Re: [Merge] ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master
From: Tim Andersson, 2024-04-29
-
Re: [Merge] ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master
From: Tim Andersson, 2024-04-26
-
Re: [Merge] ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master
From: Tim Andersson, 2024-04-26
-
Re: [Merge] ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master
From: Tim Andersson, 2024-04-26
-
Re: [Merge] ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master
From: Tim Andersson, 2024-04-25
-
Re: [Merge] ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master
From: Tim Andersson, 2024-04-25
-
[Merge] ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master
From: Tim Andersson, 2024-04-25
-
Re: [Merge] ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master
From: Tim Andersson, 2024-04-25
-
Re: [Merge] ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master
From: Tim Andersson, 2024-04-24