canonical-ubuntu-qa team mailing list archive
-
canonical-ubuntu-qa team
-
Mailing list archive
-
Message #04179
[Merge] ~hyask/autopkgtest-cloud:skia/admin_page into autopkgtest-cloud:master
Skia has proposed merging ~hyask/autopkgtest-cloud:skia/admin_page into autopkgtest-cloud:master.
Requested reviews:
Canonical's Ubuntu QA (canonical-ubuntu-qa)
For more details, see:
https://code.launchpad.net/~hyask/autopkgtest-cloud/+git/autopkgtest-cloud/+merge/466734
Improve the admin page
--
Your team Canonical's Ubuntu QA is requested to review the proposed merge of ~hyask/autopkgtest-cloud:skia/admin_page into autopkgtest-cloud:master.
diff --git a/charms/focal/autopkgtest-web/webcontrol/browse.cgi b/charms/focal/autopkgtest-web/webcontrol/browse.cgi
index 309fb82..c1b1248 100755
--- a/charms/focal/autopkgtest-web/webcontrol/browse.cgi
+++ b/charms/focal/autopkgtest-web/webcontrol/browse.cgi
@@ -10,7 +10,12 @@ from collections import OrderedDict
from wsgiref.handlers import CGIHandler
import flask
-from helpers.admin import select_abnormally_long_jobs
+from helpers.admin import (
+ DELTA_BETWEEN_LAST_LOG_AND_DURATION,
+ DURATION_FACTOR_BEFORE_CONSIDERED_ABNORMAL,
+ select_abnormally_long_jobs,
+ select_duration_mismatch,
+)
from helpers.exceptions import RunningJSONNotFound
from helpers.utils import (
get_all_releases,
@@ -588,12 +593,16 @@ def running():
@app.route("/admin")
def admin():
running_info = get_running_jobs()
- pruned_running_info = select_abnormally_long_jobs(
+ too_long_jobs = select_abnormally_long_jobs(
running_info, get_test_id=get_test_id, db_con=db_con
)
+ stuck_jobs = select_duration_mismatch(running_info)
return render(
"browse-admin.html",
- running=pruned_running_info,
+ too_long_factor=DURATION_FACTOR_BEFORE_CONSIDERED_ABNORMAL,
+ too_long_jobs=too_long_jobs,
+ stuck_factor=DELTA_BETWEEN_LAST_LOG_AND_DURATION,
+ stuck_jobs=stuck_jobs,
)
diff --git a/charms/focal/autopkgtest-web/webcontrol/helpers/admin.py b/charms/focal/autopkgtest-web/webcontrol/helpers/admin.py
index afc406b..0c77003 100644
--- a/charms/focal/autopkgtest-web/webcontrol/helpers/admin.py
+++ b/charms/focal/autopkgtest-web/webcontrol/helpers/admin.py
@@ -1,32 +1,59 @@
-MAX_DURATION_FACTOR_BEFORE_CONSIDERED_WEIRD = 5
+DURATION_FACTOR_BEFORE_CONSIDERED_ABNORMAL = 8
+DELTA_BETWEEN_LAST_LOG_AND_DURATION = 30 # minutes
def select_abnormally_long_jobs(running_info, get_test_id, db_con):
- global MAX_DURATION_FACTOR_BEFORE_CONSIDERED_WEIRD
- pruned_running_info = {}
+ selected = {}
for running_pkg, running_dict in running_info.items():
for skey, sval in running_dict.items():
for release, values in sval.items():
for arch, vals in values.items():
- duration = vals[1]
- test_id = get_test_id(release, arch, running_pkg)
- if test_id is None:
- continue
- row = db_con.execute(
- "SELECT AVG(duration) FROM result WHERE test_id=?",
- (test_id,),
- )
- duration_avg = row.fetchone()[0]
- if (
- (duration)
- > duration_avg
- * MAX_DURATION_FACTOR_BEFORE_CONSIDERED_WEIRD
- ):
- pruned_running_info.setdefault(
- running_pkg, {}
- ).setdefault(skey, {}).setdefault(
- release, {}
- ).setdefault(
- arch, vals
+ try:
+ duration = vals[1]
+ test_id = get_test_id(release, arch, running_pkg)
+ if test_id is None:
+ continue
+ row = db_con.execute(
+ "SELECT AVG(duration) FROM result WHERE test_id=?",
+ (test_id,),
)
- return pruned_running_info
+ duration_avg = row.fetchone()[0]
+ if (
+ (duration)
+ > duration_avg
+ * DURATION_FACTOR_BEFORE_CONSIDERED_ABNORMAL
+ ):
+ selected.setdefault(running_pkg, {}).setdefault(
+ skey, {}
+ ).setdefault(release, {}).setdefault(arch, vals)
+ # If anything goes wrong while evaluating this job, skip it
+ # and move on to the next one.
+ except Exception:
+ pass
+ return selected
+
+
+def select_duration_mismatch(running_info):
+ selected = {}
+ for running_pkg, running_dict in running_info.items():
+ for skey, sval in running_dict.items():
+ for release, values in sval.items():
+ for arch, vals in values.items():
+ try:
+ duration = vals[1]
+ logs = vals[2]
+ last_printed_duration = int(
+ logs.split("\n")[-2].split(" ")[0][:-1]
+ )
+ if (
+ abs(duration - last_printed_duration)
+ > DELTA_BETWEEN_LAST_LOG_AND_DURATION * 60
+ ):
+ selected.setdefault(running_pkg, {}).setdefault(
+ skey, {}
+ ).setdefault(release, {}).setdefault(arch, vals)
+ # If anything goes wrong while evaluating this job, skip it
+ # and move on to the next one.
+ except Exception:
+ pass
+ return selected
diff --git a/charms/focal/autopkgtest-web/webcontrol/helpers/tests.py b/charms/focal/autopkgtest-web/webcontrol/helpers/tests.py
index 1c26fb9..8053282 100644
--- a/charms/focal/autopkgtest-web/webcontrol/helpers/tests.py
+++ b/charms/focal/autopkgtest-web/webcontrol/helpers/tests.py
@@ -196,7 +196,29 @@ def populate_dummy_running_cache(path):
""",
]
}
- }
+ },
+ "hello-hash2": {
+ supported_releases[4]: {
+ "amd64": [
+ {
+ "submit-time": "2024-02-21 11:01:21",
+ "triggers": [
+ "hello2/1.2.3-0ubuntu2",
+ ],
+ "uuid": "2368aa9c-ac08-46a3-a5fd-6247d0d2021c",
+ },
+ 5904,
+ """
+3071s hello2/test_XYZ.hello [ 54%]
+3153s hello2/test_XYZ.hello [ 64%]
+3271s hello2/test_XYZ.hello [ 74%]
+3292s hello2/test_XYZ.hello [ 84%]
+3493s hello2/test_XYZ.hello [ 94%]
+3494s hello2/test_XYZ.hello [ 98%]
+""",
+ ]
+ }
+ },
},
},
f,
diff --git a/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html b/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html
index 72d5d5b..014ff81 100644
--- a/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html
+++ b/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html
@@ -7,7 +7,16 @@
<p>This page is simply a bunch of heuristics filtering all running jobs to try to get the problematic ones. Feel free to come help improve the heuristics <a href="https://code.launchpad.net/~ubuntu-release/autopkgtest-cloud/+git/autopkgtest-cloud/+ref/master">here.</a></p>
<!-- Running tests -->
- {% for p, info in running.items()|sort %}
+ <h2>Jobs taking abnormally long time</h2>
+ <p>More than {{ too_long_factor }} times the average time.</p>
+ {% for p, info in too_long_jobs.items()|sort %}
+ <h2 id="pkg-{{ p }}"><a href="/packages/{{ p }}">{{ p }}</a></h2>
+ {{ macros.display_running_job(p, info) }}
+ {% endfor %}
+
+ <h2>Jobs likely stuck</h2>
+ <p>More than {{ stuck_factor }} minutes of delta between recorded duration and last log line.</p>
+ {% for p, info in stuck_jobs.items()|sort %}
<h2 id="pkg-{{ p }}"><a href="/packages/{{ p }}">{{ p }}</a></h2>
{{ macros.display_running_job(p, info) }}
{% endfor %}