← Back to team overview

canonical-ubuntu-qa team mailing list archive

[Merge] ~hyask/autopkgtest-cloud:skia/admin_page into autopkgtest-cloud:master

 

Skia has proposed merging ~hyask/autopkgtest-cloud:skia/admin_page into autopkgtest-cloud:master.

Requested reviews:
  Canonical's Ubuntu QA (canonical-ubuntu-qa)

For more details, see:
https://code.launchpad.net/~hyask/autopkgtest-cloud/+git/autopkgtest-cloud/+merge/466734

Improve the admin page
-- 
Your team Canonical's Ubuntu QA is requested to review the proposed merge of ~hyask/autopkgtest-cloud:skia/admin_page into autopkgtest-cloud:master.
diff --git a/charms/focal/autopkgtest-web/webcontrol/browse.cgi b/charms/focal/autopkgtest-web/webcontrol/browse.cgi
index 309fb82..c1b1248 100755
--- a/charms/focal/autopkgtest-web/webcontrol/browse.cgi
+++ b/charms/focal/autopkgtest-web/webcontrol/browse.cgi
@@ -10,7 +10,12 @@ from collections import OrderedDict
 from wsgiref.handlers import CGIHandler
 
 import flask
-from helpers.admin import select_abnormally_long_jobs
+from helpers.admin import (
+    DELTA_BETWEEN_LAST_LOG_AND_DURATION,
+    DURATION_FACTOR_BEFORE_CONSIDERED_ABNORMAL,
+    select_abnormally_long_jobs,
+    select_duration_mismatch,
+)
 from helpers.exceptions import RunningJSONNotFound
 from helpers.utils import (
     get_all_releases,
@@ -588,12 +593,16 @@ def running():
 @app.route("/admin")
 def admin():
     running_info = get_running_jobs()
-    pruned_running_info = select_abnormally_long_jobs(
+    too_long_jobs = select_abnormally_long_jobs(
         running_info, get_test_id=get_test_id, db_con=db_con
     )
+    stuck_jobs = select_duration_mismatch(running_info)
     return render(
         "browse-admin.html",
-        running=pruned_running_info,
+        too_long_factor=DURATION_FACTOR_BEFORE_CONSIDERED_ABNORMAL,
+        too_long_jobs=too_long_jobs,
+        stuck_factor=DELTA_BETWEEN_LAST_LOG_AND_DURATION,
+        stuck_jobs=stuck_jobs,
     )
 
 
diff --git a/charms/focal/autopkgtest-web/webcontrol/helpers/admin.py b/charms/focal/autopkgtest-web/webcontrol/helpers/admin.py
index afc406b..0c77003 100644
--- a/charms/focal/autopkgtest-web/webcontrol/helpers/admin.py
+++ b/charms/focal/autopkgtest-web/webcontrol/helpers/admin.py
@@ -1,32 +1,59 @@
-MAX_DURATION_FACTOR_BEFORE_CONSIDERED_WEIRD = 5
+DURATION_FACTOR_BEFORE_CONSIDERED_ABNORMAL = 8
+DELTA_BETWEEN_LAST_LOG_AND_DURATION = 30  # minutes
 
 
 def select_abnormally_long_jobs(running_info, get_test_id, db_con):
-    global MAX_DURATION_FACTOR_BEFORE_CONSIDERED_WEIRD
-    pruned_running_info = {}
+    selected = {}
     for running_pkg, running_dict in running_info.items():
         for skey, sval in running_dict.items():
             for release, values in sval.items():
                 for arch, vals in values.items():
-                    duration = vals[1]
-                    test_id = get_test_id(release, arch, running_pkg)
-                    if test_id is None:
-                        continue
-                    row = db_con.execute(
-                        "SELECT AVG(duration) FROM result WHERE test_id=?",
-                        (test_id,),
-                    )
-                    duration_avg = row.fetchone()[0]
-                    if (
-                        (duration)
-                        > duration_avg
-                        * MAX_DURATION_FACTOR_BEFORE_CONSIDERED_WEIRD
-                    ):
-                        pruned_running_info.setdefault(
-                            running_pkg, {}
-                        ).setdefault(skey, {}).setdefault(
-                            release, {}
-                        ).setdefault(
-                            arch, vals
+                    try:
+                        duration = vals[1]
+                        test_id = get_test_id(release, arch, running_pkg)
+                        if test_id is None:
+                            continue
+                        row = db_con.execute(
+                            "SELECT AVG(duration) FROM result WHERE test_id=?",
+                            (test_id,),
                         )
-    return pruned_running_info
+                        duration_avg = row.fetchone()[0]
+                        if (
+                            (duration)
+                            > duration_avg
+                            * DURATION_FACTOR_BEFORE_CONSIDERED_ABNORMAL
+                        ):
+                            selected.setdefault(running_pkg, {}).setdefault(
+                                skey, {}
+                            ).setdefault(release, {}).setdefault(arch, vals)
+                    # Whatever happens when trying to select or not the job,
+                    # let's not care and handle the next one.
+                    except Exception:
+                        pass
+    return selected
+
+
+def select_duration_mismatch(running_info):
+    selected = {}
+    for running_pkg, running_dict in running_info.items():
+        for skey, sval in running_dict.items():
+            for release, values in sval.items():
+                for arch, vals in values.items():
+                    try:
+                        duration = vals[1]
+                        logs = vals[2]
+                        last_printed_duration = int(
+                            logs.split("\n")[-2].split(" ")[0][:-1]
+                        )
+                        if (
+                            abs(duration - last_printed_duration)
+                            > DELTA_BETWEEN_LAST_LOG_AND_DURATION * 60
+                        ):
+                            selected.setdefault(running_pkg, {}).setdefault(
+                                skey, {}
+                            ).setdefault(release, {}).setdefault(arch, vals)
+                    # Whatever happens when trying to select or not the job,
+                    # let's not care and handle the next one.
+                    except Exception:
+                        pass
+    return selected
diff --git a/charms/focal/autopkgtest-web/webcontrol/helpers/tests.py b/charms/focal/autopkgtest-web/webcontrol/helpers/tests.py
index 1c26fb9..8053282 100644
--- a/charms/focal/autopkgtest-web/webcontrol/helpers/tests.py
+++ b/charms/focal/autopkgtest-web/webcontrol/helpers/tests.py
@@ -196,7 +196,29 @@ def populate_dummy_running_cache(path):
 """,
                             ]
                         }
-                    }
+                    },
+                    "hello-hash2": {
+                        supported_releases[4]: {
+                            "amd64": [
+                                {
+                                    "submit-time": "2024-02-21 11:01:21",
+                                    "triggers": [
+                                        "hello2/1.2.3-0ubuntu2",
+                                    ],
+                                    "uuid": "2368aa9c-ac08-46a3-a5fd-6247d0d2021c",
+                                },
+                                5904,
+                                """
+3071s hello2/test_XYZ.hello    [ 54%]
+3153s hello2/test_XYZ.hello    [ 64%]
+3271s hello2/test_XYZ.hello    [ 74%]
+3292s hello2/test_XYZ.hello    [ 84%]
+3493s hello2/test_XYZ.hello    [ 94%]
+3494s hello2/test_XYZ.hello    [ 98%]
+""",
+                            ]
+                        }
+                    },
                 },
             },
             f,
diff --git a/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html b/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html
index 72d5d5b..014ff81 100644
--- a/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html
+++ b/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html
@@ -7,7 +7,16 @@
   <p>This page is simply a bunch of heuristics filtering all running jobs to try to get the problematic ones. Feel free to come help improve the heuristics <a href="https://code.launchpad.net/~ubuntu-release/autopkgtest-cloud/+git/autopkgtest-cloud/+ref/master">here.</a></p>
 
   <!-- Running tests -->
-  {% for p, info in running.items()|sort %}
+  <h2>Jobs taking abnormally long time</h2>
+  <p>More than {{ too_long_factor }} times the average time.</p>
+  {% for p, info in too_long_jobs.items()|sort %}
+    <h2 id="pkg-{{ p }}"><a href="/packages/{{ p }}">{{ p }}</a></h2>
+    {{ macros.display_running_job(p, info) }}
+  {% endfor %}
+
+  <h2>Jobs likely stuck</h2>
+  <p>More than {{ stuck_factor }} minutes of delta between recorded duration and last log line.</p>
+  {% for p, info in stuck_jobs.items()|sort %}
     <h2 id="pkg-{{ p }}"><a href="/packages/{{ p }}">{{ p }}</a></h2>
     {{ macros.display_running_job(p, info) }}
   {% endfor %}