
[Merge] ~hyask/autopkgtest-cloud:skia/improve_update-github-jobs into autopkgtest-cloud:master

 

Skia has proposed merging ~hyask/autopkgtest-cloud:skia/improve_update-github-jobs into autopkgtest-cloud:master.

Requested reviews:
  Canonical's Ubuntu QA (canonical-ubuntu-qa)

For more details, see:
https://code.launchpad.net/~hyask/autopkgtest-cloud/+git/autopkgtest-cloud/+merge/470013

Various improvements in `update-github-jobs`:
* better logging for easier debugging
* use pathlib instead of the old `os.path` API
* memoize the Swift container listing and downloaded results, so each object is fetched at most once per run, for significant performance gains (see the sketch below)
* fix some linter issues
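
For reviewers less familiar with the pattern, a minimal runnable sketch of the
module-level memoization used here (`slow_fetch`, `get_result` and the object
path are illustrative stand-ins, not code from this patch):

    import time

    _result_cache = {}

    def slow_fetch(result_obj):
        # stands in for swift_conn.get_object() plus the tarfile parsing
        time.sleep(0.1)
        return 0, {"custom_environment": ["EXAMPLE_VAR=1"]}

    def get_result(result_obj):
        """Download once, answer every later lookup from the cache."""
        if result_obj not in _result_cache:
            _result_cache[result_obj] = slow_fetch(result_obj)
        return _result_cache[result_obj]

    if __name__ == "__main__":
        get_result("release/arch/p/pkg/run_id/result.tar")  # miss: fetches
        get_result("release/arch/p/pkg/run_id/result.tar")  # hit: instant

The same idea backs the container listing cache, so `get_container` is called
at most once per container instead of once per pending job.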
-- 
Your team Canonical's Ubuntu QA is requested to review the proposed merge of ~hyask/autopkgtest-cloud:skia/improve_update-github-jobs into autopkgtest-cloud:master.
diff --git a/.gitignore b/.gitignore
index 0682011..da82cbb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 docs/_build
 *.charm
 __pycache__
+.ipynb_checkpoints
+*.db
diff --git a/charms/focal/autopkgtest-web/webcontrol/update-github-jobs b/charms/focal/autopkgtest-web/webcontrol/update-github-jobs
index 4308183..14a8888 100755
--- a/charms/focal/autopkgtest-web/webcontrol/update-github-jobs
+++ b/charms/focal/autopkgtest-web/webcontrol/update-github-jobs
@@ -1,26 +1,30 @@
 #!/usr/bin/python3
 
-import datetime
 import io
 import json
 import logging
 import os
-import pathlib
 import sys
 import tarfile
 from datetime import datetime, timedelta
+from pathlib import Path
 
 import swiftclient
 from helpers.utils import get_autopkgtest_cloud_conf, read_config_file
 from request.submit import Submit
 
-PENDING_DIR = "/run/autopkgtest_webcontrol/github-pending"
-RUNNING_CACHE = "/run/amqp-status-collector/running.json"
-SWIFT_CREDS_FILE = "/home/ubuntu/public-swift-creds"
+PENDING_DIR = Path("/run/autopkgtest_webcontrol/github-pending")
+RUNNING_CACHE = Path("/run/amqp-status-collector/running.json")
+SWIFT_CREDS_FILE = Path("/home/ubuntu/public-swift-creds")
 MAX_DAY_DIFF = 30
 
+swift_container_cache = {}
+result_cache = {}
+
 
 def result_matches_job(swift_conn, container, result_obj, params):
+    global result_cache
+
     # before we check the environment variables, let's first check that the
     # arch, release and package are correct
     obj_split = result_obj.split("/")
@@ -34,19 +38,25 @@ def result_matches_job(swift_conn, container, result_obj, params):
         )
         return
 
-    _, contents = swift_conn.get_object(container, result_obj)
-    tar_bytes = io.BytesIO(contents)
-    try:
-        with tarfile.open(None, "r", tar_bytes) as tar:
-            exitcode = int(tar.extractfile("exitcode").read().strip())
-            info = json.loads(tar.extractfile("testinfo.json").read().decode())
-    except (KeyError, ValueError, tarfile.TarError) as e:
-        logging.error("broken result %s: %s", result_obj, e)
-        return
+    if result_obj not in result_cache:
+        tar_bytes = io.BytesIO(swift_conn.get_object(container, result_obj)[1])
+        try:
+            with tarfile.open(None, "r", tar_bytes) as tar:
+                exitcode = int(tar.extractfile("exitcode").read().strip())
+                info = json.loads(
+                    tar.extractfile("testinfo.json").read().decode()
+                )
+            result_cache[result_obj] = (exitcode, info)
+        except (KeyError, ValueError, tarfile.TarError) as e:
+            logging.error("broken result %s: %s", result_obj, e)
+            return
+    else:
+        exitcode, info = result_cache[result_obj]
+
     try:
         result_env = info["custom_environment"]
     except KeyError:
-        logging.info("result has no custom_environment, ignoring")
+        logging.debug("result has no custom_environment, ignoring")
         return
 
     # if the test result has the same parameters than the job, we have a winner
@@ -64,7 +74,7 @@ def result_matches_job(swift_conn, container, result_obj, params):
     return exitcode
 
 
-def finish_job(jobfile, params, code, log_url):
+def finish_job(jobfile: Path, params, code, log_url):
     """Tell GitHub that job is complete and delete the job file"""
 
     if code in (0, 2):
@@ -81,27 +91,35 @@ def finish_job(jobfile, params, code, log_url):
         "target_url": log_url,
     }
 
+    statuses_url = None
     # find status URL
     for e in params["env"]:
         if e.startswith("GITHUB_STATUSES_URL="):
             statuses_url = e.split("=", 1)[1]
-    # tell GitHub about the result
-    Submit.post_json(
-        statuses_url,
-        data,
-        "/home/ubuntu/github-status-credentials.txt",
-        params["package"],
-    )
+
+    # even if we didn't find `statuses_url`, we still want to delete the job file
+    if statuses_url is not None:
+        # tell GitHub about the result
+        Submit.post_json(
+            statuses_url,
+            data,
+            "/home/ubuntu/github-status-credentials.txt",
+            params["package"],
+        )
+    else:
+        logging.warning(
+            "did not find GITHUB_STATUSES_URL for %s", jobfile.name
+        )
 
     logging.debug("removing job file %s" % jobfile)
     try:
-        os.unlink(jobfile)
-    except FileNotFoundError as _:
+        jobfile.unlink()
+    except FileNotFoundError:
         logging.debug(
             "jobfile %s not found, maybe it was already deleted?" % jobfile
         )
         return
-    logging.debug("job %s finished!" % jobfile)
+    logging.info("%s processed, payload was %s", jobfile.name, data)
 
 
 def is_job_running(params):
@@ -113,8 +131,7 @@ def is_job_running(params):
         params["env"],
     )
 
-    running_file = pathlib.Path(RUNNING_CACHE)
-    running_json = json.loads(running_file.read_text())
+    running_json = json.loads(RUNNING_CACHE.read_text())
     packages = running_json.keys()
     if params["package"] not in packages:
         return False
@@ -135,12 +152,12 @@ def is_job_running(params):
     return False
 
 
-def process_job(jobfile, swift_conn, ext_url):
+def process_job(jobfile: Path, swift_conn, ext_url):
+    global swift_container_cache
     try:
-        with open(jobfile) as f:
-            params = json.load(f)
-            mtime = os.fstat(f.fileno()).st_mtime
-    except json.decoder.JSONDecodeError as e:
+        params = json.loads(jobfile.read_text())
+        mtime = jobfile.stat().st_mtime
+    except (OSError, ValueError) as e:
         logging.error("couldn't read %s, skipping: %s", jobfile, e)
         return
     # if the job is currently running, it indicates a re-trigger has occurred!
@@ -148,9 +165,9 @@ def process_job(jobfile, swift_conn, ext_url):
         logging.debug("job %s is currently running, skipping.")
         return
 
-    logging.debug(
+    logging.info(
         "\n\n--------------------\nprocessing job %s:\n   %s",
-        os.path.basename(jobfile),
+        jobfile.name,
         params,
     )
 
@@ -163,7 +180,12 @@ def process_job(jobfile, swift_conn, ext_url):
     except (KeyError, IndexError):
         pass
 
-    _, object_list = swift_conn.get_container(container, full_listing=True)
+    if container not in swift_container_cache:
+        _, swift_container_cache[container] = swift_conn.get_container(
+            container, full_listing=True
+        )
+
+    object_list = swift_container_cache[container]
 
     result_for_finishing_job = {}
     for obj in object_list:
@@ -212,7 +234,13 @@ def process_job(jobfile, swift_conn, ext_url):
 if __name__ == "__main__":
     if "DEBUG" in os.environ:
         logging.basicConfig(level="DEBUG")
-    if not os.path.isdir(PENDING_DIR):
+    else:
+        logging.basicConfig(level="INFO")
+    # quiet down the very chatty third-party loggers
+    logging.getLogger("urllib3").setLevel(logging.INFO)
+    logging.getLogger("swiftclient").setLevel(logging.INFO)
+
+    if not PENDING_DIR.is_dir():
         logging.info("%s does not exist, nothing to do", PENDING_DIR)
         sys.exit(0)
 
@@ -240,7 +268,13 @@ if __name__ == "__main__":
     jobs = sys.argv[1:]
 
     if not jobs:
-        jobs = os.listdir(PENDING_DIR)
+        jobs = PENDING_DIR.iterdir()
+    else:
+        # keep the old behaviour: names resolve relative to PENDING_DIR,
+        # while absolute paths still win when joined with `/`
+        jobs = [PENDING_DIR / job for job in jobs]
+
+    logging.info("start processing jobs")
 
     for job in jobs:
-        process_job(os.path.join(PENDING_DIR, job), swift_conn, external_url)
+        process_job(job, swift_conn, external_url)
+
+    logging.info("done processing jobs")