canonical-ubuntu-qa team mailing list archive
-
canonical-ubuntu-qa team
-
Mailing list archive
-
Message #04381
Re: [Merge] ~hyask/autopkgtest-cloud:skia/stats into autopkgtest-cloud:master
Review: Needs Fixing
Hey! I took a look through this. Looks great. Two small inline comments but then I'm happy with it :)
Diff comments:
> diff --git a/charms/focal/autopkgtest-web/webcontrol/stats.py b/charms/focal/autopkgtest-web/webcontrol/stats.py
> new file mode 100755
> index 0000000..9080d36
> --- /dev/null
> +++ b/charms/focal/autopkgtest-web/webcontrol/stats.py
> @@ -0,0 +1,321 @@
> +#!/usr/bin/python3
> +
> +import argparse
> +import datetime
> +import logging
> +import os
> +import re
> +import sqlite3
> +import sys
> +from urllib.request import urlretrieve
> +
> +import requests
> +
> +sqlite3.paramstyle = "named"
> +
> +
class Run:
    """
    This represents a job run.

    To build this object, give its constructor a dict with at least the
    following keys:
    - package
    - release
    - arch
    - run_id
    It is very common to build that from a row coming from a DB query selecting
    at least those fields.
    """

    # Heuristic patterns, compiled once at class-definition time and shared
    # by every instance.
    host_pattern = re.compile(
        r"host juju-7f2275-prod-proposed-migration-environment-(\d);"
    )
    datacenter_pattern = re.compile(r"@(.*)\.secgroup ")
    lxd_remote_pattern = re.compile(r"lxd-armhf-(\d*\.\d*\.\d*\.\d*):")
    boot_time_pattern = re.compile(r"^ *(\d*)s .* testbed dpkg architecture: ")
    boot_attempts_pattern = re.compile(r"nova boot failed \(attempt #(\d*)\)")

    def __init__(self, row):
        """Initialize from a mapping holding package/release/arch/run_id."""
        self.package = row["package"]
        self.release = row["release"]
        self.arch = row["arch"]
        self.run_id = row["run_id"]
        # Filled in lazily by download_log() and the extract_*() helpers.
        self.log = None
        self.log_lines = None
        self.host = None
        self.datacenter = None
        self.first_boot_time = None
        self.boot_attempts = None

    def __str__(self):
        return (
            f"{self.package} ({self.release}/{self.arch}) - {self.log_url()}"
        )

    def as_row_dict(self):
        """Return the extracted stats as a dict matching the tests_stats columns."""
        return {
            "run_id": self.run_id,
            "first_boot_time": self.first_boot_time,
            "boot_attempts": self.boot_attempts,
            "host": self.host,
            "datacenter": self.datacenter,
        }

    def log_url(self):
        """Return the URL of this run's log.gz on autopkgtest.ubuntu.com."""
        log_address_template = (
            "https://autopkgtest.ubuntu.com/"
            "results/autopkgtest-%(release)s/%(release)s/%(arch)s/"
            "%(package_prefix)s/%(package)s/%(run_id)s/log.gz"
        )
        # Pool-style sharding: "lib*" packages live under "libX", everything
        # else under its first letter.
        if self.package.startswith("lib"):
            prefix = "lib" + self.package[3]
        else:
            prefix = self.package[0]
        return log_address_template % {
            "release": self.release,
            "arch": self.arch,
            "package_prefix": prefix,
            "package": self.package,
            "run_id": self.run_id,
        }

    def download_log(self):
        """Fetch the run's log once; on any failure fall back to an empty log."""
        if self.log is not None:
            # Already downloaded (callers used to trigger several fetches of
            # the same log) - keep the cached copy.
            return
        try:
            r = requests.get(self.log_url())
            # Without this, an HTML error page (404, 503, ...) would be
            # parsed as if it were log content.
            r.raise_for_status()
            self.log = r.text
            self.log_lines = self.log.splitlines()
        except Exception as e:
            print(e)
            self.log = ""
            self.log_lines = []

    def extract_data(self):
        """
        This is the main part of this script, where we call all the heuristics
        to extract the data from the logs:
        - host that ran the test
        - datacenter (or LXD remote for armhf)
        - time until the testbed was first usable
        - number of nova boot attempts
        """
        self.download_log()
        self.extract_host()
        self.extract_datacenter()
        self.extract_first_boot_time()
        self.extract_boot_attempts()

    def _command_lines(self):
        """Return the log lines holding the autopkgtest command invocation."""
        return [
            line
            for line in self.log_lines
            if "command line: /home/ubuntu/autopkgtest/runner/autopkgtest"
            in line
        ]

    def extract_host(self):
        """Best-effort: parse the worker host number from the command line."""
        try:
            lines = self._command_lines()
            m = self.host_pattern.search(lines[0])
            if m:
                self.host = m.group(1)
        except Exception as e:
            print(e)

    def extract_datacenter(self):
        """Best-effort: parse the datacenter (or armhf LXD remote) name."""
        try:
            lines = self._command_lines()
            if self.arch == "armhf":
                # armhf runs on LXD remotes identified by an IP address.
                m = self.lxd_remote_pattern.search(lines[0])
                if m:
                    self.datacenter = m.group(1)
            else:
                m = self.datacenter_pattern.search(lines[0])
                if m:
                    self.datacenter = m.group(1).split("-")[0]
        except Exception as e:
            print(e)

    def extract_first_boot_time(self):
        """Best-effort: seconds elapsed until the testbed reported its dpkg architecture."""
        try:
            lines = [
                line
                for line in self.log_lines
                if "testbed dpkg architecture: " in line
            ]
            if lines:
                m = self.boot_time_pattern.search(lines[0])
                if m:
                    self.first_boot_time = int(m.group(1))
        except Exception as e:
            print(e)

    def extract_boot_attempts(self):
        """Best-effort: attempt number from the first nova boot failure line."""
        try:
            lines = [
                line
                for line in self.log_lines
                if "nova boot failed (attempt #" in line
            ]
            if lines:
                # NOTE(review): this reads the first failure line; if attempt
                # numbers count up, lines[-1] would give the total number of
                # attempts - confirm which is intended.
                m = self.boot_attempts_pattern.search(lines[0])
                if m:
                    self.boot_attempts = int(m.group(1))
        except Exception as e:
            print(e)
> +
> +
def get_stats(db_con, since_days_ago, until_days_ago, limit):
    """
    Populate the tests_stats table with data extracted from run logs.

    Walks the result/test tables day by day, from since_days_ago to
    until_days_ago, picks up to `limit` random runs per day that have no
    stats yet, downloads their logs and stores the extracted values.
    """
    # Only announce creation when the table really didn't exist yet; the old
    # "already exists" except-branch was dead code with IF NOT EXISTS.
    table_exists = db_con.execute(
        "SELECT name FROM sqlite_master"
        " WHERE type='table' AND name='tests_stats'"
    ).fetchone()
    if table_exists is None:
        db_con.execute(
            "CREATE TABLE tests_stats("
            " run_id CHAR[30], "
            " first_boot_time INTEGER, "
            " boot_attempts INTEGER, "
            " datacenter CHAR[10],"
            " host CHAR[10],"
            " PRIMARY KEY(run_id))"
        )
        print("Created tests_stats table")

    start_date = datetime.datetime.now() - datetime.timedelta(
        days=since_days_ago
    )
    end_date = datetime.datetime.now() - datetime.timedelta(
        days=until_days_ago
    )
    print(f"Fetching stats from log files from {start_date} to {end_date}")

    db_con.row_factory = sqlite3.Row
    i = 0
    failed = []
    date = start_date
    while date <= end_date:
        # Values are bound as parameters instead of being interpolated into
        # the SQL text (module sets sqlite3.paramstyle = "named").
        for row in db_con.execute(
            """
            SELECT *
            FROM result
            JOIN test ON test.id=result.test_id
            LEFT JOIN tests_stats ON tests_stats.run_id = result.run_id
            WHERE tests_stats.run_id IS NULL
            AND result.run_id LIKE :day
            ORDER BY RANDOM()
            LIMIT :limit
            """,
            {"day": date.strftime("%Y%m%d") + "%", "limit": limit},
        ):
            r = Run(row)
            print(r)
            try:
                # extract_data() downloads the log itself; calling it once
                # here replaces the old download_log() + double
                # extract_data() sequence that fetched the same log
                # repeatedly.
                r.extract_data()
                db_con.execute(
                    """INSERT INTO tests_stats(
                        run_id,
                        first_boot_time,
                        boot_attempts,
                        host,
                        datacenter
                    )
                    VALUES (
                        :run_id,
                        :first_boot_time,
                        :boot_attempts,
                        :host,
                        :datacenter
                    )""",
                    r.as_row_dict(),
                )
                db_con.commit()
                print(f"{i}\t| Extracted {r}")
                print(f"\t| {r.as_row_dict()}")
            except Exception as e:
                failed.append((str(r), repr(e)))
            i += 1
        date += datetime.timedelta(days=1)

    # The failures used to be collected but never shown anywhere.
    if failed:
        print(f"Failed to extract stats for {len(failed)} runs:")
        for run_str, err in failed:
            print(f"\t{run_str}: {err}")
> +
> +
def download_db():
    """Download the public autopkgtest.db and return the local path."""
    url = "http://autopkgtest.ubuntu.com/static/autopkgtest.db"
    # Compact timestamp: str(datetime.now()) used to put spaces and colons
    # into the filename, which is awkward for shells and other tooling.
    stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    dst = f"./autopkgtest_{stamp}_with_stats.db"
    print(f"Downloading database in {dst}")
    urlretrieve(url, dst)
    return dst
> +
> +
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        # Fixed copy-pasted description: this script collects stats from
        # autopkgtest logs, it does not fetch AMQP queues.
        description="Collect test stats from autopkgtest logs into a database"
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Show debugging logs",
    )
    parser.add_argument(
        "--collect-stats",
        action="store_true",
        help="Collect the data from the logs, to populate the DB",
    )
    parser.add_argument(
        "--download-db",
        action="store_true",
        help="Download a fresh database from autopkgtest.ubuntu.com to populate with stats",
    )
    parser.add_argument(
        "--database",
        type=str,
        default="",
        help="Populate this given database with stats",
    )
    parser.add_argument(
        "--since-days-ago",
        type=int,
        default=30,
        help="Fetching data for a time range starting X days ago (default: 30)",
    )
    parser.add_argument(
        "--until-days-ago",
        type=int,
        default=1,
        help="Fetch data for a time range ending X days ago (default: 1, meaning yesterday)",
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=2000,
        help="Number of jobs per days to fetch stats from (default: 2000)",
    )
    parser.add_argument(
        "--clear-stats",
        action="store_true",
        help="Clear the DB from the existing stats, to start from scratch",
    )

    args = parser.parse_args()

    # TODO(review): most of the print() calls in this script should become
    # logging.debug() so this flag actually controls verbosity.
    if "DEBUG" in os.environ or args.debug:
        logging.basicConfig(level=logging.DEBUG)

    # A database to work on is mandatory: either a freshly downloaded one or
    # an existing file supplied by the user.
    if args.download_db:
        db_path = download_db()
    elif args.database:
        db_path = args.database
    else:
        print(
            "You need to supply either --download-db or --database for the data collection to work"
        )
        sys.exit(1)

    autopkgtest_db = sqlite3.connect(db_path)
    with autopkgtest_db as db_con:
        if args.clear_stats:
            print("Clearing stats table")
            db_con.execute("DROP TABLE IF EXISTS tests_stats")
        if args.collect_stats:
            get_stats(
                db_con, args.since_days_ago, args.until_days_ago, args.limit
            )
--
https://code.launchpad.net/~hyask/autopkgtest-cloud/+git/autopkgtest-cloud/+merge/467038
Your team Canonical's Ubuntu QA is requested to review the proposed merge of ~hyask/autopkgtest-cloud:skia/stats into autopkgtest-cloud:master.
References