← Back to team overview

canonical-ubuntu-qa team mailing list archive

[Merge] ~hyask/autopkgtest-cloud:skia/metrics_cleanup into autopkgtest-cloud:master

 

Skia has proposed merging ~hyask/autopkgtest-cloud:skia/metrics_cleanup into autopkgtest-cloud:master.

Requested reviews:
  Canonical's Ubuntu QA (canonical-ubuntu-qa)

For more details, see:
https://code.launchpad.net/~hyask/autopkgtest-cloud/+git/autopkgtest-cloud/+merge/466134

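Some small improvements to the 'metrics' script: get_remotes() no longer probes `lxc cluster list` and keeps a single per-arch counts dict (so only the autopkgtest_lxd_status measurement is submitted), and extra log messages are added around the remote checks and the influxdb connection/write.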
-- 
Your team Canonical's Ubuntu QA is requested to review the proposed merge of ~hyask/autopkgtest-cloud:skia/metrics_cleanup into autopkgtest-cloud:master.
diff --git a/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/metrics b/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/metrics
index a9fceb2..cfbf6c4 100755
--- a/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/metrics
+++ b/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/metrics
@@ -47,7 +47,7 @@ def make_submission(counts, measurement):
             },
         }
         out.append(m)
-    logger.debug("measurements sent: %s", out)
+    logger.debug("submission: %s", out)
     return out
 
 
@@ -118,67 +118,51 @@ def get_list_of_intended_remote_ips(arch):
 
 def get_remotes():
     logger.info("Collecting remotes")
-    cluster_counts = {}
-    noncluster_counts = {}
+    counts = {}
     out = subprocess.check_output(
         ["lxc", "remote", "list", "--format=json"], universal_newlines=True
     )
     functional_ips = []
     remotes = json.loads(out)
+    logger.debug("Remote %s", remotes)
     for r in list(remotes.keys()):
         if not r.startswith("lxd"):
             del remotes[r]
 
     for r in remotes:
+        logger.info("Checking remote %s", r)
         (_, arch, _) = r.split("-", 3)
         prefix = f"lxd-{arch}-"
-        cluster_counts.setdefault(arch, [0, 0])
-        noncluster_counts.setdefault(arch, [0, 0])
+        counts.setdefault(arch, [0, 0])
         intended_ips = get_list_of_intended_remote_ips(arch)
         if r.replace(prefix, "") not in intended_ips:
             # we increment noncluster_counts here as we want to record remotes
             # that still exist that we don't want.
-            noncluster_counts[arch][1] += 1
+            counts[arch][1] += 1
             logger.warning("extra remote found, counting as error: %s", r)
             continue
 
         try:
-            cl = subprocess.check_output(
-                ["lxc", "cluster", "list", f"{r}:", "--format=json"],
+            subprocess.check_call(
+                ["lxc", "list", f"{r}:"],
+                stdout=subprocess.DEVNULL,
                 stderr=subprocess.DEVNULL,
-                universal_newlines=True,
+                timeout=30,
             )
-            for node in json.loads(cl):
-                if node["status"] == "Online":
-                    cluster_counts[arch][0] += 1
-                else:
-                    cluster_counts[arch][1] += 1
-        except subprocess.CalledProcessError:  # it's not a cluster node
-            try:
-                subprocess.check_call(
-                    ["lxc", "list", f"{r}:"],
-                    stdout=subprocess.DEVNULL,
-                    stderr=subprocess.DEVNULL,
-                    timeout=30,
-                )
-                noncluster_counts[arch][0] += 1
-                functional_ips.append(r.replace(prefix, ""))
-            except (
-                subprocess.CalledProcessError,
-                subprocess.TimeoutExpired,
-            ) as e:
-                logger.warning("remote error: %s", repr(e))
-                noncluster_counts[arch][1] += 1
-
-    cluster_status = make_submission(
-        cluster_counts, "autopkgtest_cluster_status"
-    )
-    noncluster_status = make_submission(
-        noncluster_counts, "autopkgtest_lxd_status"
-    )
-    return cluster_status + noncluster_status
+            counts[arch][0] += 1
+            functional_ips.append(r.replace(prefix, ""))
+        except (
+            subprocess.CalledProcessError,
+            subprocess.TimeoutExpired,
+        ) as e:
+            logger.warning("remote error: %s", repr(e))
+            counts[arch][1] += 1
+
+    status = make_submission(counts, "autopkgtest_lxd_status")
+    return status
 
 
+logger.debug("Connecting to influxdb")
 influx_client = InfluxDBClient(
     INFLUXDB_HOSTNAME,
     INFLUXDB_PORT,
@@ -187,4 +171,7 @@ influx_client = InfluxDBClient(
     INFLUXDB_DATABASE,
 )
 
+logger.debug("Writing to influxdb")
 influx_client.write_points(get_units() + get_remotes())
+
+logger.info("Metrics collection completed and data sent to influxdb")