
[Merge] ~ack/maas-kpi:aggregated-stats into maas-kpi:master

 

Alberto Donato has proposed merging ~ack/maas-kpi:aggregated-stats into maas-kpi:master.

Commit message:
aggregate stats for deployments outside the top 50

add maas.deployment_size metric


Requested reviews:
  MAAS Committers (maas-committers)

For more details, see:
https://code.launchpad.net/~ack/maas-kpi/+git/maas-kpi/+merge/442999
-- 
Your team MAAS Committers is requested to review the proposed merge of ~ack/maas-kpi:aggregated-stats into maas-kpi:master.
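
A standalone sketch of the core idea, for reviewers (illustrative only: the
uuids are made up and the cut-off is 2 rather than the real default of 50 set
by the new --top-deploys flag):

    # Rank deployments by machine count, keep the top N tagged with their
    # uuid, and fold the rest into a single untagged ("") series.
    from collections import Counter, defaultdict

    TOP_DEPLOYS = 2  # stand-in for the real default of 50

    # hypothetical per-deployment machine counts, keyed by uuid
    machines = {"uuid-a": 120, "uuid-b": 7, "uuid-c": 30, "uuid-d": 1}

    top = {uuid for uuid, _ in Counter(machines).most_common(TOP_DEPLOYS)}

    requests = defaultdict(int)
    for uuid in machines:
        key = uuid if uuid in top else ""  # aggregate the long tail
        requests[key] += 1

    print(sorted(requests.items()))  # [('', 2), ('uuid-a', 1), ('uuid-c', 1)]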
diff --git a/maaskpi/dailystats.py b/maaskpi/dailystats.py
index 2573fe1..3060941 100644
--- a/maaskpi/dailystats.py
+++ b/maaskpi/dailystats.py
@@ -3,7 +3,7 @@ import calendar
 import gzip
 import json
 import re
-from collections import defaultdict
+from collections import Counter, defaultdict
 from dataclasses import asdict, dataclass, field, fields
 from datetime import date, datetime, timedelta
 from pathlib import Path
@@ -94,17 +94,11 @@ class DailyImageDownloadsSeries(SeriesHelper):
 
     class Meta:
         series_name = "maas.image_downloads"
-        # The field_uuid is only there since the measurement needs to
-        # have at least one field. If another field is added, the
-        # field_uuid one can be removed.
-        fields = [
-            "field_uuid",
-        ]
+        fields = ["count"]
         tags = [
             "maas_version",
             "series",
             "architecture",
-            "uuid",
         ]
         autocommit = False
 
@@ -120,18 +114,35 @@ class DailyRequestsSeries(SeriesHelper):
 
     class Meta:
         series_name = "maas.daily_requests"
-        # The field_uuid is only there since the measurement needs to
-        # have at least one field. If another field is added, the
-        # field_uuid one can be removed.
-        fields = [
-            "field_uuid",
-        ]
-        tags = ["maas_version", "uuid"]
+        fields = ["count"]
+        tags = ["maas_version"]
+        autocommit = False
+
+
+class DailyDeploymentSizeSeries(SeriesHelper):
+    """Bucket-based size of deployments.
+
+    This records the number of deployments in each size bucket, by number of
+    registered machines.
+    """
+
+    class Meta:
+        series_name = "maas.deployment_size"
+        fields = ["count"]
+        tags = ["size"]
         autocommit = False
 
 
+class StatsMixin:
+    """Mixin class for stats."""
+
+    def update_stats(self, other_stats):
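+        # sum each dataclass field of other_stats into the matching field
+        # on self; assumes both share the same, all-numeric field set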
+        for stat in (f.name for f in fields(self)):
+            setattr(self, stat, getattr(self, stat) + getattr(other_stats, stat))
+
+
 @dataclass
-class DeploymentStats:
+class DeploymentStats(StatsMixin):
     """Stats for a deployment."""
 
     machines: int = 0
@@ -164,6 +175,8 @@ class DeploymentStats:
     vm_hosts_virsh_utilized_memory: int = 0
     subnets_v4: int = 0
     subnets_v6: int = 0
+    # number of deployments whose counts are aggregated into this instance
+    deployment_count: int = 1
 
 
 class DailyStatsSeries(SeriesHelper):
@@ -177,9 +190,7 @@ class DailyStatsSeries(SeriesHelper):
 class DailyPowerDriverSeries(SeriesHelper):
     class Meta:
         series_name = "maas.daily_power_drivers"
-        fields = [
-            "count",
-        ]
+        fields = ["count", "deployment_count"]
         tags = ["maas_version", "uuid", "power_driver_name", "power_driver_creation"]
         autocommit = False
 
@@ -321,6 +332,19 @@ def get_bmc_stats(data):
     return data.get("bmcs", default)
 
 
+@dataclass
+class DeploymentSizeBucket:
+    """A bucket counting number of deployments by machine size."""
+
+    min: int
+    max: int | None = None
+    count: int = 0
+    tag: str = field(init=False)
+
+    def __post_init__(self):
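+        # human-readable bucket label, e.g. "1-10", or "101+" for the
+        # open-ended bucket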
+        self.tag = f"{self.min}+" if self.max is None else f"{self.min}-{self.max}"
+
+
 class DailyStats:
     """Keep track of stats from MAAS deployment send for a single day.
 
@@ -328,12 +352,21 @@ class DailyStats:
     is kept.
     """
 
-    def __init__(self, day):
+    def __init__(self, day, top_deploys_count):
         self.day = day
+        self.top_deploys_count = top_deploys_count
         self.entries = defaultdict(dict)
         self.image_entries = defaultdict(set)
         self.no_uuids = []
         self.with_data = 0
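+        # ordered, non-overlapping buckets for counting deployments by
+        # machine count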
+        self.deployment_size_buckets = [
+            DeploymentSizeBucket(min=0, max=0),
+            DeploymentSizeBucket(min=1, max=10),
+            DeploymentSizeBucket(min=11, max=20),
+            DeploymentSizeBucket(min=21, max=50),
+            DeploymentSizeBucket(min=51, max=100),
+            DeploymentSizeBucket(min=101),
+        ]
 
     def process_item(self, item: LogItem):
         """Process a log line and update the internal data structure."""
@@ -376,43 +409,106 @@ class DailyStats:
             f"{self.day}: {len(self.entries)} entries, {len(self.no_uuids)} "
             f"without uuids, {self.with_data} with data"
         )
-        timestamp = get_nanosecond_timestamp(self.day)
+
+        top_deploys = self._get_top_deployments()
+
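+        # first pass: accumulate counts per tag combination; the influxdb
+        # points are emitted below, one per combination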
+        requests_series = defaultdict(int)
+        # track [count, deployment_count]
+        power_driver_series = defaultdict(lambda: [0, 0])
+        stats_series = defaultdict(DeploymentStats)
+        image_download_series = defaultdict(int)
+
         for uuid, info in self.entries.items():
-            DailyRequestsSeries(
-                time=timestamp,
-                maas_version=info["version"],
-                uuid=uuid,
-                field_uuid=uuid,
-            )
+            self._update_deployments_size_count(info)
+
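+            # fold deployments outside the top N into a single series
+            # with an empty uuid tag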
+            if uuid not in top_deploys:
+                uuid = ""
+
+            version = info["version"]
+            requests_series[version] += 1
+
             if bmc_stats := info.get("bmc_stats"):
                 for creation_method, bmcs in bmc_stats.items():
                     for driver_name, count in bmcs.items():
-                        DailyPowerDriverSeries(
-                            time=timestamp,
-                            maas_version=info["version"],
-                            uuid=uuid,
-                            count=count,
-                            power_driver_name=driver_name,
-                            power_driver_creation=creation_method,
-                        )
+                        counts = power_driver_series[
+                            uuid, version, driver_name, creation_method
+                        ]
+                        counts[0] += count
+                        counts[1] += 1  # deployment count
             if stats := info.get("stats"):
-                DailyStatsSeries(
-                    time=timestamp,
-                    maas_version=info["version"],
-                    uuid=uuid,
-                    **asdict(stats),
-                )
-
-        for uuid, image_requests in sorted(self.image_entries.items()):
+                stats_series[uuid, version].update_stats(stats)
+
+        for image_requests in self.image_entries.values():
             for maas_version, series, architecture in image_requests:
-                DailyImageDownloadsSeries(
-                    time=timestamp,
-                    maas_version=maas_version,
-                    uuid=uuid,
-                    field_uuid=uuid,
-                    series=series,
-                    architecture=architecture,
-                )
+                image_download_series[maas_version, series, architecture] += 1
+
+        # create influxdb series
+        timestamp = get_nanosecond_timestamp(self.day)
+        for version, count in requests_series.items():
+            DailyRequestsSeries(
+                time=timestamp,
+                maas_version=version,
+                count=count,
+            )
+        for (
+            version,
+            series,
+            architecture,
+        ), count in image_download_series.items():
+            DailyImageDownloadsSeries(
+                time=timestamp,
+                maas_version=version,
+                series=series,
+                architecture=architecture,
+                count=count,
+            )
+        for (
+            uuid,
+            version,
+            driver_name,
+            creation_method,
+        ), (count, deployment_count) in power_driver_series.items():
+            DailyPowerDriverSeries(
+                time=timestamp,
+                maas_version=version,
+                uuid=uuid,
+                power_driver_name=driver_name,
+                power_driver_creation=creation_method,
+                count=count,
+                deployment_count=deployment_count,
+            )
+        for (uuid, version), stats in stats_series.items():
+            DailyStatsSeries(
+                time=timestamp,
+                maas_version=version,
+                uuid=uuid,
+                **asdict(stats),
+            )
+        for bucket in self.deployment_size_buckets:
+            DailyDeploymentSizeSeries(
+                time=timestamp,
+                size=bucket.tag,
+                count=bucket.count,
+            )
+
+    def _get_top_deployments(self) -> set[str]:
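+        # rank deployments by machine count; entries without stats rank as 0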
+        counter = Counter(
+            {
+                uuid: info["stats"].machines if "stats" in info else 0
+                for uuid, info in self.entries.items()
+            }
+        )
+        return {uuid for uuid, _ in counter.most_common(self.top_deploys_count)}
+
+    def _update_deployments_size_count(self, info):
+        stats = info.get("stats")
+        if stats is None:
+            return
+        # buckets are assumed to be ordered by increasing size
+        for bucket in self.deployment_size_buckets:
+            if bucket.max is None or stats.machines <= bucket.max:
+                bucket.count += 1
+                return
 
 
 class LogFilesHost:
@@ -555,12 +651,10 @@ class DailyStatsCollector(Collector):
         self.parser.add_argument(
             "--swift-key",
             default="",
-            nargs="?",
             help="Path to the file containing the Swift key",
         )
         self.parser.add_argument(
             "--cache-dir",
-            nargs="?",
             help="Path to the dir to cache the downloaded logs",
         )
         self.parser.add_argument(
@@ -568,11 +662,20 @@ class DailyStatsCollector(Collector):
             "--days",
             # Go back a few days by default, since the logs for the last
             # day or two usually are missing.
-            default="3",
+            default=3,
             type=int,
-            nargs="?",
             help="Days back to collect metrics for",
         )
+        self.parser.add_argument(
+            "--top-deploys",
+            default=50,
+            type=int,
+            help=(
+                "Number of deployments to report individually "
+                "(top ones by machine count). "
+                "Others will be aggregated",
+            ),
+        )
 
     def run_collect(self, args):
         if args.swift_key:
@@ -594,9 +697,9 @@ class DailyStatsCollector(Collector):
             assert args.cache_dir, "Have to specify either swift or cache."
             swift = None
 
-        return self.collect(args.days, swift, args.cache_dir)
+        return self.collect(args.days, args.top_deploys, swift, args.cache_dir)
 
-    def collect(self, days, swift, cache_dir):
+    def collect(self, days, top_deploys_count, swift, cache_dir):
         log_files = LogFiles(swift, days, cache_dir)
         log_files.init()
         if not log_files.hosts:
@@ -617,7 +720,7 @@ class DailyStatsCollector(Collector):
                 # This has the effect that we won't process the last day,
                 # which most likely will be incomplete.
                 day_stats.create_series()
-            day_stats = DailyStats(log_files.current_day)
+            day_stats = DailyStats(log_files.current_day, top_deploys_count)
             for item in items:
                 day_stats.process_item(item)
 
@@ -625,6 +728,7 @@ class DailyStatsCollector(Collector):
         yield DailyRequestsSeries
         yield DailyImageDownloadsSeries
         yield DailyPowerDriverSeries
+        yield DailyDeploymentSizeSeries
 
 
 run = DailyStatsCollector().run
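
A standalone snippet to sanity-check the new helpers (MiniStats is a
hypothetical, trimmed-down stand-in for DeploymentStats; StatsMixin and
DeploymentSizeBucket mirror the diff above; needs Python 3.10+ for the
"int | None" annotation):

    from dataclasses import dataclass, field, fields

    class StatsMixin:
        def update_stats(self, other_stats):
            for stat in (f.name for f in fields(self)):
                setattr(self, stat, getattr(self, stat) + getattr(other_stats, stat))

    @dataclass
    class MiniStats(StatsMixin):
        machines: int = 0
        deployment_count: int = 1  # each instance starts out as one deployment

    a, b = MiniStats(machines=5), MiniStats(machines=7)
    a.update_stats(b)
    print(a)  # MiniStats(machines=12, deployment_count=2)

    @dataclass
    class DeploymentSizeBucket:
        min: int
        max: int | None = None
        count: int = 0
        tag: str = field(init=False)

        def __post_init__(self):
            self.tag = f"{self.min}+" if self.max is None else f"{self.min}-{self.max}"

    print([DeploymentSizeBucket(min=1, max=10).tag, DeploymentSizeBucket(min=101).tag])
    # ['1-10', '101+']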
