[Merge] ~ack/maas-kpi:aggregated-stats into maas-kpi:master
Alberto Donato has proposed merging ~ack/maas-kpi:aggregated-stats into maas-kpi:master.
Commit message:
aggregate stats for deployments out of the top 50
add maas.deployment_size metric
Requested reviews:
MAAS Committers (maas-committers)
For more details, see:
https://code.launchpad.net/~ack/maas-kpi/+git/maas-kpi/+merge/442999
--
Your team MAAS Committers is requested to review the proposed merge of ~ack/maas-kpi:aggregated-stats into maas-kpi:master.
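In short, the collector now ranks deployments by registered machine count, keeps individual series only for the top N deployments (default 50), and folds every other deployment into a single aggregate entry tagged with an empty uuid. A standalone sketch of that selection step (the Stats dataclass and sample entries below are illustrative stand-ins, not the module's real parsing types):

from collections import Counter
from dataclasses import dataclass


@dataclass
class Stats:
    machines: int = 0  # stand-in for the real DeploymentStats


def top_deployments(entries: dict, top_count: int) -> set[str]:
    # Rank deployments by machine count; entries without stats rank as 0.
    counter = Counter(
        {
            uuid: info["stats"].machines if "stats" in info else 0
            for uuid, info in entries.items()
        }
    )
    return {uuid for uuid, _ in counter.most_common(top_count)}


entries = {
    "uuid-a": {"stats": Stats(machines=120)},
    "uuid-b": {"stats": Stats(machines=3)},
    "uuid-c": {},  # deployment that never reported stats
}
top = top_deployments(entries, top_count=1)
for uuid in entries:
    if uuid not in top:
        uuid = ""  # aggregated, matching the diff's empty-uuid convention
    print(uuid or "<aggregated>")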
diff --git a/maaskpi/dailystats.py b/maaskpi/dailystats.py
index 2573fe1..3060941 100644
--- a/maaskpi/dailystats.py
+++ b/maaskpi/dailystats.py
@@ -3,7 +3,7 @@ import calendar
import gzip
import json
import re
-from collections import defaultdict
+from collections import Counter, defaultdict
from dataclasses import asdict, dataclass, field, fields
from datetime import date, datetime, timedelta
from pathlib import Path
@@ -94,17 +94,11 @@ class DailyImageDownloadsSeries(SeriesHelper):
class Meta:
series_name = "maas.image_downloads"
- # The field_uuid is only there since the measurement needs to
- # have at least one field. If another field is added, the
- # field_uuid one can be removed.
- fields = [
- "field_uuid",
- ]
+ fields = ["count"]
tags = [
"maas_version",
"series",
"architecture",
- "uuid",
]
autocommit = False
@@ -120,18 +114,35 @@ class DailyRequestsSeries(SeriesHelper):
class Meta:
series_name = "maas.daily_requests"
- # The field_uuid is only there since the measurement needs to
- # have at least one field. If another field is added, the
- # field_uuid one can be removed.
- fields = [
- "field_uuid",
- ]
- tags = ["maas_version", "uuid"]
+ fields = ["count"]
+ tags = ["maas_version"]
+ autocommit = False
+
+
+class DailyDeploymentSizeSeries(SeriesHelper):
+ """Bucket-based size of deployments.
+
+ This records the number of deployments in each size bucket, by number of
+ registered machines.
+ """
+
+ class Meta:
+ series_name = "maas.deployment_size"
+ fields = ["count"]
+ tags = ["size"]
    autocommit = False


+class StatsMixin:
+ """Mixin class for stats."""
+
+ def update_stats(self, other_stats):
+ for stat in (field.name for field in fields(self)):
+ setattr(self, stat, getattr(self, stat) + getattr(other_stats, stat))
+
+
@dataclass
-class DeploymentStats:
+class DeploymentStats(StatsMixin):
"""Stats for a deployment."""
machines: int = 0
@@ -164,6 +175,8 @@ class DeploymentStats:
vm_hosts_virsh_utilized_memory: int = 0
subnets_v4: int = 0
subnets_v6: int = 0
+ # number of deployments this stat is aggregating counts for
+    deployment_count: int = 1


class DailyStatsSeries(SeriesHelper):
@@ -177,9 +190,7 @@ class DailyStatsSeries(SeriesHelper):
class DailyPowerDriverSeries(SeriesHelper):
class Meta:
series_name = "maas.daily_power_drivers"
- fields = [
- "count",
- ]
+ fields = ["count", "deployment_count"]
tags = ["maas_version", "uuid", "power_driver_name", "power_driver_creation"]
autocommit = False
@@ -321,6 +332,19 @@ def get_bmc_stats(data):
    return data.get("bmcs", default)


+@dataclass
+class DeploymentSizeBucket:
+ """A bucket counting number of deployments by machine size."""
+
+ min: int
+ max: int | None = None
+ count: int = 0
+ tag: str = field(init=False)
+
+ def __post_init__(self):
+ self.tag = f"{self.min}+" if self.max is None else f"{self.min}-{self.max}"
+
+
class DailyStats:
"""Keep track of stats from MAAS deployment send for a single day.
@@ -328,12 +352,21 @@ class DailyStats:
is kept.
"""
- def __init__(self, day):
+ def __init__(self, day, top_deploys_count):
self.day = day
+ self.top_deploys_count = top_deploys_count
self.entries = defaultdict(dict)
self.image_entries = defaultdict(set)
self.no_uuids = []
self.with_data = 0
+ self.deployment_size_buckets = [
+ DeploymentSizeBucket(min=0, max=0),
+ DeploymentSizeBucket(min=1, max=10),
+ DeploymentSizeBucket(min=11, max=20),
+ DeploymentSizeBucket(min=21, max=50),
+ DeploymentSizeBucket(min=51, max=100),
+            DeploymentSizeBucket(min=101),
+        ]

    def process_item(self, item: LogItem):
"""Process a log line and update the internal data structure."""
@@ -376,43 +409,106 @@ class DailyStats:
f"{self.day}: {len(self.entries)} entries, {len(self.no_uuids)} "
f"without uuids, {self.with_data} with data"
)
- timestamp = get_nanosecond_timestamp(self.day)
+
+ top_deploys = self._get_top_deployments()
+
+ requests_series = defaultdict(int)
+ # track [count, deployment_count]
+ power_driver_series = defaultdict(lambda: [0, 0])
+ stats_series = defaultdict(DeploymentStats)
+ image_download_series = defaultdict(int)
+
for uuid, info in self.entries.items():
- DailyRequestsSeries(
- time=timestamp,
- maas_version=info["version"],
- uuid=uuid,
- field_uuid=uuid,
- )
+ self._update_deployments_size_count(info)
+
+ if uuid not in top_deploys:
+ uuid = ""
+
+ version = info["version"]
+ requests_series[version] += 1
+
if bmc_stats := info.get("bmc_stats"):
for creation_method, bmcs in bmc_stats.items():
for driver_name, count in bmcs.items():
- DailyPowerDriverSeries(
- time=timestamp,
- maas_version=info["version"],
- uuid=uuid,
- count=count,
- power_driver_name=driver_name,
- power_driver_creation=creation_method,
- )
+ counts = power_driver_series[
+ uuid, version, driver_name, creation_method
+ ]
+ counts[0] += count
+ counts[1] += 1 # deployment count
if stats := info.get("stats"):
- DailyStatsSeries(
- time=timestamp,
- maas_version=info["version"],
- uuid=uuid,
- **asdict(stats),
- )
-
- for uuid, image_requests in sorted(self.image_entries.items()):
+ stats_series[uuid, version].update_stats(stats)
+
+ for uuid, image_requests in self.image_entries.items():
for maas_version, series, architecture in image_requests:
- DailyImageDownloadsSeries(
- time=timestamp,
- maas_version=maas_version,
- uuid=uuid,
- field_uuid=uuid,
- series=series,
- architecture=architecture,
- )
+ image_download_series[maas_version, series, architecture] += 1
+
+ # create influxdb series
+ timestamp = get_nanosecond_timestamp(self.day)
+ for version, count in requests_series.items():
+ DailyRequestsSeries(
+ time=timestamp,
+ maas_version=version,
+ count=count,
+ )
+ for (
+ version,
+ series,
+ architecture,
+ ), count in image_download_series.items():
+ DailyImageDownloadsSeries(
+ time=timestamp,
+ maas_version=version,
+ series=series,
+ architecture=architecture,
+ count=count,
+ )
+ for (
+ uuid,
+ version,
+ driver_name,
+ creation_method,
+ ), (count, deployment_count) in power_driver_series.items():
+ DailyPowerDriverSeries(
+ time=timestamp,
+ maas_version=version,
+ uuid=uuid,
+ power_driver_name=driver_name,
+ power_driver_creation=creation_method,
+ count=count,
+ deployment_count=deployment_count,
+ )
+ for (uuid, version), stats in stats_series.items():
+ DailyStatsSeries(
+ time=timestamp,
+ maas_version=version,
+ uuid=uuid,
+ **asdict(stats),
+ )
+ for bucket in self.deployment_size_buckets:
+ DailyDeploymentSizeSeries(
+ time=timestamp,
+ size=bucket.tag,
+ count=bucket.count,
+ )
+
+ def _get_top_deployments(self) -> set[str]:
+ counter = Counter(
+ {
+ uuid: info["stats"].machines if "stats" in info else 0
+ for uuid, info in self.entries.items()
+ }
+ )
+ return set(uuid for uuid, _ in counter.most_common(self.top_deploys_count))
+
+ def _update_deployments_size_count(self, info):
+ stats = info.get("stats")
+ if stats is None:
+ return
+        # this assumes buckets are ordered
+ for bucket in self.deployment_size_buckets:
+ if bucket.max is None or stats.machines <= bucket.max:
+ bucket.count += 1
+            return


class LogFilesHost:
@@ -555,12 +651,10 @@ class DailyStatsCollector(Collector):
self.parser.add_argument(
"--swift-key",
default="",
- nargs="?",
help="Path to the file containing the Swift key",
)
self.parser.add_argument(
"--cache-dir",
- nargs="?",
help="Path to the dir to cache the downloaded logs",
)
self.parser.add_argument(
@@ -568,11 +662,20 @@ class DailyStatsCollector(Collector):
"--days",
# Go back a few days by default, since the logs for the last
# day or two usually are missing.
- default="3",
+ default=3,
type=int,
- nargs="?",
help="Days back to collect metrics for",
)
+ self.parser.add_argument(
+ "--top-deploys",
+ default=50,
+ type=int,
+ help=(
+ "Number of deployments to report individually "
+ "(top ones by machine count). "
+ "Others will be aggregated",
+ ),
+    )

    def run_collect(self, args):
if args.swift_key:
@@ -594,9 +697,9 @@ class DailyStatsCollector(Collector):
assert args.cache_dir, "Have to specify either swift or cache."
swift = None
- return self.collect(args.days, swift, args.cache_dir)
+ return self.collect(args.days, args.top_deploys, swift, args.cache_dir)
- def collect(self, days, swift, cache_dir):
+ def collect(self, days, top_deploys_count, swift, cache_dir):
log_files = LogFiles(swift, days, cache_dir)
log_files.init()
if not log_files.hosts:
@@ -617,7 +720,7 @@ class DailyStatsCollector(Collector):
# This has the effect that we won't process the last day,
# which most likely will be incomplete.
day_stats.create_series()
- day_stats = DailyStats(log_files.current_day)
+ day_stats = DailyStats(log_files.current_day, top_deploys_count)
for item in items:
day_stats.process_item(item)
@@ -625,6 +728,7 @@ class DailyStatsCollector(Collector):
yield DailyRequestsSeries
yield DailyImageDownloadsSeries
yield DailyPowerDriverSeries
+    yield DailyDeploymentSizeSeries


run = DailyStatsCollector().run
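The StatsMixin added in the diff sums one stats record into another field by field via dataclasses.fields, so an aggregate entry accumulates totals across all the deployments folded into it. A minimal illustration of the mechanism (only two of the real DeploymentStats fields are shown):

from dataclasses import dataclass, fields


class StatsMixin:
    """Mixin adding field-wise accumulation to a stats dataclass."""

    def update_stats(self, other_stats):
        # Add every dataclass field of other_stats into self.
        for stat in (f.name for f in fields(self)):
            setattr(self, stat, getattr(self, stat) + getattr(other_stats, stat))


@dataclass
class DeploymentStats(StatsMixin):
    machines: int = 0
    subnets_v4: int = 0


total = DeploymentStats()
total.update_stats(DeploymentStats(machines=5, subnets_v4=2))
total.update_stats(DeploymentStats(machines=12, subnets_v4=1))
print(total)  # DeploymentStats(machines=17, subnets_v4=3)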
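Similarly, the new deployment-size bucketing relies on the buckets being ordered smallest first: each deployment is counted in the first bucket whose upper bound covers its machine count, and the open-ended bucket (max=None) catches everything beyond the last bound. A runnable sketch mirroring DeploymentSizeBucket and _update_deployments_size_count from the diff:

from dataclasses import dataclass, field


@dataclass
class DeploymentSizeBucket:
    """A bucket counting deployments by machine count."""

    min: int
    max: int | None = None
    count: int = 0
    tag: str = field(init=False)

    def __post_init__(self):
        self.tag = f"{self.min}+" if self.max is None else f"{self.min}-{self.max}"


# Buckets must be ordered smallest first: a deployment lands in the
# first bucket whose upper bound covers its machine count.
buckets = [
    DeploymentSizeBucket(min=0, max=0),
    DeploymentSizeBucket(min=1, max=10),
    DeploymentSizeBucket(min=11, max=20),
    DeploymentSizeBucket(min=21, max=50),
    DeploymentSizeBucket(min=51, max=100),
    DeploymentSizeBucket(min=101),
]


def count_deployment(machines: int) -> None:
    for bucket in buckets:
        if bucket.max is None or machines <= bucket.max:
            bucket.count += 1
            return


for machines in (0, 7, 15, 300):
    count_deployment(machines)
print({bucket.tag: bucket.count for bucket in buckets})
# {'0-0': 1, '1-10': 1, '11-20': 1, '21-50': 0, '51-100': 0, '101+': 1}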