sts-sponsors team mailing list archive
-
sts-sponsors team
-
Mailing list archive
-
Message #08465
[Merge] ~ack/maas-kpi:aggregated-stats into maas-kpi:master
Alberto Donato has proposed merging ~ack/maas-kpi:aggregated-stats into maas-kpi:master.
Commit message:
aggregate stats for deployments out of the top 50
add maas.deployment_size metric
Requested reviews:
MAAS Lander (maas-lander): unittests
MAAS Committers (maas-committers)
For more details, see:
https://code.launchpad.net/~ack/maas-kpi/+git/maas-kpi/+merge/442999
--
Your team MAAS Committers is requested to review the proposed merge of ~ack/maas-kpi:aggregated-stats into maas-kpi:master.
diff --git a/grafana/features.dashboard.py b/grafana/features.dashboard.py
index 89f0897..33d3372 100644
--- a/grafana/features.dashboard.py
+++ b/grafana/features.dashboard.py
@@ -43,18 +43,16 @@ dashboard = Dashboard(
"Machines",
[
InfluxDBTarget(
- measurement="maas.daily_stats",
- select=Select(
- [
- [
- Field("machines"),
- Sum(),
- Alias("Total number of machines"),
- ],
- ]
+ rawQuery=True,
+ query=(
+ """
+ SELECT SUM("machines")
+ FROM maas..daily_stats
+ WHERE $timeFilter
+ GROUP BY time(1d)
+ """
),
- groupBy=[InfluxDBTime("1d")],
- alias="$col",
+ alias="Total number of machines",
)
],
),
@@ -62,20 +60,16 @@ dashboard = Dashboard(
"Reporting deployments",
[
InfluxDBTarget(
- measurement="maas.daily_stats",
- select=Select(
- [
- [
- Field("machines"),
- Count(),
- Alias(
- "Number of deployments that sent a report"
- ),
- ],
- ]
+ rawQuery=True,
+ query=(
+ """
+ SELECT SUM("deployment_count")
+ FROM maas..daily_stats
+ WHERE $timeFilter
+ GROUP BY time(1d)
+ """
),
- groupBy=[InfluxDBTime("1d")],
- alias="$col",
+ alias="Number of deployments that sent a report",
)
],
),
@@ -88,32 +82,19 @@ dashboard = Dashboard(
"Annotation Counts",
[
InfluxDBTarget(
- measurement="maas.daily_stats",
- select=Select(
- [
- [
- Field("workload_annotations_machines"),
- Sum(),
- Alias("Machines with annotations"),
- ],
- [
- Field("workload_annotations_total"),
- Sum(),
- Alias("Total annotations"),
- ],
- [
- Field("workload_annotations_unique_keys"),
- Sum(),
- Alias("Unique keys"),
- ],
- [
- Field("workload_annotations_unique_values"),
- Sum(),
- Alias("Unique values"),
- ],
- ]
+ rawQuery=True,
+ query=(
+ """
+ SELECT
+ SUM("workload_annotations_machines") AS "Machines with annotations",
+ SUM("workload_annotations_total") AS "Total annotations",
+ SUM("workload_annotations_unique_keys") AS "Unique keys",
+ SUM("workload_annotations_unique_values") AS "Unique values"
+ FROM maas..daily_stats
+ WHERE $timeFilter
+ GROUP BY time(1d)
+ """
),
- groupBy=[InfluxDBTime("1d")],
alias="$col",
)
],
@@ -127,22 +108,17 @@ dashboard = Dashboard(
"Registered hosts",
[
InfluxDBTarget(
- measurement="maas.daily_stats",
- select=Select(
- [
- [
- Field("vm_hosts_lxd_total"),
- Sum(),
- Alias("LXD"),
- ],
- [
- Field("vm_hosts_virsh_total"),
- Sum(),
- Alias("Virsh"),
- ],
- ]
+ rawQuery=True,
+ query=(
+ """
+ SELECT
+ SUM("vm_hosts_lxd_total") AS "LXD",
+ SUM("vm_hosts_virsh_total") AS "Virsh"
+ FROM maas..daily_stats
+ WHERE $timeFilter
+ GROUP BY time(1d)
+ """
),
- groupBy=[InfluxDBTime("1d")],
alias="$col",
)
],
@@ -153,22 +129,17 @@ dashboard = Dashboard(
"VMs",
[
InfluxDBTarget(
- measurement="maas.daily_stats",
- select=Select(
- [
- [
- Field("vm_hosts_lxd_vms"),
- Sum(),
- Alias("LXD"),
- ],
- [
- Field("vm_hosts_virsh_vms"),
- Sum(),
- Alias("Virsh"),
- ],
- ]
+ rawQuery=True,
+ query=(
+ """
+ SELECT
+ SUM("vm_hosts_lxd_vms") AS "LXD",
+ SUM("vm_hosts_virsh_vms") AS "Virsh"
+ FROM maas..daily_stats
+ WHERE $timeFilter
+ GROUP BY time(1d)
+ """
),
- groupBy=[InfluxDBTime("1d")],
alias="$col",
)
],
@@ -179,32 +150,19 @@ dashboard = Dashboard(
"Cores",
[
InfluxDBTarget(
- measurement="maas.daily_stats",
- select=Select(
- [
- [
- Field("vm_hosts_lxd_available_cores"),
- Sum(),
- Alias("LXD available"),
- ],
- [
- Field("vm_hosts_lxd_utilized_cores"),
- Sum(),
- Alias("LXD utilized"),
- ],
- [
- Field("vm_hosts_virsh_available_cores"),
- Sum(),
- Alias("Virsh available"),
- ],
- [
- Field("vm_hosts_virsh_utilized_cores"),
- Sum(),
- Alias("Virsh utilized"),
- ],
- ]
+ rawQuery=True,
+ query=(
+ """
+ SELECT
+ SUM("vm_hosts_lxd_available_cores") AS "LXD available",
+ SUM("vm_hosts_lxd_utilized_cores") AS "LXD utilized",
+ SUM("vm_hosts_virsh_available_cores") AS "Virsh available",
+ SUM("vm_hosts_virsh_utilized_cores") AS "Virsh utilized"
+ FROM maas..daily_stats
+ WHERE $timeFilter
+ GROUP BY time(1d)
+ """
),
- groupBy=[InfluxDBTime("1d")],
alias="$col",
)
],
@@ -215,32 +173,19 @@ dashboard = Dashboard(
"Memory",
[
InfluxDBTarget(
- measurement="maas.daily_stats",
- select=Select(
- [
- [
- Field("vm_hosts_lxd_available_memory"),
- Sum(),
- Alias("LXD available"),
- ],
- [
- Field("vm_hosts_lxd_utilized_memory"),
- Sum(),
- Alias("LXD utilized"),
- ],
- [
- Field("vm_hosts_virsh_available_memory"),
- Sum(),
- Alias("Virsh available"),
- ],
- [
- Field("vm_hosts_virsh_utilized_memory"),
- Sum(),
- Alias("Virsh utilized"),
- ],
- ]
+ rawQuery=True,
+ query=(
+ """
+ SELECT
+ SUM("vm_hosts_lxd_available_memory") AS "LXD available",
+ SUM("vm_hosts_lxd_utilized_memory") AS "LXD utilized",
+ SUM("vm_hosts_virsh_available_memory") AS "Virsh available",
+ SUM("vm_hosts_virsh_utilized_memory") AS "Virsh utilized"
+ FROM maas..daily_stats
+ WHERE $timeFilter
+ GROUP BY time(1d)
+ """
),
- groupBy=[InfluxDBTime("1d")],
alias="$col",
)
],
@@ -258,20 +203,14 @@ dashboard = Dashboard(
"Power drivers",
[
InfluxDBTarget(
- measurement="maas.daily_power_drivers",
- select=Select(
- [
- [
- Field("count"),
- Sum(),
- ],
- ]
+ rawQuery=True,
+ query=(
+ """
+ SELECT SUM("count") FROM maas..daily_power_drivers
+ WHERE $timeFilter
+ GROUP BY time(1d), "power_driver_name", "power_driver_creation"
+ """
),
- groupBy=[
- InfluxDBTime("1d"),
- TagField("power_driver_name"),
- TagField("power_driver_creation"),
- ],
)
],
),
diff --git a/grafana/kpis.dashboard.py b/grafana/kpis.dashboard.py
index 478db6e..f56c8e1 100644
--- a/grafana/kpis.dashboard.py
+++ b/grafana/kpis.dashboard.py
@@ -12,9 +12,27 @@ from grafanalib.core import (
YAxis,
)
-from maaskpi.grafana import Alias, Count, Field, InfluxDBTarget, Select, Tag, TagField
-from maaskpi.grafana import Time as InfluxDBTime
-from maaskpi.grafana import get_datasource
+from maaskpi.dailystats import get_deployment_buckets
+from maaskpi.grafana import InfluxDBTarget, get_datasource
+
+MAAS_VERSION_REGEXPS = (
+ ("1.x", r"1\..*"),
+ ("2.0", r"2\.0\..*"),
+ ("2.1", r"2\.1\..*"),
+ ("2.2", r"2\.2\..*"),
+ ("2.3", r"2\.3\..*"),
+ ("2.4", r"2\.4\..*"),
+ ("2.5", r"2\.5\..*"),
+ ("2.6", r"2\.6\..*"),
+ ("2.7", r"2\.7\..*"),
+ ("2.8", r"2\.8\..*"),
+ ("2.9", r"2\.9\..*"),
+ ("3.0", r"3\.0\..*"),
+ ("3.1", r"3\.1\..*"),
+ ("3.2", r"3\.2\..*"),
+ ("3.3", r"3\.3\..*"),
+ ("3.4", r"3\.4\..*"),
+)
def create_graph(title, targets, **kwargs):
@@ -35,52 +53,19 @@ def create_graph(title, targets, **kwargs):
def create_major_version_target(label, regexp):
return InfluxDBTarget(
- measurement="maas.daily_requests",
- select=Select(
- [
- [
- Field("field_uuid"),
- Count(),
- Alias(label),
- ],
- ]
+ rawQuery=True,
+ query=(
+ rf"""
+ SELECT SUM("count") AS "{label}"
+ FROM maas..daily_requests
+ WHERE ("maas_version" =~ /{regexp}/)
+ AND $timeFilter
+ GROUP BY time(1d)
+ """
),
- tags=[Tag("maas_version", "=~", f"/{regexp}/")],
- groupBy=[InfluxDBTime("1d")],
- alias="$col",
)
-# XXX: This doesn't quite work. It seems to return the correct counts,
-# but all versions are returned, not only top 10.
-top_versions_query = """
- SELECT top("version_count", 10)
- FROM (
- SELECT COUNT("field_uuid") as "version_count"
- FROM "maas.daily_requests" WHERE $timeFilter)
- GROUP BY time(1d), "maas_version"
-"""
-
-ipv6_query = """
- SELECT COUNT("subnets_v6") FROM "maas.daily_stats"
- WHERE $timeFilter AND "subnets_v6" > 0
- GROUP BY time(1d)
-"""
-
-large_maases_query = """
- SELECT "machines" FROM "maas.daily_stats"
- WHERE $timeFilter AND "machines" > 1000
- GROUP BY "uuid"
-"""
-
-by_size_query = """
- SELECT COUNT("machines")
- FROM "maas.daily_stats"
- WHERE $timeFilter AND {}
- GROUP BY time(1d)
-"""
-
-
dashboard = Dashboard(
title="MAAS KPIs",
rows=[
@@ -91,19 +76,17 @@ dashboard = Dashboard(
"MAAS Regions",
[
InfluxDBTarget(
- measurement="maas.daily_requests",
- select=Select(
- [
- [
- Field("field_uuid"),
- Count(),
- Alias("Total number of regions"),
- ],
- ]
+ rawQuery=True,
+ query=(
+ """
+ SELECT SUM("count")
+ FROM maas..daily_requests
+ WHERE $timeFilter
+ GROUP BY time(1d)
+ """
),
- groupBy=[InfluxDBTime("1d")],
- alias="$col",
- )
+ alias="Total number of regions",
+ ),
],
span=12,
),
@@ -111,14 +94,15 @@ dashboard = Dashboard(
"Machines",
[
InfluxDBTarget(
- measurement="maas.daily_stats",
rawQuery=True,
- query="""
+ query=(
+ """
SELECT SUM("machines")
- FROM "maas.daily_stats"
+ FROM maas..daily_stats
WHERE $timeFilter
GROUP BY time(1d)
- """,
+ """
+ ),
alias="Total number of machines",
)
],
@@ -127,14 +111,15 @@ dashboard = Dashboard(
"Devices",
[
InfluxDBTarget(
- measurement="maas.daily_stats",
rawQuery=True,
- query="""
+ query=(
+ """
SELECT SUM("devices")
- FROM "maas.daily_stats"
+ FROM maas..daily_stats
WHERE $timeFilter
GROUP BY time(1d)
- """,
+ """
+ ),
alias="Total number of devices",
)
],
@@ -143,36 +128,39 @@ dashboard = Dashboard(
"Controllers",
[
InfluxDBTarget(
- measurement="maas.daily_stats",
rawQuery=True,
- query="""
+ query=(
+ """
SELECT SUM("regionrack_controllers")
- FROM "maas.daily_stats"
+ FROM maas..daily_stats
WHERE $timeFilter
GROUP BY time(1d)
- """,
+ """
+ ),
alias="Total number of region+rack controllers",
),
InfluxDBTarget(
- measurement="maas.daily_stats",
rawQuery=True,
- query="""
+ query=(
+ """
SELECT SUM("region_controllers")
- FROM "maas.daily_stats"
+ FROM maas..daily_stats
WHERE $timeFilter
GROUP BY time(1d)
- """,
+ """
+ ),
alias="Total number of region controllers",
),
InfluxDBTarget(
- measurement="maas.daily_stats",
rawQuery=True,
- query="""
+ query=(
+ """
SELECT SUM("rack_controllers")
- FROM "maas.daily_stats"
+ FROM maas..daily_stats
WHERE $timeFilter
GROUP BY time(1d)
- """,
+ """
+ ),
alias="Total number of rack controllers",
),
],
@@ -186,21 +174,19 @@ dashboard = Dashboard(
create_graph(
"Major versions",
[
- create_major_version_target("1.x", r"1\..*"),
- create_major_version_target("2.0", r"2\.0\..*"),
- create_major_version_target("2.1", r"2\.1\..*"),
- create_major_version_target("2.2", r"2\.2\..*"),
- create_major_version_target("2.3", r"2\.3\..*"),
- create_major_version_target("2.4", r"2\.4\..*"),
- create_major_version_target("2.5", r"2\.5\..*"),
- create_major_version_target("2.6", r"2\.6\..*"),
- create_major_version_target("2.7", r"2\.7\..*"),
- create_major_version_target("2.8", r"2\.8\..*"),
- create_major_version_target("2.9", r"2\.9\..*"),
- create_major_version_target("3.0", r"3\.0\..*"),
- create_major_version_target("3.1", r"3\.1\..*"),
- create_major_version_target("3.2", r"3\.2\..*"),
- create_major_version_target("3.3", r"3\.3\..*"),
+ InfluxDBTarget(
+ rawQuery=True,
+ query=(
+ rf"""
+ SELECT SUM("count") AS "{label}"
+ FROM maas..daily_requests
+ WHERE ("maas_version" =~ /{regexp}/)
+ AND $timeFilter
+ GROUP BY time(1d)
+ """
+ ),
+ )
+ for label, regexp in MAAS_VERSION_REGEXPS
],
stack=True,
),
@@ -208,9 +194,21 @@ dashboard = Dashboard(
"Top versions",
[
InfluxDBTarget(
- measurement="maas.daily_requests",
rawQuery=True,
- query=top_versions_query,
+ # XXX: This doesn't quite work. It seems to return
+ # the correct counts, but all versions are
+ # returned, not only top 10.
+ query=(
+ """
+ SELECT top("version_count", 10)
+ FROM (
+ SELECT SUM("count") AS "version_count"
+ FROM maas..daily_requests
+ WHERE $timeFilter
+ )
+ GROUP BY time(1d), "maas_version"
+ """
+ ),
alias="$tag_maas_version",
),
],
@@ -225,17 +223,15 @@ dashboard = Dashboard(
"Image downloads by architecture",
[
InfluxDBTarget(
- measurement="maas.image_downloads",
- query=top_versions_query,
- select=Select(
- [
- [
- Field("field_uuid"),
- Count(),
- ],
- ]
+ rawQuery=True,
+ query=(
+ """
+ SELECT SUM("count")
+ FROM maas..image_downloads
+ WHERE $timeFilter
+ GROUP BY time(1d), "architecture"
+ """
),
- groupBy=[InfluxDBTime("1d"), TagField("architecture")],
alias="$tag_architecture",
),
],
@@ -245,17 +241,15 @@ dashboard = Dashboard(
"Image downloads by series",
[
InfluxDBTarget(
- measurement="maas.image_downloads",
- query=top_versions_query,
- select=Select(
- [
- [
- Field("field_uuid"),
- Count(),
- ],
- ]
+ rawQuery=True,
+ query=(
+ """
+ SELECT SUM("count")
+ FROM maas..image_downloads
+ WHERE $timeFilter
+ GROUP BY time(1d), "series"
+ """
),
- groupBy=[InfluxDBTime("1d"), TagField("series")],
alias="$tag_series",
),
],
@@ -270,9 +264,16 @@ dashboard = Dashboard(
"IPv6",
[
InfluxDBTarget(
- measurement="maas.daily_stats",
rawQuery=True,
- query=ipv6_query,
+ query=(
+ """
+ SELECT SUM("deployment_count")
+ FROM maas..daily_stats
+ WHERE $timeFilter
+ AND "subnets_v6" > 0
+ GROUP BY time(1d)
+ """
+ ),
alias="Deployments with IPv6",
),
],
@@ -281,15 +282,17 @@ dashboard = Dashboard(
"CentOS",
[
InfluxDBTarget(
- measurement="maas.image_downloads",
rawQuery=True,
- query="""
- SELECT COUNT("field_uuid") FROM "maas.image_downloads"
- WHERE $timeFilter AND
- "series" = '8' OR
- "series" = 'centos70'
+ query=(
+ """
+ SELECT SUM("count")
+ FROM maas..image_downloads
+ WHERE $timeFilter
+ AND "series" = '8'
+ OR "series" = 'centos70'
GROUP BY time(1d)
- """,
+ """
+ ),
alias="Deployments downloading CentOS",
),
],
@@ -304,9 +307,17 @@ dashboard = Dashboard(
"MAAS deployments with over 1k machines",
[
InfluxDBTarget(
- measurement="maas.daily_stats",
rawQuery=True,
- query=large_maases_query,
+ query=(
+ """
+ SELECT "machines"
+ FROM maas..daily_stats
+ WHERE $timeFilter
+ AND "uuid" != ''
+ AND "machines" > 1000
+ GROUP BY "uuid"
+ """
+ ),
alias="$tag_uuid",
),
],
@@ -318,49 +329,17 @@ dashboard = Dashboard(
"MAAS deployments by size",
[
InfluxDBTarget(
- measurement="maas.daily_stats",
- rawQuery=True,
- query=by_size_query.format('"machines" = 0'),
- alias="0 Machines",
- ),
- InfluxDBTarget(
- measurement="maas.daily_stats",
- rawQuery=True,
- query=by_size_query.format(
- '"machines" > 0 AND "machines" <= 10'
- ),
- alias="1-10 Machines",
- ),
- InfluxDBTarget(
- measurement="maas.daily_stats",
rawQuery=True,
- query=by_size_query.format(
- '"machines" > 10 AND "machines" <= 20'
+ query=(
+ f"""
+ SELECT "count"
+ FROM maas..deployment_size
+ WHERE $timeFilter AND size = '{bucket.tag}'
+ """
),
- alias="11-20 Machines",
- ),
- InfluxDBTarget(
- measurement="maas.daily_stats",
- rawQuery=True,
- query=by_size_query.format(
- '"machines" > 20 AND "machines" <= 50'
- ),
- alias="21-50 Machines",
- ),
- InfluxDBTarget(
- measurement="maas.daily_stats",
- rawQuery=True,
- query=by_size_query.format(
- '"machines" > 50 AND "machines" <= 100'
- ),
- alias="51-100 Machines",
- ),
- InfluxDBTarget(
- measurement="maas.daily_stats",
- rawQuery=True,
- query=by_size_query.format('"machines" > 100'),
- alias="101+ Machines",
- ),
+ alias=bucket.message,
+ )
+ for bucket in get_deployment_buckets()
],
stack=True,
span=12,
diff --git a/maaskpi/dailystats.py b/maaskpi/dailystats.py
index 2573fe1..a0af3ac 100644
--- a/maaskpi/dailystats.py
+++ b/maaskpi/dailystats.py
@@ -3,7 +3,7 @@ import calendar
import gzip
import json
import re
-from collections import defaultdict
+from collections import Counter, defaultdict
from dataclasses import asdict, dataclass, field, fields
from datetime import date, datetime, timedelta
from pathlib import Path
@@ -93,18 +93,12 @@ class DailyImageDownloadsSeries(SeriesHelper):
"""A request to download an image from images.maas.io."""
class Meta:
- series_name = "maas.image_downloads"
- # The field_uuid is only there since the measurement needs to
- # have at least one field. If another field is added, the
- # field_uuid one can be removed.
- fields = [
- "field_uuid",
- ]
+ series_name = "image_downloads"
+ fields = ["count"]
tags = [
"maas_version",
"series",
"architecture",
- "uuid",
]
autocommit = False
@@ -119,19 +113,36 @@ class DailyRequestsSeries(SeriesHelper):
"""
class Meta:
- series_name = "maas.daily_requests"
- # The field_uuid is only there since the measurement needs to
- # have at least one field. If another field is added, the
- # field_uuid one can be removed.
- fields = [
- "field_uuid",
- ]
- tags = ["maas_version", "uuid"]
+ series_name = "daily_requests"
+ fields = ["count"]
+ tags = ["maas_version"]
+ autocommit = False
+
+
+class DailyDeploymentSizeSeries(SeriesHelper):
+ """Bucket-based size of deployments.
+
+ This records the number of deployments in each size bucket, by number of
+ registered machines.
+ """
+
+ class Meta:
+ series_name = "deployment_size"
+ fields = ["count"]
+ tags = ["size"]
autocommit = False
+class StatsMixin:
+ """Mixin class for stats."""
+
+ def update_stats(self, other_stats):
+ for stat in (field.name for field in fields(self)):
+ setattr(self, stat, getattr(self, stat) + getattr(other_stats, stat))
+
+
@dataclass
-class DeploymentStats:
+class DeploymentStats(StatsMixin):
"""Stats for a deployment."""
machines: int = 0
@@ -164,11 +175,13 @@ class DeploymentStats:
vm_hosts_virsh_utilized_memory: int = 0
subnets_v4: int = 0
subnets_v6: int = 0
+ # number of deployments this stat is aggregating counts for
+ deployment_count: int = 1
class DailyStatsSeries(SeriesHelper):
class Meta:
- series_name = "maas.daily_stats"
+ series_name = "daily_stats"
fields = [field.name for field in fields(DeploymentStats)]
tags = ["maas_version", "uuid"]
autocommit = False
@@ -176,10 +189,8 @@ class DailyStatsSeries(SeriesHelper):
class DailyPowerDriverSeries(SeriesHelper):
class Meta:
- series_name = "maas.daily_power_drivers"
- fields = [
- "count",
- ]
+ series_name = "daily_power_drivers"
+ fields = ["count", "deployment_count"]
tags = ["maas_version", "uuid", "power_driver_name", "power_driver_creation"]
autocommit = False
@@ -321,6 +332,36 @@ def get_bmc_stats(data):
return data.get("bmcs", default)
+@dataclass
+class DeploymentSizeBucket:
+ """A bucket counting number of deployments by machine size."""
+
+ min: int
+ max: int | None = None
+ count: int = 0
+ tag: str = field(init=False)
+ message: str = field(init=False)
+
+ def __post_init__(self):
+ self.tag = f"{self.min}+" if self.max is None else f"{self.min}-{self.max}"
+ interval = str(self.max) if self.min == self.max else self.tag
+ self.message = f"{interval} Machines"
+
+
+def get_deployment_buckets():
+ return [
+ DeploymentSizeBucket(min=min_value, max=max_value)
+ for min_value, max_value in (
+ (0, 0),
+ (1, 10),
+ (11, 20),
+ (21, 50),
+ (51, 100),
+ (100, None),
+ )
+ ]
+
+
class DailyStats:
"""Keep track of stats from MAAS deployment send for a single day.
@@ -328,12 +369,14 @@ class DailyStats:
is kept.
"""
- def __init__(self, day):
+ def __init__(self, day, top_deploys_count):
self.day = day
+ self.top_deploys_count = top_deploys_count
self.entries = defaultdict(dict)
self.image_entries = defaultdict(set)
self.no_uuids = []
self.with_data = 0
+ self.deployment_size_buckets = get_deployment_buckets()
def process_item(self, item: LogItem):
"""Process a log line and update the internal data structure."""
@@ -376,43 +419,106 @@ class DailyStats:
f"{self.day}: {len(self.entries)} entries, {len(self.no_uuids)} "
f"without uuids, {self.with_data} with data"
)
- timestamp = get_nanosecond_timestamp(self.day)
+
+ top_deploys = self._get_top_deployments()
+
+ requests_series = defaultdict(int)
+ # track [count, deployment_count]
+ power_driver_series = defaultdict(lambda: [0, 0])
+ stats_series = defaultdict(DeploymentStats)
+ image_download_series = defaultdict(int)
+
for uuid, info in self.entries.items():
- DailyRequestsSeries(
- time=timestamp,
- maas_version=info["version"],
- uuid=uuid,
- field_uuid=uuid,
- )
+ self._update_deployments_size_count(info)
+
+ if uuid not in top_deploys:
+ uuid = ""
+
+ version = info["version"]
+ requests_series[version] += 1
+
if bmc_stats := info.get("bmc_stats"):
for creation_method, bmcs in bmc_stats.items():
for driver_name, count in bmcs.items():
- DailyPowerDriverSeries(
- time=timestamp,
- maas_version=info["version"],
- uuid=uuid,
- count=count,
- power_driver_name=driver_name,
- power_driver_creation=creation_method,
- )
+ counts = power_driver_series[
+ uuid, version, driver_name, creation_method
+ ]
+ counts[0] += count
+ counts[1] += 1 # deployment count
if stats := info.get("stats"):
- DailyStatsSeries(
- time=timestamp,
- maas_version=info["version"],
- uuid=uuid,
- **asdict(stats),
- )
-
- for uuid, image_requests in sorted(self.image_entries.items()):
+ stats_series[uuid, version].update_stats(stats)
+
+ for uuid, image_requests in self.image_entries.items():
for maas_version, series, architecture in image_requests:
- DailyImageDownloadsSeries(
- time=timestamp,
- maas_version=maas_version,
- uuid=uuid,
- field_uuid=uuid,
- series=series,
- architecture=architecture,
- )
+ image_download_series[maas_version, series, architecture] += 1
+
+ # create influxdb series
+ timestamp = get_nanosecond_timestamp(self.day)
+ for version, count in requests_series.items():
+ DailyRequestsSeries(
+ time=timestamp,
+ maas_version=version,
+ count=count,
+ )
+ for (
+ version,
+ series,
+ architecture,
+ ), count in image_download_series.items():
+ DailyImageDownloadsSeries(
+ time=timestamp,
+ maas_version=version,
+ series=series,
+ architecture=architecture,
+ count=count,
+ )
+ for (
+ uuid,
+ version,
+ driver_name,
+ creation_method,
+ ), (count, deployment_count) in power_driver_series.items():
+ DailyPowerDriverSeries(
+ time=timestamp,
+ maas_version=version,
+ uuid=uuid,
+ power_driver_name=driver_name,
+ power_driver_creation=creation_method,
+ count=count,
+ deployment_count=deployment_count,
+ )
+ for (uuid, version), stats in stats_series.items():
+ DailyStatsSeries(
+ time=timestamp,
+ maas_version=version,
+ uuid=uuid,
+ **asdict(stats),
+ )
+ for bucket in self.deployment_size_buckets:
+ DailyDeploymentSizeSeries(
+ time=timestamp,
+ size=bucket.tag,
+ count=bucket.count,
+ )
+
+ def _get_top_deployments(self) -> set[str]:
+ counter = Counter(
+ {
+ uuid: info["stats"].machines if "stats" in info else 0
+ for uuid, info in self.entries.items()
+ }
+ )
+ return set(uuid for uuid, _ in counter.most_common(self.top_deploys_count))
+
+ def _update_deployments_size_count(self, info):
+ stats = info.get("stats")
+ if stats is None:
+ return
+        # this assumes buckets are ordered
+ for bucket in self.deployment_size_buckets:
+ if bucket.max is None or stats.machines <= bucket.max:
+ bucket.count += 1
+ return
class LogFilesHost:
@@ -555,12 +661,10 @@ class DailyStatsCollector(Collector):
self.parser.add_argument(
"--swift-key",
default="",
- nargs="?",
help="Path to the file containing the Swift key",
)
self.parser.add_argument(
"--cache-dir",
- nargs="?",
help="Path to the dir to cache the downloaded logs",
)
self.parser.add_argument(
@@ -568,11 +672,20 @@ class DailyStatsCollector(Collector):
"--days",
# Go back a few days by default, since the logs for the last
# day or two usually are missing.
- default="3",
+ default=3,
type=int,
- nargs="?",
help="Days back to collect metrics for",
)
+ self.parser.add_argument(
+ "--top-deploys",
+ default=50,
+ type=int,
+ help=(
+ "Number of deployments to report individually "
+ "(top ones by machine count). "
+ "Others will be aggregated"
+ ),
+ )
def run_collect(self, args):
if args.swift_key:
@@ -594,9 +707,9 @@ class DailyStatsCollector(Collector):
assert args.cache_dir, "Have to specify either swift or cache."
swift = None
- return self.collect(args.days, swift, args.cache_dir)
+ return self.collect(args.days, args.top_deploys, swift, args.cache_dir)
- def collect(self, days, swift, cache_dir):
+ def collect(self, days, top_deploys_count, swift, cache_dir):
log_files = LogFiles(swift, days, cache_dir)
log_files.init()
if not log_files.hosts:
@@ -617,7 +730,7 @@ class DailyStatsCollector(Collector):
# This has the effect that we won't process the last day,
# which most likely will be incomplete.
day_stats.create_series()
- day_stats = DailyStats(log_files.current_day)
+ day_stats = DailyStats(log_files.current_day, top_deploys_count)
for item in items:
day_stats.process_item(item)
@@ -625,6 +738,7 @@ class DailyStatsCollector(Collector):
yield DailyRequestsSeries
yield DailyImageDownloadsSeries
yield DailyPowerDriverSeries
+ yield DailyDeploymentSizeSeries
run = DailyStatsCollector().run
diff --git a/maaskpi/grafana.py b/maaskpi/grafana.py
index 5659492..2dd7d32 100644
--- a/maaskpi/grafana.py
+++ b/maaskpi/grafana.py
@@ -1,4 +1,5 @@
import os
+import textwrap
import attr
from grafanalib.core import Graph as GrafanaGraph
@@ -43,7 +44,7 @@ class InfluxDBTarget(object):
}
if self.rawQuery:
data["rawQuery"] = self.rawQuery
- data["query"] = self.query
+ data["query"] = textwrap.dedent(self.query).strip()
else:
data["select"] = self.select.to_json_data()
return data