
canonical-ubuntu-qa team mailing list archive

[Merge] ~hyask/autopkgtest-cloud:skia/stats_include_flavor into autopkgtest-cloud:master

 

Skia has proposed merging ~hyask/autopkgtest-cloud:skia/stats_include_flavor into autopkgtest-cloud:master.

Requested reviews:
  Canonical's Ubuntu QA (canonical-ubuntu-qa)

For more details, see:
https://code.launchpad.net/~hyask/autopkgtest-cloud/+git/autopkgtest-cloud/+merge/476862

Describe how to compute the quota we need, and back that with some `stats.ipynb` data analysis.
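
The ``docs/deploying.rst`` change below documents the actual numbers; as a rough illustration only, the arithmetic boils down to the following Python sketch. The runner count, flavor sizes and 10% margin are taken from that text; the helper name is purely illustrative and not part of the patch.

    # Quota needed for 100 regular runners (flavor cpu2-ram4-disk20),
    # with a 10% margin to accommodate big_packages runs.
    RUNNERS = 100
    MARGIN = 1.10
    REGULAR_FLAVOR = {"cores": 2, "ram_gb": 4, "disk_gb": 20}

    def required_quota(runners=RUNNERS, flavor=REGULAR_FLAVOR, margin=MARGIN):
        """Return the per-resource quota to request from the cloud."""
        return {resource: runners * size * margin for resource, size in flavor.items()}

    print(required_quota())
    # -> {'cores': 220.0, 'ram_gb': 440.0, 'disk_gb': 2200.0}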
-- 
Your team Canonical's Ubuntu QA is requested to review the proposed merge of ~hyask/autopkgtest-cloud:skia/stats_include_flavor into autopkgtest-cloud:master.
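
For reference, once ``stats.py`` has populated the new ``flavor`` column, the big_packages proportion mentioned in the documentation can also be checked directly against the stats database. This is only a sketch, assuming a database generated with this branch applied; the path is an example, adjust it to your local dump.

    # Share of runs per flavor, from the tests_stats table populated by stats.py.
    import sqlite3

    db_path = "./autopkgtest_with_stats.db"  # adjust to your local dump
    db = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)

    total = db.execute(
        "SELECT COUNT(*) FROM tests_stats WHERE flavor IS NOT NULL"
    ).fetchone()[0]
    for flavor, count in db.execute(
        "SELECT flavor, COUNT(*) FROM tests_stats"
        " WHERE flavor IS NOT NULL GROUP BY flavor ORDER BY COUNT(*) DESC"
    ):
        print(f"{flavor}: {count} runs ({100 * count / total:.1f}%)")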
diff --git a/dev-tools/stats.ipynb b/dev-tools/stats.ipynb
index fd898f2..90d513c 100644
--- a/dev-tools/stats.ipynb
+++ b/dev-tools/stats.ipynb
@@ -44,7 +44,7 @@
     "import matplotlib.pyplot as plt\n",
     "\n",
     "# Update this path with the corresponding path to the database you want to analyze\n",
-    "db_path = \"./autopkgtest_2024-06-18 15:20:42.817741_with_stats.db\"\n",
+    "db_path = \"./autopkgtest_2024-11-20 11:25:43.562429_with_stats.db\"\n",
     "\n",
     "db = sqlite3.connect(f\"file:{db_path}?mode=ro\")\n",
     "sqlite3.paramstyle = \"named\"\n",
@@ -253,6 +253,75 @@
     "    plt.show()\n",
     "    print(df)\n"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b9a948b3",
+   "metadata": {},
+   "source": [
+    "## Flavors distribution for a single datacenter-arch\n",
+    "\n",
+    "When a quota issue arises, it's useful to have a look at the proportion of regular tests vs `big_packages` ones. It's datacenter-arch specific for easier readability, but it's sometimes useful to remove that restriction.\n",
+    "\n",
+    "Remember that this cells requires you to set the `datacenter` and `arch` at the beginning."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6eba9dcd",
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "with db as db_con:\n",
+    "    db_con.row_factory = sqlite3.Row\n",
+    "\n",
+    "    datacenter = \"bos03\"\n",
+    "    arch = \"amd64\"\n",
+    "  \n",
+    "    query = f\"\"\"\n",
+    "    SELECT flavor, substr(result.run_id, 1, 8) as date\n",
+    "    FROM tests_stats\n",
+    "    JOIN result ON result.uuid=tests_stats.uuid\n",
+    "    JOIN test ON test.id=result.test_id\n",
+    "    WHERE arch = '{arch}' AND datacenter = '{datacenter}'\n",
+    "    AND flavor IS NOT NULL\n",
+    "    ORDER BY date\n",
+    "    \"\"\"\n",
+    "    df = pd.read_sql_query(query, db_con)\n",
+    "    # Get the date as datetime object\n",
+    "    df[\"date\"] = pd.to_datetime(df.date)\n",
+    "    \n",
+    "    # Display data as a graph\n",
+    "    plt.figure(figsize=(14, 5))\n",
+    "\n",
+    "    # Plot point for each dc-arch over time\n",
+    "    for flavor in sorted(df['flavor'].unique()):\n",
+    "        flavor_data = df[df['flavor'] == flavor]\n",
+    "        flavor_data = flavor_data.groupby(\"date\").count()\n",
+    "        print(flavor, flavor_data)\n",
+    "        plt.plot(flavor_data, 'o-', drawstyle='steps-post', label=flavor)\n",
+    "\n",
+    "    # Add some title and labels\n",
+    "    plt.title(f'Flavor counts over time')\n",
+    "    plt.xlabel('Date')\n",
+    "    plt.ylabel('Flavor counts')\n",
+    "    plt.legend()\n",
+    "\n",
+    "    # Plot the graph\n",
+    "    plt.show()\n",
+    "    print(df)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d8413a8",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -271,7 +340,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.3"
+   "version": "3.12.7"
   }
  },
  "nbformat": 4,
diff --git a/dev-tools/stats.py b/dev-tools/stats.py
index 82f6f43..7edb897 100755
--- a/dev-tools/stats.py
+++ b/dev-tools/stats.py
@@ -40,6 +40,7 @@ class Run:
         self.lxd_image = None
         self.first_boot_time = None
         self.boot_attempts = None
+        self.flavor = None
 
     def __str__(self):
         return (
@@ -54,6 +55,7 @@ class Run:
             "host": self.host,
             "datacenter": self.datacenter,
             "lxd_image": self.lxd_image,
+            "flavor": self.flavor,
         }
 
     def log_url(self):
@@ -99,6 +101,7 @@ class Run:
         self.extract_datacenter()
         self.extract_first_boot_time()
         self.extract_boot_attempts()
+        self.extract_flavor()
 
     host_pattern = re.compile(
         r"host juju-7f2275-prod-proposed-migration-environment-(\d);"
@@ -173,6 +176,18 @@ class Run:
         except Exception as e:
             logging.debug(e)
 
+    flavor_pattern = re.compile(r"--flavor (\S*)")
+
+    def extract_flavor(self):
+        try:
+            lines = [l for l in self.log_lines if "--flavor" in l]
+            if lines:
+                m = self.flavor_pattern.search(lines[0])
+                if m:
+                    self.flavor = m.group(1)
+        except Exception as e:
+            logging.debug(e)
+
 
 def get_stats(db_con, since_days_ago, until_days_ago, limit):
     try:
@@ -184,6 +199,7 @@ def get_stats(db_con, since_days_ago, until_days_ago, limit):
             "  datacenter CHAR[10],"
             "  lxd_image CHAR[10],"
             "  host CHAR[10],"
+            "  flavor CHAR[32],"
             "  PRIMARY KEY(uuid))"
         )
         logging.info("Created tests_stats table")
@@ -228,7 +244,8 @@ def get_stats(db_con, since_days_ago, until_days_ago, limit):
                             boot_attempts,
                             host,
                             datacenter,
-                            lxd_image
+                            lxd_image,
+                            flavor
                         )
                         VALUES (
                             :uuid,
@@ -236,7 +253,8 @@ def get_stats(db_con, since_days_ago, until_days_ago, limit):
                             :boot_attempts,
                             :host,
                             :datacenter,
-                            :lxd_image
+                            :lxd_image,
+                            :flavor
                         )""",
                     r.as_row_dict(),
                 )
diff --git a/docs/deploying.rst b/docs/deploying.rst
index 8a6dd64..7df78cf 100644
--- a/docs/deploying.rst
+++ b/docs/deploying.rst
@@ -131,6 +131,35 @@ Making configuration changes
 
 Edit the ``service-bundle`` file as above, and run ``mojo run`` again.
 
+About cloud environments quotas
+-------------------------------
+
+Each OpenStack environment has a quota, meaning there is a limit on the number
+of instances and on the amount of CPU cores, RAM, disk, etc., that can be in
+use at the same time. The current quota can be seen with ``nova limits``.
+
+Changing the quota requires IS approval, and how to do that depends on the
+cloud environment, so it's out of scope for this document.
+Computing the required quota, however, fits right in here, so let's detail what we need.
+
+Let's say we want 100 autopkgtest runners.
+The regular flavor is ``cpu2-ram4-disk20``, so::
+
+  100*2 = 200 cores
+  100*4 = 400GB of RAM
+  100*20 = 2000GB of disk
+
+We take a 10% margin to accommodate ``big_packages`` jobs, which run on
+``cpu4-ram8-disk100``, which gives us::
+
+  220 cores
+  440GB RAM
+  2200GB disk
+
+In terms of cores and RAM, a 10% margin means we can concretely run 90 regular
+jobs and 10 ``big_packages`` ones. That should be enough for most cases, since
+on average only around 2-4% of tests are ``big_packages`` (observed on 2024-11-20 with ``stats.ipynb``).
+
 autopkgtest-cloud Storage
 ----------------------------