← Back to team overview

nagios-charmers team mailing list archive

[Merge] ~afreiberger/hw-health-charm:add-mdadm-checks into hw-health-charm:master

 

Drew Freiberger has proposed merging ~afreiberger/hw-health-charm:add-mdadm-checks into hw-health-charm:master.

Requested reviews:
  Nagios Charm developers (nagios-charmers)

For more details, see:
https://code.launchpad.net/~afreiberger/hw-health-charm/+git/hw-health-charm/+merge/374838
-- 
Your team Nagios Charm developers is requested to review the proposed merge of ~afreiberger/hw-health-charm:add-mdadm-checks into hw-health-charm:master.
diff --git a/src/files/mdadm/cron_mdadm.py b/src/files/mdadm/cron_mdadm.py
index e93d2b3..f532ade 100755
--- a/src/files/mdadm/cron_mdadm.py
+++ b/src/files/mdadm/cron_mdadm.py
@@ -60,10 +60,12 @@ def parse_output():
     devices_re = r'^(/\S+):$'
     state_re = r'^\s*State\s+:\s+(\S+)$'
     status_re = r'^\s*(Active|Working|Failed|Spare) Devices\s+:\s+(\d+)$'
+    removed_re = r'^\s*-\s+0\s+0\s+(\d+)\s+removed$'
 
     devices_cre = re.compile(devices_re)
     state_cre = re.compile(state_re)
     status_cre = re.compile(status_re)
+    removed_cre = re.compile(removed_re)
 
     device = None
     devices_stats = {}
@@ -77,27 +79,43 @@ def parse_output():
                     'Active': 0,
                     'Working': 0,
                     'Failed': 0,
-                    'Spare': 0
+                    'Spare': 0,
                 },
-                'degraded': False
+                'degraded': False,
+                'recovering': False,
+                'removed': [],
             }
             continue
 
         m = state_cre.match(line)
         if m:
-            if 'degraded' in m.group(1) and device:
+            # The State line holds one or more comma-separated values, e.g. "clean", "clean, degraded" or "clean, degraded, recovering".
+            states = m.group(1).split(", ")
+            if 'degraded' in states and device:
                 devices_stats[device]['degraded'] = True
+            if 'recovering' in states and device:
+                devices_stats[device]['recovering'] = True
             continue
 
         m = status_cre.match(line)
         if m and device:
             devices_stats[device]['stats'][m.group(1)] = int(m.group(2))
+            continue
+
+        m = removed_cre.match(line)
+        if m and device:
+            devices_stats[device]['removed'].append(m.group(1))
+            continue
 
     msg = []
     critical = False
     for device in devices_stats:
+        parts = []
         # Is device degraded?
-        if devices_stats[device]['degraded']:
+        if devices_stats[device]['degraded'] and devices_stats[device]['recovering']:
+            warning = True
+            parts = ['{} degraded but recovering'.format(device)]
+        elif devices_stats[device]['degraded']:
             critical = True
             parts = ['{} degraded'.format(device)]
         else:
@@ -112,10 +130,18 @@ def parse_output():
                 for status in sorted(devices_stats[device]['stats'])
             ]
             parts.extend(dev_stats)
+
+        if len(devices_stats[device]['removed']) != 0:
+            critical = True
+            for member in devices_stats[device]['removed']:
+                parts.append('RaidDevice {} marked removed from {}'.format(member, device))
+
         msg.append(', '.join(parts))
 
     if critical:
         msg = 'CRITICAL: {}'.format('; '.join(msg))
+    elif warning:
+        msg = 'WARNING: {}'.format('; '.join(msg))
     else:
         msg = 'OK: {}'.format('; '.join(msg))
     return generate_output(msg)

Follow ups