← Back to team overview

nagios-charmers team mailing list archive

[Merge] ~aieri/hw-health-charm:lp/1814863 into hw-health-charm:master

 

Andrea Ieri has proposed merging ~aieri/hw-health-charm:lp/1814863 into hw-health-charm:master.

Requested reviews:
  Nagios Charm developers (nagios-charmers)

For more details, see:
https://code.launchpad.net/~aieri/hw-health-charm/+git/hw-health-charm/+merge/363062
-- 
Your team Nagios Charm developers is requested to review the proposed merge of ~aieri/hw-health-charm:lp/1814863 into hw-health-charm:master.
diff --git a/src/files/mpt/check_sas3ircu.py b/src/files/mpt/check_sas3ircu.py
index a7d0f75..3e49d84 100755
--- a/src/files/mpt/check_sas3ircu.py
+++ b/src/files/mpt/check_sas3ircu.py
@@ -2,101 +2,190 @@
 # -*- coding: us-ascii -*-
 
 import re
-
-from nagios_plugin3 import CriticalError, WarnError, try_check
+from collections import defaultdict
+from nagios_plugin3 import CriticalError, WarnError, UnknownError, try_check
 
 INPUT_FILE = '/var/lib/nagios/sas3ircu.out'
 
 
-def parse_output():
-    volume_re = r'^IR (volume) \d+'
-    volid_re = r'^\s+Volume ID\s*:\s+(\d+)'
-    vol_status_re = r'^\s+Status of volume\s+:\s+(\S+)'
-    vol_phy_re = r'^\s+PHY\[\d+\] Enclosure#/Slot#\s+:\s+(\S+)'
-
-    volid_status_phy_cre = [
-        re.compile(volid_re),
-        re.compile(vol_status_re),
-        re.compile(vol_phy_re),
-    ]
-
-    disk_re = r'^Device is a Hard (disk)'
-    enclosure_re = r'^\s+Enclosure #\s+:\s+(\d+)'
-    slot_re = r'^\s+Slot #\s+:\s+(\d+)'
-    state_re = r'^\s+State\s+:\s+(\S+)'
-
-    vol_disk_cre = [re.compile(volume_re), re.compile(disk_re)]
-
-    encl_slot_state_cre = [
-        re.compile(enclosure_re),
-        re.compile(slot_re),
-        re.compile(state_re),
-    ]
-
-    devices = {}
-    device = []
-    critical = False
-    dev_type = None
-    with open(INPUT_FILE) as devices_raw:
-        for line in devices_raw.readlines():
-            line = line.rstrip()
-            for vd_cre in vol_disk_cre:
-                m = vd_cre.match(line)
-                if m:
-                    dev_type = m.group(1)
-
-            if not dev_type:
-                continue
-
-            elif dev_type == 'volume':
-                for cre in volid_status_phy_cre:
-                    m = cre.match(line)
-                    if m:
-                        device.append(m.group(1))
-
-                if len(device) >= 3 and line.strip() == '':
-                    tmpdev = devices.get(device[1], [])
-                    tmpdev.append('{}:({})'.format(device[0],
-                                                   ','.join(device[2:])))
-                    devices[device[1]] = tmpdev
-                    if 'Okay' not in device:
-                        critical = True
-                    device = []
-                    dev_type = None
-
-            elif dev_type == 'disk':
-                for cre in encl_slot_state_cre:
-                    m = cre.match(line)
-                    if m:
-                        device.append(m.group(1))
-
-                if len(device) == 3:
-                    tmpdev = devices.get(device[2], [])
-                    tmpdev.append('{}:{}'.format(device[0], device[1]))
-                    devices[device[2]] = tmpdev
-                    # Disks that are part of a RAID should be in Optimal state
-                    # JBOD disks can be in Ready state
-                    if 'Optimal' not in device and 'Ready' not in device:
-                        critical = True
-                    device = []
-                    dev_type = None
-
-    # msg = '; '.join(sorted(devices))
-    msg = '; '.join([
-        '{}[{}]'.format(state, ','.join(devices[state]))
-        for state in sorted(devices)
-    ])
-
-    if msg == '':
-        raise WarnError('WARNING: no output')
-    elif critical:
-        raise CriticalError('CRITICAL: {}'.format(msg))
-    else:
-        print('OK: {}'.format(msg))
+def parse_output(input_file):
+    '''
+    Turn the whole sas3ircu output into a dictionary
+    '''
+    sections_re = re.compile(
+        r'(?<=^Controller information\n)'
+        r'-+\n'
+        r'(?P<ctrl>(?:.|\n)*)'
+        r'^-+\n'
+        r'^IR Volume information\n'
+        r'-+\n'
+        r'(?P<vols>(?:.|\n)*)'
+        r'^-+\n'
+        r'^Physical device information\n'
+        r'-+\n'
+        r'(?P<disks>(?:.|\n)*)'
+        r'^-+\n'
+        r'^Enclosure information\n'
+        r'-+\n'
+        r'(?P<encl>(?:.|\n)*)'
+        r'^-+\n',
+        re.MULTILINE
+    )
+    disks_re = re.compile(
+        r'(?<=^Device is a Hard disk\n)(?P<kv_data>(?:.|\n)*?)(?=^$)',
+        re.MULTILINE
+    )
+
+    with open(input_file) as devices_raw:
+        sections = sections_re.search(devices_raw.read()).groupdict()
+        controller = _kv_parse(sections['ctrl'])
+        volumes = _vols_parse(sections['vols'])
+        # This collects disk level information in a structure simulating the
+        # physical encl/slot arrangement
+        topology = defaultdict(dict)
+        for match in disks_re.findall(sections['disks']):
+            disk = _kv_parse(match)
+            encl = disk['Enclosure #']
+            slot = disk['Slot #']
+            topology[encl][slot] = disk
+        enclosure = _kv_parse(sections['encl'])
+
+    return {
+        'controller':  controller,
+        'volumes':     volumes,
+        'disks':       topology,
+        'enclosure':   enclosure
+    }
+
+
+def _vols_parse(text):
+    vols_re = re.compile(
+        r'^IR volume (?P<n>\d+)\n'
+        r'(?P<kv_data>(?:.|\n)*?)'
+        r'\s+Physical hard disks\s+:.*\n'
+        r'(?P<topology>(?:^\s+PHY.*\n)+)',
+        re.MULTILINE
+    )
+    vol_topology_re = re.compile(
+        r'\s+PHY\[(?P<n>\d+)\]\s+Enclosure#\/Slot#\s+'
+        r':\s+(?P<enc>\d+):(?P<slot>\d+)'
+    )
+    volumes = {}
+    for (vol_n, kv_data, vol_topology) in vols_re.findall(text):
+        topology = {}
+        for (member_n, enc, slot) in vol_topology_re.findall(vol_topology):
+            topology[member_n] = {'enc': enc, 'slot': slot}
+        volumes[vol_n] = {**_kv_parse(kv_data), 'topology': topology}
+
+    return volumes
+
+
+def _kv_parse(text):
+    '''
+    Build a dict by parsing text like:
+
+      key1     : value1
+      key2     : value2
+    '''
+    key_value_re = re.compile(
+        r'^\s*(?P<key>.*?)\s+:\s+(?P<value>.*)'
+    )
+    text = text.strip()
+    return {
+        m.group('key'): m.group('value')
+        for m in map(key_value_re.search, text.split('\n'))
+    }
+
+
+def eval_status(data):
+    '''
+    Given a dictionary and a set of rules, determine the state of the storage
+    subsystem
+    '''
+    OK = 'Okay (OKY)'
+    READY = 'Ready (RDY)'
+    OPTIMAL = 'Optimal (OPT)'
+    status = Status()
+
+    # 1. Volumes must be in Okay state
+    for volume in data['volumes'].values():
+        vol_id = volume['Volume ID']
+        vol_status = volume['Status of volume']
+        if vol_status != OK:
+            status.crit("Volume {}: {}".format(vol_id, vol_status))
+        else:
+            # 2. Volume members must be in Optimal state
+            for member in volume['topology'].values():
+                disk = data['disks'][member['enc']][member['slot']]
+                if disk['State'] != OPTIMAL:
+                    msg = "Disk {}:{} {}".format(
+                        member['enc'],
+                        member['slot'],
+                        disk['State']
+                    )
+                    if disk['State'] == READY:
+                        status.warn(msg)
+                    else:
+                        status.crit(msg)
+    # 3. Disks can be in Optimal or Ready state ("ready" is ok for non-RAID
+    # members)
+    for enclosure_id, enclosure in data['disks'].items():
+        for slot_id, slot in enclosure.items():
+            if slot['State'] not in [OPTIMAL, READY]:
+                status.crit("Disk {}:{} {}".format(
+                    enclosure_id,
+                    slot_id,
+                    slot['State']
+                ))
+    status.get_status()
+
+
+class Status:
+    '''
+    Class hiding the whole "CRIT >> WARN >> OK" priority scheme
+    '''
+    def __init__(self, status='OK'):
+        self._status = status
+        self._msgs = set()
+
+    def crit(self, msg):
+        self._status = 'CRITICAL'
+        self._msgs.add(msg)
+
+    def warn(self, msg):
+        if self._status != 'CRITICAL':
+            self._status = 'WARNING'
+        self._msgs.add(msg)
+
+    def ok(self, msg):
+        self._msgs.add(msg)
+
+    def get_status(self):
+        '''
+        Render the current status, raising nagios_plugin3 exceptions if things
+        are not OK
+        '''
+        if self._status == 'OK':
+            msg = '{}: no errors'.format(self._status)
+            print(msg)
+        else:
+            msg = '{}: {}'.format(self._status,
+                                  ' | '.join(self._msgs))
+            if self._status == 'CRITICAL':
+                raise CriticalError(msg)
+            elif self._status == 'WARNING':
+                raise WarnError(msg)
+            else:
+                # this really shouldn't be happening
+                raise UnknownError(msg)
+
+    def __repr__(self):
+        print(self._status)
 
 
 def main():
-    try_check(parse_output)
+    data = parse_output(INPUT_FILE)
+    try_check(eval_status, data)
 
 
 if __name__ == '__main__':

Follow ups