nagios-charmers team mailing list archive
-
nagios-charmers team
-
Mailing list archive
-
Message #00261
[Merge] ~aieri/hw-health-charm:lp/1814863 into hw-health-charm:master
Andrea Ieri has proposed merging ~aieri/hw-health-charm:lp/1814863 into hw-health-charm:master.
Requested reviews:
Nagios Charm developers (nagios-charmers)
For more details, see:
https://code.launchpad.net/~aieri/hw-health-charm/+git/hw-health-charm/+merge/363062
--
Your team Nagios Charm developers is requested to review the proposed merge of ~aieri/hw-health-charm:lp/1814863 into hw-health-charm:master.
diff --git a/src/files/mpt/check_sas3ircu.py b/src/files/mpt/check_sas3ircu.py
index a7d0f75..3e49d84 100755
--- a/src/files/mpt/check_sas3ircu.py
+++ b/src/files/mpt/check_sas3ircu.py
@@ -2,101 +2,190 @@
# -*- coding: us-ascii -*-
import re
-
-from nagios_plugin3 import CriticalError, WarnError, try_check
+from collections import defaultdict
+from nagios_plugin3 import CriticalError, WarnError, UnknownError, try_check
INPUT_FILE = '/var/lib/nagios/sas3ircu.out'
-def parse_output():
- volume_re = r'^IR (volume) \d+'
- volid_re = r'^\s+Volume ID\s*:\s+(\d+)'
- vol_status_re = r'^\s+Status of volume\s+:\s+(\S+)'
- vol_phy_re = r'^\s+PHY\[\d+\] Enclosure#/Slot#\s+:\s+(\S+)'
-
- volid_status_phy_cre = [
- re.compile(volid_re),
- re.compile(vol_status_re),
- re.compile(vol_phy_re),
- ]
-
- disk_re = r'^Device is a Hard (disk)'
- enclosure_re = r'^\s+Enclosure #\s+:\s+(\d+)'
- slot_re = r'^\s+Slot #\s+:\s+(\d+)'
- state_re = r'^\s+State\s+:\s+(\S+)'
-
- vol_disk_cre = [re.compile(volume_re), re.compile(disk_re)]
-
- encl_slot_state_cre = [
- re.compile(enclosure_re),
- re.compile(slot_re),
- re.compile(state_re),
- ]
-
- devices = {}
- device = []
- critical = False
- dev_type = None
- with open(INPUT_FILE) as devices_raw:
- for line in devices_raw.readlines():
- line = line.rstrip()
- for vd_cre in vol_disk_cre:
- m = vd_cre.match(line)
- if m:
- dev_type = m.group(1)
-
- if not dev_type:
- continue
-
- elif dev_type == 'volume':
- for cre in volid_status_phy_cre:
- m = cre.match(line)
- if m:
- device.append(m.group(1))
-
- if len(device) >= 3 and line.strip() == '':
- tmpdev = devices.get(device[1], [])
- tmpdev.append('{}:({})'.format(device[0],
- ','.join(device[2:])))
- devices[device[1]] = tmpdev
- if 'Okay' not in device:
- critical = True
- device = []
- dev_type = None
-
- elif dev_type == 'disk':
- for cre in encl_slot_state_cre:
- m = cre.match(line)
- if m:
- device.append(m.group(1))
-
- if len(device) == 3:
- tmpdev = devices.get(device[2], [])
- tmpdev.append('{}:{}'.format(device[0], device[1]))
- devices[device[2]] = tmpdev
- # Disks that are part of a RAID should be in Optimal state
- # JBOD disks can be in Ready state
- if 'Optimal' not in device and 'Ready' not in device:
- critical = True
- device = []
- dev_type = None
-
- # msg = '; '.join(sorted(devices))
- msg = '; '.join([
- '{}[{}]'.format(state, ','.join(devices[state]))
- for state in sorted(devices)
- ])
-
- if msg == '':
- raise WarnError('WARNING: no output')
- elif critical:
- raise CriticalError('CRITICAL: {}'.format(msg))
- else:
- print('OK: {}'.format(msg))
+def parse_output(input_file):
+    '''
+    Turn the whole sas3ircu output into a dictionary
+
+    input_file is the path of the file holding the sas3ircu output.
+
+    The returned dictionary has four keys:
+      'controller' - key/value pairs of the "Controller information" section
+      'volumes'    - whatever _vols_parse builds from "IR Volume information"
+      'disks'      - key/value pairs of each "Device is a Hard disk" stanza,
+                     indexed by its 'Enclosure #' and then its 'Slot #' value
+      'enclosure'  - key/value pairs of the "Enclosure information" section
+
+    Raises UnknownError if the expected section layout cannot be found.
+    '''
+    sections_re = re.compile(
+        r'(?<=^Controller information\n)'
+        r'-+\n'
+        r'(?P<ctrl>(?:.|\n)*)'
+        r'^-+\n'
+        r'^IR Volume information\n'
+        r'-+\n'
+        r'(?P<vols>(?:.|\n)*)'
+        r'^-+\n'
+        r'^Physical device information\n'
+        r'-+\n'
+        r'(?P<disks>(?:.|\n)*)'
+        r'^-+\n'
+        r'^Enclosure information\n'
+        r'-+\n'
+        r'(?P<encl>(?:.|\n)*)'
+        r'^-+\n',
+        re.MULTILINE
+    )
+    disks_re = re.compile(
+        r'(?<=^Device is a Hard disk\n)(?P<kv_data>(?:.|\n)*?)(?=^$)',
+        re.MULTILINE
+    )
+
+    with open(input_file) as devices_raw:
+        sections_match = sections_re.search(devices_raw.read())
+    # search() returns None when the file is empty or not laid out as
+    # expected; fail with an explicit message instead of an AttributeError
+    if sections_match is None:
+        raise UnknownError(
+            'UNKNOWN: unable to parse sas3ircu output in {}'.format(input_file)
+        )
+    sections = sections_match.groupdict()
+
+    controller = _kv_parse(sections['ctrl'])
+    volumes = _vols_parse(sections['vols'])
+    # This collects disk level information in a structure simulating the
+    # physical encl/slot arrangement
+    topology = defaultdict(dict)
+    for stanza in disks_re.findall(sections['disks']):
+        disk = _kv_parse(stanza)
+        encl = disk['Enclosure #']
+        slot = disk['Slot #']
+        topology[encl][slot] = disk
+    enclosure = _kv_parse(sections['encl'])
+
+    return {
+        'controller': controller,
+        'volumes': volumes,
+        'disks': topology,
+        'enclosure': enclosure
+    }
+
+
+def _vols_parse(text):
+    '''
+    Build a dict describing every "IR volume <n>" stanza found in text
+
+    The result is keyed by the volume number as it appears in the stanza
+    heading. Each value holds the stanza's key/value pairs (via _kv_parse)
+    plus a 'topology' entry mapping each PHY index to a dict with the 'enc'
+    and 'slot' of that member.
+    '''
+    stanza_re = re.compile(
+        r'^IR volume (?P<n>\d+)\n'
+        r'(?P<kv_data>(?:.|\n)*?)'
+        r'\s+Physical hard disks\s+:.*\n'
+        r'(?P<topology>(?:^\s+PHY.*\n)+)',
+        re.MULTILINE
+    )
+    member_re = re.compile(
+        r'\s+PHY\[(?P<n>\d+)\]\s+Enclosure#\/Slot#\s+'
+        r':\s+(?P<enc>\d+):(?P<slot>\d+)'
+    )
+    parsed = {}
+    for number, attributes, members in stanza_re.findall(text):
+        volume = dict(_kv_parse(attributes))
+        # One entry per "PHY[n] Enclosure#/Slot# : enc:slot" line
+        volume['topology'] = {
+            phy: {'enc': enc, 'slot': slot}
+            for phy, enc, slot in member_re.findall(members)
+        }
+        parsed[number] = volume
+
+    return parsed
+
+
+def _kv_parse(text):
+    '''
+    Build a dict by parsing text like:
+
+    key1 : value1
+    key2 : value2
+
+    Lines that do not have the "key : value" shape (blank lines, headings,
+    an entirely empty text) are skipped rather than raising.
+    '''
+    key_value_re = re.compile(
+        r'^\s*(?P<key>.*?)\s+:\s+(?P<value>.*)'
+    )
+    matches = map(key_value_re.search, text.strip().split('\n'))
+    return {
+        m.group('key'): m.group('value')
+        for m in matches
+        # search() yields None for lines that are not key/value pairs
+        if m is not None
+    }
+
+
+def eval_status(data):
+    '''
+    Given a dictionary and a set of rules, determine the state of the storage
+    subsystem
+
+    data must provide a 'volumes' dict (each volume carrying 'Volume ID',
+    'Status of volume' and a 'topology' of members with 'enc' and 'slot')
+    and a 'disks' dict indexed by enclosure and then slot, each disk carrying
+    a 'State'.
+
+    The outcome is reported by calling Status.get_status() at the end.
+    '''
+    OK = 'Okay (OKY)'
+    READY = 'Ready (RDY)'
+    OPTIMAL = 'Optimal (OPT)'
+    status = Status()
+
+    # 1. Volumes must be in Okay state
+    for volume in data['volumes'].values():
+        vol_id = volume['Volume ID']
+        vol_status = volume['Status of volume']
+        if vol_status != OK:
+            status.crit("Volume {}: {}".format(vol_id, vol_status))
+            continue
+        # 2. Volume members must be in Optimal state
+        for member in volume['topology'].values():
+            enc, slot = member['enc'], member['slot']
+            # Use .get() so that an unknown enclosure/slot neither raises
+            # KeyError nor inserts an empty enclosure into data['disks']
+            disk = data['disks'].get(enc, {}).get(slot)
+            if disk is None:
+                status.crit("Disk {}:{} missing".format(enc, slot))
+                continue
+            if disk['State'] == OPTIMAL:
+                continue
+            msg = "Disk {}:{} {}".format(enc, slot, disk['State'])
+            if disk['State'] == READY:
+                status.warn(msg)
+            else:
+                status.crit(msg)
+    # 3. Disks can be in Optimal or Ready state ("ready" is ok for non-RAID
+    # members)
+    for enclosure_id, enclosure in data['disks'].items():
+        for slot_id, slot in enclosure.items():
+            if slot['State'] not in [OPTIMAL, READY]:
+                status.crit("Disk {}:{} {}".format(
+                    enclosure_id,
+                    slot_id,
+                    slot['State']
+                ))
+    status.get_status()
+
+
+class Status:
+    '''
+    Class hiding the whole "CRIT >> WARN >> OK" priority scheme
+
+    Messages are kept de-duplicated and in the order they were first added,
+    so the rendered text is the same on every run.
+    '''
+    def __init__(self, status='OK'):
+        self._status = status
+        self._msgs = []
+
+    def _add(self, msg):
+        '''Record msg unless an identical message is already present'''
+        if msg not in self._msgs:
+            self._msgs.append(msg)
+
+    def crit(self, msg):
+        '''Record msg and switch to CRITICAL unconditionally'''
+        self._status = 'CRITICAL'
+        self._add(msg)
+
+    def warn(self, msg):
+        '''Record msg and switch to WARNING unless already CRITICAL'''
+        if self._status != 'CRITICAL':
+            self._status = 'WARNING'
+        self._add(msg)
+
+    def ok(self, msg):
+        '''Record msg without changing the current status'''
+        self._add(msg)
+
+    def get_status(self):
+        '''
+        Render the current status, raising nagios_plugin3 exceptions if things
+        are not OK
+        '''
+        if self._status == 'OK':
+            msg = '{}: no errors'.format(self._status)
+            print(msg)
+        else:
+            msg = '{}: {}'.format(self._status,
+                                  ' | '.join(self._msgs))
+            if self._status == 'CRITICAL':
+                raise CriticalError(msg)
+            elif self._status == 'WARNING':
+                raise WarnError(msg)
+            else:
+                # this really shouldn't be happening
+                raise UnknownError(msg)
+
+    def __repr__(self):
+        # __repr__ must return a string; printing here and returning None
+        # makes repr() raise TypeError
+        return '{}({!r})'.format(type(self).__name__, self._status)
def main():
- try_check(parse_output)
+    # Run the parsing inside try_check as well (as the replaced call did), so
+    # that a missing or malformed input file is handled by try_check instead
+    # of escaping as an uncaught exception
+    try_check(lambda: eval_status(parse_output(INPUT_FILE)))
if __name__ == '__main__':
Follow ups