Re: [Merge] ~aieri/hw-health-charm:oo-rewrite-core into hw-health-charm:oo-rewrite-integration
Comments addressed.
Lint and unittests pass.
Diff comments:
> diff --git a/src/lib/hwhealth/classes.py b/src/lib/hwhealth/classes.py
> new file mode 100644
> index 0000000..1884059
> --- /dev/null
> +++ b/src/lib/hwhealth/classes.py
Makes sense. I was going for "classes for the hwhealth charm" but tools.py or tool_classes.py would be better.
> @@ -0,0 +1,298 @@
> +import os
> +import shutil
> +import subprocess
> +from zipfile import ZipFile, BadZipFile
> +from tempfile import TemporaryDirectory
> +from charmhelpers import fetch
> +from charmhelpers.core import hookenv
> +from charmhelpers.core.hookenv import status_set
> +from charmhelpers.contrib.charmsupport import nrpe
> +
> +
> +class Tool():
> + '''
> + This is an abstract class representing a "tool".
> + The idea is to delegate install/configure duties to specific per-tool
> + classes instead of handling multiple exceptional cases in a main loop
> + during the install hook.
> + Every tool should implement its own internal logic regarding how to be
> + installed, configured, and removed.
> + '''
> + NRPE_PLUGINS_DIR = '/usr/local/lib/nagios/plugins'
> + NRPE_PLUGINS_MODE = 0o100755
> + NRPE_PLUGINS_UID = 0
> + NRPE_PLUGINS_GID = 0
> + TOOLS_DIR = '/usr/local/bin'
> + TOOLS_MODE = 0o100755
> + TOOLS_UID = 0
> + TOOLS_GID = 0
> + CROND_DIR = '/etc/cron.d'
> + CRONJOB_SCRIPT_MODE = 0o100755
> + CRONJOB_SCRIPT_UID = 0
> + CRONJOB_SCRIPT_GID = 0
> + SUDOERS_DIR = '/etc/sudoers.d'
> + SUDOERS_MODE = 0o100440
> + SUDOERS_UID = 0
> + SUDOERS_GID = 0
> +
> + def __init__(self, shortname, nrpe_opts=''):
> + self._nagios_hostname = nrpe.get_nagios_hostname()
> + self._nrpe_opts = nrpe_opts
> + self._shortname = shortname
> + self._files_dir = os.path.join(hookenv.charm_dir(),
> + 'files',
> + self._shortname)
> + self._nrpe_script = 'check_{}'.format(shortname)
> +
> + def _install_nrpe_plugin(self):
> + src = os.path.join(self._files_dir, self._nrpe_script)
> + dst = shutil.copy(src, self.NRPE_PLUGINS_DIR)
> + os.chmod(dst, self.NRPE_PLUGINS_MODE)
> + os.chown(dst, uid=self.NRPE_PLUGINS_UID, gid=self.NRPE_PLUGINS_GID)
> + hookenv.log('NRPE script for tool [{}] installed as {}'
> + ''.format(self._shortname, dst),
> + hookenv.DEBUG)
> + return dst
> +
> + def _remove_nrpe_plugin(self):
> + plugin_path = os.path.join(self.NRPE_PLUGINS_DIR, self._nrpe_script)
> + os.remove(plugin_path)
> + hookenv.log('deleted NRPE script for tool [{}]'
> + ''.format(self._shortname),
> + hookenv.DEBUG)
> +
> + def configure_nrpe_check(self):
> + nrpe_setup = nrpe.NRPE(hostname=self._nagios_hostname, primary=False)
> + cmd = ' '.join([self._nrpe_script, self._nrpe_opts])
> + nrpe_setup.add_check(
> + shortname=self._shortname,
> + description='{} Hardware Health'.format(self._shortname),
> + check_cmd=cmd
> + )
> + nrpe_setup.write()
> + hookenv.log('configured NRPE check for tool [{}]'
> + ''.format(self._shortname),
> + hookenv.DEBUG)
> +
> + def remove_nrpe_check(self):
> + nrpe_setup = nrpe.NRPE(hostname=self._nagios_hostname, primary=False)
> + cmd = ' '.join([self._nrpe_script, self._nrpe_opts])
> + nrpe_setup.remove_check(
> + shortname=self._shortname,
> + check_cmd=cmd
> + )
> + nrpe_setup.write()
> + hookenv.log('removed NRPE check for tool [{}]'
> + ''.format(self._shortname),
> + hookenv.DEBUG)
> +
> + def install(self):
> + self._install_nrpe_plugin()
> + hookenv.log('Installed tool [{}]'.format(self._shortname))
> +
> + def remove(self):
> + self.remove_nrpe_check()
> + self._remove_nrpe_plugin()
> + hookenv.log('Removed tool [{}]'.format(self._shortname))
> +
> +
> +class VendorTool(Tool):
> + '''
> + A Vendor tool is a statically linked binary we ship via a resource.
> + Since it generally requires root privileges to run, we also deploy a
> + cronjob that runs as root and saves the tool output in a temporary file
> + that nrpe can read (as nagios user).
> + '''
> + def __init__(self, shortname):
> + super().__init__(shortname=shortname)
> + self._cronjob_script = 'check_{}.sh'.format(shortname)
> + self._nrpe_script = 'check_{}.py'.format(shortname)
> +
> + def install(self):
> + self._install_from_resource()
> + # TODO(aluria): install vendor utilities via snaps
> + # https://forum.snapcraft.io/t/request-for-classic-confinement-sas2ircu/9023
> + # Snap interface for hw tooling is not supported
This TODO is effectively already covered by https://bugs.launchpad.net/hw-health-charm/+bug/1809140, so I'll just remove it.
> + # in strict mode (confined). Classic mode
> + # (unconfined snaps) need approval
> + # Install the cronjob that generates the temp output
> + self._install_cronjob()
> + super().install()
> +
> + def remove(self):
> + self._remove_cronjob()
> + self._remove_binary()
> + super().remove()
> +
> + def _install_from_resource(self):
> + resource = hookenv.resource_get('tools')
> + if resource:
Yeah, inverting the if makes sense, but I'm personally not a fan of the if/return pattern in general. Since the extra indentation level doesn't cause any awkward line wrapping, I'll keep it as is.
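For reference, the inverted version would read roughly like this (just a sketch of the shape, not something I'm pushing in this branch):

def _install_from_resource(self):
    resource = hookenv.resource_get('tools')
    if not resource:
        hookenv.log('Missing Juju resource: tools - alternative method is '
                    'not available yet', hookenv.ERROR)
        status_set('blocked', 'Missing Juju resource: tools')
        return
    # ...the unzip/copy/chmod block stays as above, one indentation level
    # shallower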
> + hookenv.log('Installing tool [{}] from resource'
> + ''.format(self._shortname),
> + hookenv.DEBUG)
> + status_set('waiting', 'Installing from attached resource')
> + # Move in from a temp directory to be atomic
> + with TemporaryDirectory() as tmpdir:
> + try:
> + with ZipFile(resource, 'r') as zipfile:
> + tmpfile = zipfile.extract(self._shortname, tmpdir)
> + # We could just use self.TOOLS_DIR as a destination
> + # here, but shutil.move refuses to overwrite the
> + # destination file unless it receives a full path
> + dst = shutil.move(tmpfile,
+1
> + os.path.join(self.TOOLS_DIR,
> + self._shortname))
> + os.chmod(dst, self.TOOLS_MODE)
> + os.chown(dst, uid=self.TOOLS_UID, gid=self.TOOLS_GID)
> +
> + except BadZipFile as error:
> + hookenv.log('BadZipFile: {}'.format(error), hookenv.ERROR)
> +
> + except PermissionError as error:
> + hookenv.log('Unable to unzip tool {} from the provided '
> + 'resource: {}'.format(self._shortname, error),
> + hookenv.ERROR)
> + except KeyError as error:
> + hookenv.log('Tool {} not found in the provided resource: '
> + '{}'.format(self._shortname, error),
> + hookenv.ERROR)
> + status_set('blocked',
> + 'Tool {} not found'.format(self._shortname))
> +
> + else:
> + hookenv.log(
> + 'Missing Juju resource: tools - alternative method is not '
> + 'available yet', hookenv.ERROR)
> + status_set('blocked', 'Missing Juju resource: tools')
> +
> + def _remove_binary(self):
> + binary_path = os.path.join(self.TOOLS_DIR, self._shortname)
> + os.remove(binary_path)
> + hookenv.log('Removed binary tool {}'.format(binary_path),
> + hookenv.DEBUG)
> +
> + def _install_cronjob(self):
> + # Copy the cronjob script to the nagios plugins directory
> + src = os.path.join(self._files_dir, self._cronjob_script)
> + dst = shutil.copy(src, self.NRPE_PLUGINS_DIR)
> + os.chmod(dst, self.CRONJOB_SCRIPT_MODE)
> + os.chown(dst,
> + uid=self.CRONJOB_SCRIPT_UID,
> + gid=self.CRONJOB_SCRIPT_GID)
> + hookenv.log('Cronjob script [{}] copied to {}'
> + ''.format(self._cronjob_script, self.NRPE_PLUGINS_DIR),
> + hookenv.DEBUG)
> + # Run it once to generate the temp file, otherwise the nrpe check might
> + # fail at first
> + subprocess.call(dst)
> + # Now generate the cronjob file
> + cronjob_line = '*/5 * * * * root {}\n'.format(dst)
> + crond_file = os.path.join(self.CROND_DIR, self._shortname)
> + with open(crond_file, 'w') as crond_fd:
> + crond_fd.write(cronjob_line)
> + hookenv.log('Cronjob configured at {}'.format(crond_file),
> + hookenv.DEBUG)
> +
> + return dst
> +
> + def _remove_cronjob(self):
> + crond_file = os.path.join(self.CROND_DIR, self._shortname)
> + cronjob_script = os.path.join(self.NRPE_PLUGINS_DIR,
> + self._cronjob_script)
> + os.remove(crond_file)
> + os.remove(cronjob_script)
> + hookenv.log('Removed cronjob files [{}, {}]'
> + ''.format(crond_file, cronjob_script),
> + hookenv.DEBUG)
> +
> +
> +class Sas3IrcuTool(VendorTool):
> + def __init__(self):
> + super().__init__(shortname='sas3ircu')
> +
> +
> +class Sas2IrcuTool(VendorTool):
> + def __init__(self):
> + super().__init__(shortname='sas2ircu')
> +
> +
> +class MegaCLITool(VendorTool):
> + def __init__(self):
> + super().__init__(shortname='megacli')
> +
> +
> +class MdadmTool(VendorTool):
> + '''
> + Our mdadm check kind of behaves like a VendorTool for the purpose of
> + installation as it has a cronjob + check script
> + '''
> + def __init__(self):
> + shortname = 'mdadm'
> + super().__init__(shortname=shortname)
> + self._cronjob_script = 'cron_{}.py'.format(shortname)
> + self._nrpe_script = 'check_{}.py'.format(shortname)
> +
> + def install(self):
> + # mdadm should already be installed, but let's check
> + fetch.apt_install(['mdadm'], fatal=True)
> + self._install_cronjob()
> + # No vendor binary to install
> + Tool.install(self)
> +
> + def remove(self):
> + self._remove_cronjob()
> + # There's no vendor binary to remove
> + Tool.remove(self)
> +
> +
> +class IpmiTool(Tool):
> + '''
> + The IPMI check does not rely on a vendor binary, it only uses freeipmi
> + (available via the main repos) and a nrpe check that is imported as a git
> + submodule
> + '''
> + def __init__(self, nrpe_opts=''):
> + super().__init__(shortname='ipmi',
> + nrpe_opts=hookenv.config('ipmi_check_options'))
> + self._nrpe_script = 'check_ipmi_sensor'
> + self._sudoer_file = 'check_ipmi_sensor.sudoer'
Because I was sort of following the style of the other tools:
def __init__(self):
shortname = 'mdadm'
super().__init__(shortname=shortname)
self._cronjob_script = 'cron_{}.py'.format(shortname)
self._nrpe_script = 'check_{}.py'.format(shortname)
And also because I don't expect us to need to inherit from the IpmiTool class.
> +
> + def install(self):
> + # Install the sudoer file
> + self._install_sudoer()
> + super().install()
> +
> + def remove(self):
> + self._remove_sudoer()
> + super().remove()
> +
> + def _install_sudoer(self):
> + '''
> + User nagios is normally not able to run ipmi-sel and ipmi-sensors, so
> + we need to add a sudoer entry for it
> + '''
> + src = os.path.join(self._files_dir, self._sudoer_file)
> + dst = shutil.copy(src, self.SUDOERS_DIR)
> + os.chmod(dst, self.SUDOERS_MODE)
> + os.chown(dst, uid=self.SUDOERS_UID, gid=self.SUDOERS_GID)
> + hookenv.log('sudoer file for tool [{}] installed at {}'
> + ''.format(self._shortname, dst),
> + hookenv.DEBUG)
> + return dst
> +
> + def _remove_sudoer(self):
> + sudoer_path = os.path.join(self.SUDOERS_DIR, self._sudoer_file)
> + os.remove(sudoer_path)
> + hookenv.log('deleted sudoer file {}'.format(sudoer_path), hookenv.DEBUG)
> +
> +
> +class HPTool(VendorTool):
> + # ['hponcfg', 'hp-health', 'hpssacli']
> + def __init__(self):
> + raise NotImplementedError
> +
> +
> +class HPETool(VendorTool):
> + # ['hponcfg', 'ilorest']
> + def __init__(self):
> + raise NotImplementedError
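To make the delegation described in the Tool docstring concrete: from the charm layer's point of view, usage boils down to iterating over tool objects, roughly like this (illustrative sketch only, run inside a hook context; the real reactive code is in the hw_health.py hunk below):

from hwhealth import classes

tools = [classes.MegaCLITool(), classes.IpmiTool()]
for tool in tools:
    tool.install()                 # NRPE plugin, plus cronjob/sudoer where needed
    tool.configure_nrpe_check()    # register the check with NRPE

# and symmetrically on teardown:
for tool in tools:
    tool.remove()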
> diff --git a/src/lib/hwhealth/hwdiscovery.py b/src/lib/hwhealth/hwdiscovery.py
> index 480fea2..ba949ef 100644
> --- a/src/lib/hwhealth/hwdiscovery.py
> +++ b/src/lib/hwhealth/hwdiscovery.py
> @@ -4,20 +4,38 @@ import os
> import re
> import subprocess
>
> -# import sys
> +from hwhealth import classes
> from charmhelpers.core import host, hookenv
>
>
> -def get_tools():
> +def get_tools(manufacturer='auto'):
> + if manufacturer == 'test':
> + # Return all possible tools to aid testing
> + return [
> + classes.MegaCLITool(),
+1
> + classes.Sas2IrcuTool(),
> + classes.Sas3IrcuTool(),
> + # classes.HPETool(), # NotImplementedYet
> + # classes.HPTool(), # NotImplementedYet
> + classes.MdadmTool(),
> + classes.IpmiTool(),
> + ]
> + elif manufacturer == 'auto':
> + return _get_tools()
> + else:
> + raise NotImplementedError
> +
> +
> +def _get_tools():
> tools = set()
>
> # MegaRAID SAS
> if len(glob.glob('/sys/bus/pci/drivers/megaraid_sas/00*')) > 0:
> - tools.add('megacli')
> + tools.add(classes.MegaCLITool())
>
> # Huawei LSI SAS card
> if len(glob.glob('/sys/bus/pci/drivers/mpt2sas/00*')) > 0:
> - tools.add('sas2ircu')
> + tools.add(classes.Sas2IrcuTool())
>
> # New Huawei drivers, but also Supermicro LSI SAS cards
> pci_devs = glob.glob('/sys/bus/pci/drivers/mpt3sas/00*')
> diff --git a/src/reactive/hw_health.py b/src/reactive/hw_health.py
> index 20bc354..941b33a 100644
> --- a/src/reactive/hw_health.py
> +++ b/src/reactive/hw_health.py
> @@ -2,69 +2,49 @@ import os
>
> from charmhelpers.core import hookenv, host, unitdata
> from charms.layer import status
> -from charms.reactive import (
> +from charms.reactive.flags import (
> + set_flag,
> clear_flag,
> +)
> +from charms.reactive import (
> hook,
> - set_flag,
> when,
> when_none,
> when_not,
> )
> from hwhealth.hwdiscovery import get_tools
> -from hwhealth.tooling import (
> - configure_nrpe_checks,
> - install_resource,
> - remove_nrpe_checks,
> -)
> +from hwhealth import classes
>
>
> @when_none('hw-health.installed', 'hw-health.unsupported')
> +@when('general-info.connected')
> def install():
> - if host.is_container():
> + manufacturer = hookenv.config('manufacturer')
Yeah, that would make sense.
> + if host.is_container() and manufacturer != 'test':
> status.blocked('Containers are not supported')
> set_flag('hw-health.unsupported')
> return
>
> - config = hookenv.config()
> - if config.get('manufacturer', '') != 'auto':
> + if manufacturer not in ['auto', 'test']:
> status.blocked('manufacturer needs to be set to auto')
> return
>
> + # Detect hardware and return a list of tools we need to use
> status.maintenance('Autodiscovering hardware')
> - tools = get_tools()
> + tools = get_tools(manufacturer)
> if not tools:
> - set_flag('hw-health.unsupported')
> status.blocked('Hardware not supported')
> + set_flag('hw-health.unsupported')
> else:
> - resource = hookenv.resource_get('tools')
> - if resource:
> - hookenv.log('Installing from resource')
> - status.waiting('Installing from attached resource')
> - installed = install_resource(tools, resource)
> - else:
> - hookenv.log(
> - 'Missing Juju resource: tools - alternative method is not'
> - ' available yet', hookenv.ERROR)
> - status.blocked('Missing Juju resource: tools')
> - return
> - # TODO(aluria): install vendor utilities via snaps
> - # https://forum.snapcraft.io/t/request-for-classic-confinement-sas2ircu/9023
> - # Snap interface for hw tooling is not supported
> - # in strict mode (confined). Classic mode
> - # (unconfined snaps) need approval
> - #
> - # hookenv.log('Installing from snap')
> - # status.waiting('Installing from snap')
> - # installed = install_tools(tools, method='snap')
> -
> - if installed:
> - kv = unitdata.kv()
> - kv.set('tools', list(tools))
> - set_flag('hw-health.installed')
> - status.waiting('Preparing scripts installation')
> - else:
> - missing_tools = tools - {'mdadm', 'ipmi'}
> - status.blocked('Tools not found: {}'.format(', '.join(missing_tools)))
> + tool_list = list()
> + for tool in tools:
> + tool.install()
> + # Save the class name in the unit kv db. This will be reused when
> + # reconfiguring or removing the checks
> + tool_list.append(type(tool).__name__)
> + unitdb = unitdata.kv()
> + unitdb.set('tools', tool_list)
> + set_flag('hw-health.installed')
>
>
> @hook('upgrade-charm')
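Side note on the kv usage above: the idea is that the stored class names can be turned back into tool objects later, along these lines (sketch only; the actual remove/reconfigure handlers may end up looking different):

from charmhelpers.core import unitdata
from hwhealth import classes

def _stored_tools():
    unitdb = unitdata.kv()
    return [getattr(classes, name)() for name in unitdb.get('tools', [])]

# e.g. when tearing down the checks:
# for tool in _stored_tools():
#     tool.remove()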
> diff --git a/src/tests/unit/test_hwdiscovery.py b/src/tests/unit/test_hwdiscovery.py
> index 06b5f95..9c184b7 100644
> --- a/src/tests/unit/test_hwdiscovery.py
> +++ b/src/tests/unit/test_hwdiscovery.py
> @@ -9,50 +9,79 @@ sys.path.append('lib/hwhealth')
> import hwdiscovery # noqa: E402
>
>
> -class TestHWDiscovery(unittest.TestCase):
> - @mock.patch('hwdiscovery._supports_mdadm')
> - @mock.patch('hwdiscovery.dmidecode')
> - @mock.patch('glob.glob')
> - @mock.patch('charmhelpers.core.hookenv.config')
> - def test_get_tools_megacli(self, mock_hookenv, driver, dmidecode, supports_mdadm):
> +class TestGetTools(unittest.TestCase):
> + def setUp(self):
> + patchers = list()
> + # Mock all Tool classes
> + hwhealth_classes = [
> + 'HPETool',
> + 'HPTool',
> + 'MdadmTool',
> + 'IpmiTool',
> + 'Sas2IrcuTool',
> + 'Sas3IrcuTool',
> + 'MegaCLITool'
> + ]
> + for hwhealth_class in hwhealth_classes:
> + patchers.append(mock.patch('hwdiscovery.classes.{}'
> + ''.format(hwhealth_class),
> + autospec=True))
> +
> + # Provide test defaults that should make get_tools detect no tool
> + # support. Specific hardware will be mocked in the specific tests
> +
> + # Mock the hookenv; disable IPMI support
> + def config_return_values(value):
> + if value == 'enable_ipmi':
ha! of course
> + return False
> + else:
> + return True
> +
> + patchers.append(mock.patch('hwdiscovery.hookenv.config',
> + autospec=True,
> + side_effect=config_return_values))
> + # Disable mdadm support
> + patchers.append(mock.patch('hwdiscovery._supports_mdadm',
> + return_value=False))
> + # Assume a dummy 'unittest' hardware model
> + patchers.append(mock.patch('hwdiscovery.dmidecode',
> + return_value='unittest'))
> +
> + # Start all mocks
> + for patcher in patchers:
> + patcher.start()
> +
> + # Clean up all mocks at the end of the testrun
> + self.addCleanup(mock.patch.stopall)
> +
> + @mock.patch('hwdiscovery.glob.glob')
> + def test_get_tools_megacli(self, driver):
> def side_effect_glob(value):
> if value == '/sys/bus/pci/drivers/megaraid_sas/00*':
> return ['/...megaraid']
> else:
> return []
>
> - supports_mdadm.return_value = False
> - dmidecode.return_value = 'unittest'
> driver.side_effect = side_effect_glob
> - mock_hookenv.return_value = True
> real = hwdiscovery.get_tools()
> - expected = set(['megacli', 'ipmi'])
> + expected = set([hwdiscovery.classes.MegaCLITool()])
> self.assertEqual(real, expected)
>
> - @mock.patch('hwdiscovery._supports_mdadm')
> - @mock.patch('hwdiscovery.dmidecode')
> - @mock.patch('glob.glob')
> - @mock.patch('charmhelpers.core.hookenv.config')
> - def test_get_tools_sas2ircu(self, mock_hookenv, driver, dmidecode, supports_mdadm):
> + @mock.patch('hwdiscovery.glob.glob')
> + def test_get_tools_sas2ircu(self, driver):
> def side_effect_glob(value):
> if value == '/sys/bus/pci/drivers/mpt2sas/00*':
> return ['/...mp2sas']
> else:
> return []
>
> - supports_mdadm.return_value = False
> - dmidecode.return_value = 'unittest'
> - mock_hookenv.return_value = True
> driver.side_effect = side_effect_glob
> real = hwdiscovery.get_tools()
> - expected = set(['sas2ircu', 'ipmi'])
> + expected = set([hwdiscovery.classes.Sas2IrcuTool()])
> self.assertEqual(real, expected)
>
> - @mock.patch('hwdiscovery._supports_mdadm')
> - @mock.patch('hwdiscovery.dmidecode')
> - @mock.patch('glob.glob')
> - @mock.patch('charmhelpers.core.hookenv.config')
> - def test_get_tools_sas3ircu(self, mock_hookenv, driver, dmidecode, supports_mdadm):
> + @mock.patch('hwdiscovery.glob.glob')
> + def test_get_tools_sas3ircu(self, driver):
> def side_effect_glob(value):
> if value not in ('/sys/bus/pci/drivers/megaraid_sas/00*',
> '/sys/bus/pci/drivers/mpt2sas/00*'):
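One more thought on the test refactor: since setUp now makes "no tool support" the baseline, a nothing-detected case is basically free. A hypothetical extra test (not part of this branch) could look like:

@mock.patch('hwdiscovery.glob.glob', return_value=[])
def test_get_tools_nothing_detected(self, driver):
    # setUp already disables mdadm and ipmi and mocks dmidecode, so with
    # no PCI driver matches autodiscovery should find nothing
    self.assertEqual(hwdiscovery.get_tools(), set())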
--
https://code.launchpad.net/~aieri/hw-health-charm/+git/hw-health-charm/+merge/364757
Your team Nagios Charm developers is subscribed to branch hw-health-charm:oo-rewrite-integration.