← Back to team overview

nagios-charmers team mailing list archive

Re: [Merge] ~aieri/hw-health-charm:oo-rewrite-core into hw-health-charm:oo-rewrite-integration

 

Comments addressed.
Lint and unittests pass.

Diff comments:

> diff --git a/src/lib/hwhealth/classes.py b/src/lib/hwhealth/classes.py
> new file mode 100644
> index 0000000..1884059
> --- /dev/null
> +++ b/src/lib/hwhealth/classes.py

Makes sense. I was going for "classes for the hwhealth charm" but tools.py or tool_classes.py would be better.

> @@ -0,0 +1,298 @@
> +import os
> +import shutil
> +import subprocess
> +from zipfile import ZipFile, BadZipFile
> +from tempfile import TemporaryDirectory
> +from charmhelpers import fetch
> +from charmhelpers.core import hookenv
> +from charmhelpers.core.hookenv import status_set
> +from charmhelpers.contrib.charmsupport import nrpe
> +
> +
> +class Tool():
> +    '''
> +    This is an abstract class representing a "tool".
> +    The idea is to delegate install/configure duties to specific per-tool
> +    classes instead of handling multiple exceptional cases in a main loop
> +    during the install hook.
> +    Every tool should implement its own internal logic regarding how to be
> +    installed, configured, and removed.
> +    '''
> +    NRPE_PLUGINS_DIR = '/usr/local/lib/nagios/plugins'
> +    NRPE_PLUGINS_MODE = 0o100755
> +    NRPE_PLUGINS_UID = 0
> +    NRPE_PLUGINS_GID = 0
> +    TOOLS_DIR = '/usr/local/bin'
> +    TOOLS_MODE = 0o100755
> +    TOOLS_UID = 0
> +    TOOLS_GID = 0
> +    CROND_DIR = '/etc/cron.d'
> +    CRONJOB_SCRIPT_MODE = 0o100755
> +    CRONJOB_SCRIPT_UID = 0
> +    CRONJOB_SCRIPT_GID = 0
> +    SUDOERS_DIR = '/etc/sudoers.d'
> +    SUDOERS_MODE = 0o100440
> +    SUDOERS_UID = 0
> +    SUDOERS_GID = 0
> +
> +    def __init__(self, shortname, nrpe_opts=''):
> +        self._nagios_hostname = nrpe.get_nagios_hostname()
> +        self._nrpe_opts = nrpe_opts
> +        self._shortname = shortname
> +        self._files_dir = os.path.join(hookenv.charm_dir(),
> +                                       'files',
> +                                       self._shortname)
> +        self._nrpe_script = 'check_{}'.format(shortname)
> +
> +    def _install_nrpe_plugin(self):
> +        src = os.path.join(self._files_dir, self._nrpe_script)
> +        dst = shutil.copy(src, self.NRPE_PLUGINS_DIR)
> +        os.chmod(dst, self.NRPE_PLUGINS_MODE)
> +        os.chown(dst, uid=self.NRPE_PLUGINS_UID, gid=self.NRPE_PLUGINS_GID)
> +        hookenv.log('NRPE script for tool [{}] installed at as {}'
> +                    ''.format(self._shortname, dst),
> +                    hookenv.DEBUG)
> +        return dst
> +
> +    def _remove_nrpe_plugin(self):
> +        plugin_path = os.path.join(self.NRPE_PLUGINS_DIR, self._nrpe_script)
> +        os.remove(plugin_path)
> +        hookenv.log('deleted NRPE script for tool [{}]'
> +                    ''.format(self._shortname),
> +                    hookenv.DEBUG)
> +
> +    def configure_nrpe_check(self):
> +        nrpe_setup = nrpe.NRPE(hostname=self._nagios_hostname, primary=False)
> +        cmd = ' '.join([self._nrpe_script, self._nrpe_opts])
> +        nrpe_setup.add_check(
> +            shortname=self._shortname,
> +            description='{} Hardware Health',
> +            check_cmd=cmd
> +        )
> +        nrpe_setup.write()
> +        hookenv.log('configured NRPE check for tool [{}]'
> +                    ''.format(self._shortname),
> +                    hookenv.DEBUG)
> +
> +    def remove_nrpe_check(self):
> +        nrpe_setup = nrpe.NRPE(hostname=self._nagios_hostname, primary=False)
> +        cmd = ' '.join([self._nrpe_script, self._nrpe_opts])
> +        nrpe_setup.remove_check(
> +            shortname=self._shortname,
> +            check_cmd=cmd
> +        )
> +        nrpe_setup.write()
> +        hookenv.log('removed NRPE check for tool [{}]'
> +                    ''.format(self._shortname),
> +                    hookenv.DEBUG)
> +
> +    def install(self):
> +        self._install_nrpe_plugin()
> +        hookenv.log('Installed tool [{}]'.format(self._shortname))
> +
> +    def remove(self):
> +        self.remove_nrpe_check()
> +        self._remove_nrpe_plugin()
> +        hookenv.log('Removed tool [{}]'.format(self._shortname))
> +
> +
> +class VendorTool(Tool):
> +    '''
> +    A Vendor tool is a statically linked binary we ship via a resource.
> +    Since it generally requires root privileges to run, we also deploy a
> +    cronjob that runs as root and saves the tool output in a temporary file
> +    that nrpe can read (as nagios user).
> +    '''
> +    def __init__(self, shortname):
> +        super().__init__(shortname=shortname)
> +        self._cronjob_script = 'check_{}.sh'.format(shortname)
> +        self._nrpe_script = 'check_{}.py'.format(shortname)
> +
> +    def install(self):
> +        self._install_from_resource()
> +        # TODO(aluria): install vendor utilities via snaps
> +        # https://forum.snapcraft.io/t/request-for-classic-confinement-sas2ircu/9023
> +        # Snap interface for hw tooling is not supported

This TODO is effectively already covered by https://bugs.launchpad.net/hw-health-charm/+bug/1809140, so I'll just remove it.

> +        # in strict mode (confined). Classic mode
> +        # (unconfined snaps) need approval
> +        # Install the cronjob that generates the temp output
> +        self._install_cronjob()
> +        super().install()
> +
> +    def remove(self):
> +        self._remove_cronjob()
> +        self._remove_binary()
> +        super().remove()
> +
> +    def _install_from_resource(self):
> +        resource = hookenv.resource_get('tools')
> +        if resource:

Yeah, inverting the if makes sense, but I'm personally not a fan of the if/return pattern (in general). Since the extra indentation level doesn't cause weird wrapping, I'll keep it.

> +            hookenv.log('Installing tool [{}] from resource'
> +                        ''.format(self._shortname),
> +                        hookenv.DEBUG)
> +            status_set('waiting', 'Installing from attached resource')
> +            # Move in from a temp directory to be atomic
> +            with TemporaryDirectory() as tmpdir:
> +                try:
> +                    with ZipFile(resource, 'r') as zipfile:
> +                        tmpfile = zipfile.extract(self._shortname, tmpdir)
> +                        # We could just use self.TOOLS_DIR as a destination
> +                        # here, but shutil.move refuses to overwrite the
> +                        # destination file unless it receives a full path
> +                        dst = shutil.move(tmpfile,

+1

> +                                          os.path.join(self.TOOLS_DIR,
> +                                                       self._shortname))
> +                        os.chmod(dst, self.TOOLS_MODE)
> +                        os.chown(dst, uid=self.TOOLS_UID, gid=self.TOOLS_GID)
> +
> +                except BadZipFile as error:
> +                    hookenv.log('BadZipFile: {}'.format(error), hookenv.ERROR)
> +
> +                except PermissionError as error:
> +                    hookenv.log('Unable to unzip tool {} from the provided '
> +                                'resource: {}'.format(self._shortname, error),
> +                                hookenv.ERROR)
> +                except KeyError as error:
> +                    hookenv.log('Tool {} not found in the provided resource: '
> +                                '{}'.format(self._shortname, error),
> +                                hookenv.ERROR)
> +                    status_set('blocked',
> +                               'Tool {} not found'.format(self._shortname))
> +
> +        else:
> +            hookenv.log(
> +                'Missing Juju resource: tools - alternative method is not '
> +                'available yet', hookenv.ERROR)
> +            status_set('blocked', 'Missing Juju resource: tools')
> +
> +    def _remove_binary(self):
> +        binary_path = os.path.join(self.TOOLS_DIR, self._shortname)
> +        os.remove(binary_path)
> +        hookenv.log('Removed binary tool {}'.format(binary_path),
> +                    hookenv.DEBUG)
> +
> +    def _install_cronjob(self):
> +        # Copy the cronjob script to the nagios plugins directory
> +        src = os.path.join(self._files_dir, self._cronjob_script)
> +        dst = shutil.copy(src, self.NRPE_PLUGINS_DIR)
> +        os.chmod(dst, self.CRONJOB_SCRIPT_MODE)
> +        os.chown(dst,
> +                 uid=self.CRONJOB_SCRIPT_UID,
> +                 gid=self.CRONJOB_SCRIPT_GID)
> +        hookenv.log('Cronjob script [{}] copied to {}'
> +                    ''.format(self._cronjob_script, self.NRPE_PLUGINS_DIR),
> +                    hookenv.DEBUG)
> +        # Run it once to generate the temp file, otherwise the nrpe check might
> +        # fail at first
> +        subprocess.call(dst)
> +        # Now generate the cronjob file
> +        cronjob_line = '*/5 * * * * root {}\n'.format(dst)
> +        crond_file = os.path.join(self.CROND_DIR, self._shortname)
> +        with open(crond_file, 'w') as crond_fd:
> +            crond_fd.write(cronjob_line)
> +            hookenv.log('Cronjob configured at {}'.format(crond_file),
> +                        hookenv.DEBUG)
> +
> +        return dst
> +
> +    def _remove_cronjob(self):
> +        crond_file = os.path.join(self.CROND_DIR, self._shortname)
> +        cronjob_script = os.path.join(self.NRPE_PLUGINS_DIR,
> +                                      self._cronjob_script)
> +        os.remove(crond_file)
> +        os.remove(cronjob_script)
> +        hookenv.log('Removed cronjob files [{}, {}]'
> +                    ''.format(crond_file, cronjob_script),
> +                    hookenv.DEBUG)
> +
> +
> +class Sas3IrcuTool(VendorTool):
> +    def __init__(self):
> +        super().__init__(shortname='sas3ircu')
> +
> +
> +class Sas2IrcuTool(VendorTool):
> +    def __init__(self):
> +        super().__init__(shortname='sas2ircu')
> +
> +
> +class MegaCLITool(VendorTool):
> +    def __init__(self):
> +        super().__init__(shortname='megacli')
> +
> +
> +class MdadmTool(VendorTool):
> +    '''
> +    Our mdadm check kind of behaves like a VendorTool for the purpose of
> +    installation as it has a cronjob + check script
> +    '''
> +    def __init__(self):
> +        shortname = 'mdadm'
> +        super().__init__(shortname=shortname)
> +        self._cronjob_script = 'cron_{}.py'.format(shortname)
> +        self._nrpe_script = 'check_{}.py'.format(shortname)
> +
> +    def install(self):
> +        # mdadm should already be installed, but let's check
> +        fetch.apt_install(['mdadm'], fatal=True)
> +        self._install_cronjob()
> +        # No vendor binary to install
> +        Tool.install(self)
> +
> +    def remove(self):
> +        self._remove_cronjob()
> +        # There's no vendor binary to remove
> +        Tool.remove(self)
> +
> +
> +class IpmiTool(Tool):
> +    '''
> +    The IPMI check does not rely on a vendor binary, it only uses freeipmi
> +    (available via the main repos) and a nrpe check that is imported as a git
> +    submodule
> +    '''
> +    def __init__(self, nrpe_opts=''):
> +        super().__init__(shortname='ipmi',
> +                         nrpe_opts=hookenv.config('ipmi_check_options'))
> +        self._nrpe_script = 'check_ipmi_sensor'
> +        self._sudoer_file = 'check_ipmi_sensor.sudoer'

Because I was sort of following the style of the other tools:

      def __init__(self):
          shortname = 'mdadm'
          super().__init__(shortname=shortname)
          self._cronjob_script = 'cron_{}.py'.format(shortname)
          self._nrpe_script = 'check_{}.py'.format(shortname)

And also because I don't expect us to need to inherit from the IpmiTool class.

> +
> +    def install(self):
> +        # Install the sudoer file
> +        self._install_sudoer()
> +        super().install()
> +
> +    def remove(self):
> +        self._remove_sudoer()
> +        super().remove()
> +
> +    def _install_sudoer(self):
> +        '''
> +        User nagios is normally not able to run ipmi-sel and ipmi-sensors, so
> +        we need to add a sudoer entry for it
> +        '''
> +        src = os.path.join(self._files_dir, self._sudoer_file)
> +        dst = shutil.copy(src, self.SUDOERS_DIR)
> +        os.chmod(dst, self.SUDOERS_MODE)
> +        os.chown(dst, uid=self.SUDOERS_UID, gid=self.SUDOERS_GID)
> +        hookenv.log('sudoer file for tool [{}] installed at {}'
> +                    ''.format(self._shortname, dst),
> +                    hookenv.DEBUG)
> +        return dst
> +
> +    def _remove_sudoer(self):
> +        sudoer_path = os.path.join(self.SUDOERS_DIR, self._sudoer_file)
> +        os.remove(sudoer_path)
> +        hookenv.log('deleted sudoer file'.format(sudoer_path), hookenv.DEBUG)
> +
> +
> +class HPTool(VendorTool):
> +    # ['hponcfg', 'hp-health', 'hpssacli']
> +    def __init__(self):
> +        raise NotImplementedError
> +
> +
> +class HPETool(VendorTool):
> +    # ['hponcfg', 'ilorest']
> +    def __init__(self):
> +        raise NotImplementedError
> diff --git a/src/lib/hwhealth/hwdiscovery.py b/src/lib/hwhealth/hwdiscovery.py
> index 480fea2..ba949ef 100644
> --- a/src/lib/hwhealth/hwdiscovery.py
> +++ b/src/lib/hwhealth/hwdiscovery.py
> @@ -4,20 +4,38 @@ import os
>  import re
>  import subprocess
>  
> -# import sys
> +from hwhealth import classes
>  from charmhelpers.core import host, hookenv
>  
>  
> -def get_tools():
> +def get_tools(manufacturer='auto'):
> +    if manufacturer == 'test':
> +        # Return all possible tools to aid testing
> +        return [
> +            classes.MegaCLITool(),

+1

> +            classes.Sas2IrcuTool(),
> +            classes.Sas3IrcuTool(),
> +            # classes.HPETool(), # NotImplementedYet
> +            # classes.HPTool(),  # NotImplementedYet
> +            classes.MdadmTool(),
> +            classes.IpmiTool(),
> +        ]
> +    elif manufacturer == 'auto':
> +        return _get_tools()
> +    else:
> +        raise NotImplementedError
> +
> +
> +def _get_tools():
>      tools = set()
>  
>      # MegaRAID SAS
>      if len(glob.glob('/sys/bus/pci/drivers/megaraid_sas/00*')) > 0:
> -        tools.add('megacli')
> +        tools.add(classes.MegaCLITool())
>  
>      # Huawei LSI SAS card
>      if len(glob.glob('/sys/bus/pci/drivers/mpt2sas/00*')) > 0:
> -        tools.add('sas2ircu')
> +        tools.add(classes.Sas2IrcuTool())
>  
>      # New Huawei drivers, but also Supermicro LSI SAS cards
>      pci_devs = glob.glob('/sys/bus/pci/drivers/mpt3sas/00*')
> diff --git a/src/reactive/hw_health.py b/src/reactive/hw_health.py
> index 20bc354..941b33a 100644
> --- a/src/reactive/hw_health.py
> +++ b/src/reactive/hw_health.py
> @@ -2,69 +2,49 @@ import os
>  
>  from charmhelpers.core import hookenv, host, unitdata
>  from charms.layer import status
> -from charms.reactive import (
> +from charms.reactive.flags import (
> +    set_flag,
>      clear_flag,
> +)
> +from charms.reactive import (
>      hook,
> -    set_flag,
>      when,
>      when_none,
>      when_not,
>  )
>  from hwhealth.hwdiscovery import get_tools
> -from hwhealth.tooling import (
> -    configure_nrpe_checks,
> -    install_resource,
> -    remove_nrpe_checks,
> -)
> +from hwhealth import classes
>  
>  
>  @when_none('hw-health.installed', 'hw-health.unsupported')
> +@when('general-info.connected')
>  def install():
> -    if host.is_container():
> +    manufacturer = hookenv.config('manufacturer')

Yeah, that would make sense

> +    if host.is_container() and manufacturer != 'test':
>          status.blocked('Containers are not supported')
>          set_flag('hw-health.unsupported')
>          return
>  
> -    config = hookenv.config()
> -    if config.get('manufacturer', '') != 'auto':
> +    if manufacturer not in ['auto', 'test']:
>          status.blocked('manufacturer needs to be set to auto')
>          return
>  
> +    # Detect hardware and return a list of tools we need to use
>      status.maintenance('Autodiscovering hardware')
> -    tools = get_tools()
> +    tools = get_tools(manufacturer)
>      if not tools:
> -        set_flag('hw-health.unsupported')
>          status.blocked('Hardware not supported')
> +        set_flag('hw-health.unsupported')
>      else:
> -        resource = hookenv.resource_get('tools')
> -        if resource:
> -            hookenv.log('Installing from resource')
> -            status.waiting('Installing from attached resource')
> -            installed = install_resource(tools, resource)
> -        else:
> -            hookenv.log(
> -                'Missing Juju resource: tools - alternative method is not'
> -                ' available yet', hookenv.ERROR)
> -            status.blocked('Missing Juju resource: tools')
> -            return
> -            # TODO(aluria): install vendor utilities via snaps
> -            # https://forum.snapcraft.io/t/request-for-classic-confinement-sas2ircu/9023
> -            # Snap interface for hw tooling is not supported
> -            # in strict mode (confined). Classic mode
> -            # (unconfined snaps) need approval
> -            #
> -            # hookenv.log('Installing from snap')
> -            # status.waiting('Installing from snap')
> -            # installed = install_tools(tools, method='snap')
> -
> -        if installed:
> -            kv = unitdata.kv()
> -            kv.set('tools', list(tools))
> -            set_flag('hw-health.installed')
> -            status.waiting('Preparing scripts installation')
> -        else:
> -            missing_tools = tools - {'mdadm', 'ipmi'}
> -            status.blocked('Tools not found: {}'.format(', '.join(missing_tools)))
> +        tool_list = list()
> +        for tool in tools:
> +            tool.install()
> +            # Save the class name in the unit kv db. This will be reused when
> +            # reconfiguring or removing the checks
> +            tool_list.append(type(tool).__name__)
> +        unitdb = unitdata.kv()
> +        unitdb.set('tools', tool_list)
> +        set_flag('hw-health.installed')
>  
>  
>  @hook('upgrade-charm')
> diff --git a/src/tests/unit/test_hwdiscovery.py b/src/tests/unit/test_hwdiscovery.py
> index 06b5f95..9c184b7 100644
> --- a/src/tests/unit/test_hwdiscovery.py
> +++ b/src/tests/unit/test_hwdiscovery.py
> @@ -9,50 +9,79 @@ sys.path.append('lib/hwhealth')
>  import hwdiscovery  # noqa: E402
>  
>  
> -class TestHWDiscovery(unittest.TestCase):
> -    @mock.patch('hwdiscovery._supports_mdadm')
> -    @mock.patch('hwdiscovery.dmidecode')
> -    @mock.patch('glob.glob')
> -    @mock.patch('charmhelpers.core.hookenv.config')
> -    def test_get_tools_megacli(self, mock_hookenv, driver, dmidecode, supports_mdadm):
> +class TestGetTools(unittest.TestCase):
> +    def setUp(self):
> +        patchers = list()
> +        # Mock all Tool classes
> +        hwhealth_classes = [
> +            'HPETool',
> +            'HPTool',
> +            'MdadmTool',
> +            'IpmiTool',
> +            'Sas2IrcuTool',
> +            'Sas3IrcuTool',
> +            'MegaCLITool'
> +        ]
> +        for hwhealth_class in hwhealth_classes:
> +            patchers.append(mock.patch('hwdiscovery.classes.{}'
> +                                       ''.format(hwhealth_class),
> +                                       autospec=True))
> +
> +        # Provide test defaults that should make get_tools detect no tool
> +        # support. Specific hardware will be mocked in the specific tests
> +
> +        # Mock the hookenv; disable IPMI support
> +        def config_return_values(value):
> +            if value == 'enable_ipmi':

ha! of course

> +                return False
> +            else:
> +                return True
> +
> +        patchers.append(mock.patch('hwdiscovery.hookenv.config',
> +                                   autospec=True,
> +                                   side_effect=config_return_values))
> +        # Disable mdadm support
> +        patchers.append(mock.patch('hwdiscovery._supports_mdadm',
> +                                   return_value=False))
> +        # Assume a dummy 'unittest' hardware model
> +        patchers.append(mock.patch('hwdiscovery.dmidecode',
> +                                   return_value='unittest'))
> +
> +        # Start all mocks
> +        for patcher in patchers:
> +            patcher.start()
> +
> +        # Clean up all mocks at the end of the testrun
> +        self.addCleanup(mock.patch.stopall)
> +
> +    @mock.patch('hwdiscovery.glob.glob')
> +    def test_get_tools_megacli(self, driver):
>          def side_effect_glob(value):
>              if value == '/sys/bus/pci/drivers/megaraid_sas/00*':
>                  return ['/...megaraid']
>              else:
>                  return []
>  
> -        supports_mdadm.return_value = False
> -        dmidecode.return_value = 'unittest'
>          driver.side_effect = side_effect_glob
> -        mock_hookenv.return_value = True
>          real = hwdiscovery.get_tools()
> -        expected = set(['megacli', 'ipmi'])
> +        expected = set([hwdiscovery.classes.MegaCLITool()])
>          self.assertEqual(real, expected)
>  
> -    @mock.patch('hwdiscovery._supports_mdadm')
> -    @mock.patch('hwdiscovery.dmidecode')
> -    @mock.patch('glob.glob')
> -    @mock.patch('charmhelpers.core.hookenv.config')
> -    def test_get_tools_sas2ircu(self, mock_hookenv, driver, dmidecode, supports_mdadm):
> +    @mock.patch('hwdiscovery.glob.glob')
> +    def test_get_tools_sas2ircu(self, driver):
>          def side_effect_glob(value):
>              if value == '/sys/bus/pci/drivers/mpt2sas/00*':
>                  return ['/...mp2sas']
>              else:
>                  return []
>  
> -        supports_mdadm.return_value = False
> -        dmidecode.return_value = 'unittest'
> -        mock_hookenv.return_value = True
>          driver.side_effect = side_effect_glob
>          real = hwdiscovery.get_tools()
> -        expected = set(['sas2ircu', 'ipmi'])
> +        expected = set([hwdiscovery.classes.Sas2IrcuTool()])
>          self.assertEqual(real, expected)
>  
> -    @mock.patch('hwdiscovery._supports_mdadm')
> -    @mock.patch('hwdiscovery.dmidecode')
> -    @mock.patch('glob.glob')
> -    @mock.patch('charmhelpers.core.hookenv.config')
> -    def test_get_tools_sas3ircu(self, mock_hookenv, driver, dmidecode, supports_mdadm):
> +    @mock.patch('hwdiscovery.glob.glob')
> +    def test_get_tools_sas3ircu(self, driver):
>          def side_effect_glob(value):
>              if value not in ('/sys/bus/pci/drivers/megaraid_sas/00*',
>                               '/sys/bus/pci/drivers/mpt2sas/00*'):


-- 
https://code.launchpad.net/~aieri/hw-health-charm/+git/hw-health-charm/+merge/364757
Your team Nagios Charm developers is subscribed to branch hw-health-charm:oo-rewrite-integration.


References