← Back to team overview

nagios-charmers team mailing list archive

[Merge] ~xavpaice/hw-health-charm:add_ipmi into hw-health-charm:master

 

Xav Paice has proposed merging ~xavpaice/hw-health-charm:add_ipmi into hw-health-charm:master.

Requested reviews:
  Xav Paice (xavpaice)
  Peter Sabaini (peter-sabaini)

For more details, see:
https://code.launchpad.net/~xavpaice/hw-health-charm/+git/hw-health-charm/+merge/363682
-- 
Your team Nagios Charm developers is subscribed to branch hw-health-charm:master.
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..0fbc388
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,4 @@
+[submodule "check_ipmi_sensor"]
+	path = check_ipmi_sensor_v3
+	url = git@xxxxxxxxxx:thomas-krenn/check_ipmi_sensor_v3.git
+	branch = master
diff --git a/check_ipmi_sensor_v3 b/check_ipmi_sensor_v3
new file mode 160000
index 0000000..268eb36
--- /dev/null
+++ b/check_ipmi_sensor_v3
@@ -0,0 +1 @@
+Subproject commit 268eb36f0c20b060fd04f4c9d1ff1fcd17d3269f
diff --git a/src/README b/src/README.md
similarity index 88%
rename from src/README
rename to src/README.md
index 2c5475b..971b86c 100644
--- a/src/README
+++ b/src/README.md
@@ -14,6 +14,8 @@ Currently supported hardware is:
  InfoCollect)
 
  * Linux software RAID (mdadm)
+ 
+ * IPMI as implemented by freeipmi
 
 In the backlog, hp-health logic still needs to be backported to support
 Hewlett-Packard equipment (HP Smart Array Controllers and MSA Controllers with
@@ -25,28 +27,29 @@ Furthermore, other hardware in the roadmap is:
 
 # Usage
 
-Step by step instructions on using the charm:
 
+```
 juju deploy ubuntu
 juju deploy hw-health
 juju deploy nrpe
 juju add-relation ubuntu nrpe
 juju add-relation ubuntu hw-health
 juju add-relation hw-health nrpe
+```
 
 Charmstore version already ships a resource. However, a new resource can be
 attached:
 
-  * Option 1: juju deploy hw-health --resource tools=/tmp/zipfile.zip
+  * Option 1: `juju deploy hw-health --resource tools=/tmp/zipfile.zip`
 
-  * Option 2: juju attach-resource hw-health tools=/tmp/zipfile.zip
+  * Option 2: `juju attach-resource hw-health tools=/tmp/zipfile.zip`
 
 In both cases format of zipfile.zip must be one of the following:
-``example
+```
 zip /tmp/zipfile.zip megacli sas2ircu sas3ircu
 zip /tmp/zipfile.zip megacli
 etc.
-``
+```
 
 ## Known Limitations and Issues
 
diff --git a/src/actions.yaml b/src/actions.yaml
new file mode 100644
index 0000000..421c35e
--- /dev/null
+++ b/src/actions.yaml
@@ -0,0 +1,2 @@
+clear-sel:
+  description: Using ipmi-sel, clear the system event log
diff --git a/src/actions/__init__.py b/src/actions/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/actions/__init__.py
diff --git a/src/actions/actions.py b/src/actions/actions.py
new file mode 100755
index 0000000..820f76d
--- /dev/null
+++ b/src/actions/actions.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+#
+# Copyright 2016,2019 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import subprocess
+import sys
+
+from charmhelpers.core.hookenv import action_set, action_fail, log
+
+
+def clear_sel():
+    """
+    Action to clear the IPMI system event log.
+
+    Uses ipmi-sel --post-clear, clears the SEL log and stores the cleared entries
+    in action output.
+    """
+    command = ['/usr/sbin/ipmi-sel', '--post-clear']
+    try:
+        output = subprocess.check_output(command)
+        log("Action clear-sel completed, sel log cleared: {}".format(output))
+        action_set({"cleared_log": output})
+    except subprocess.CalledProcessError as e:
+        action_fail("Action failed with {}".format(e))
+
+
+# A dictionary of all the defined actions to callables (which take
+# parsed arguments).
+ACTIONS = {"clear-sel": clear_sel}
+
+
+def main(args):
+    action_name = os.path.basename(args[0])
+    try:
+        action = ACTIONS[action_name]
+    except KeyError:
+        return "Action {} undefined".format(action_name)
+    else:
+        try:
+            action(args)
+        except Exception as e:
+            action_fail(str(e))
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
diff --git a/src/actions/clear-sel b/src/actions/clear-sel
new file mode 120000
index 0000000..405a394
--- /dev/null
+++ b/src/actions/clear-sel
@@ -0,0 +1 @@
+actions.py
\ No newline at end of file
diff --git a/src/config.yaml b/src/config.yaml
index af0a62a..1a5bcfd 100644
--- a/src/config.yaml
+++ b/src/config.yaml
@@ -13,3 +13,7 @@ options:
     description: |
       Choose the tools to get deployed (hp, dell, supermicro, huawei) or leave
       the charm to self discover the tools needed to run hardware health checks.
+  enable_ipmi:
+    type: boolean
+    default: True
+    description: Enable the use of freeipmi tools to monitor hardware status.
\ No newline at end of file
diff --git a/src/files/ipmi/check_ipmi_sensor b/src/files/ipmi/check_ipmi_sensor
new file mode 120000
index 0000000..575389d
--- /dev/null
+++ b/src/files/ipmi/check_ipmi_sensor
@@ -0,0 +1 @@
+../../../check_ipmi_sensor_v3/check_ipmi_sensor
\ No newline at end of file
diff --git a/src/files/ipmi/check_ipmi_sensor_sudoer b/src/files/ipmi/check_ipmi_sensor_sudoer
new file mode 100644
index 0000000..6571238
--- /dev/null
+++ b/src/files/ipmi/check_ipmi_sensor_sudoer
@@ -0,0 +1 @@
+nagios ALL=(root) NOPASSWD: /usr/sbin/ipmi-sensors, /usr/sbin/ipmi-sel
diff --git a/src/layer.yaml b/src/layer.yaml
index 237b87e..054b43e 100644
--- a/src/layer.yaml
+++ b/src/layer.yaml
@@ -1,4 +1,5 @@
 includes:
+- "layer:apt"
 - "layer:basic"
 - "layer:status"
 - "layer:nagios"
@@ -7,5 +8,9 @@ includes:
 options:
   status:
     patch-hookenv: false
+  apt:
+    packages:
+      - "freeipmi"
+      - "libipc-run-perl"
 repo: "lp:hw-health-charm"
 is: "hw-health"
diff --git a/src/lib/utils/hwdiscovery.py b/src/lib/utils/hwdiscovery.py
index 1827ac6..c144fa4 100644
--- a/src/lib/utils/hwdiscovery.py
+++ b/src/lib/utils/hwdiscovery.py
@@ -5,7 +5,7 @@ import re
 import subprocess
 
 # import sys
-from charmhelpers.core import host
+from charmhelpers.core import host, hookenv
 
 
 def get_tools():
@@ -46,6 +46,9 @@ def get_tools():
     if _supports_mdadm():
         tools.add('mdadm')
 
+    if hookenv.config('enable_ipmi'):
+        tools.add('ipmi')
+
     return tools
 
 
@@ -68,6 +71,7 @@ def dmidecode(key):
 
 
 def _supports_mdadm():
+    found_raid_devices = False
     if os.path.exists('/sbin/mdadm'):
         try:
             devices_raw = subprocess.Popen(
@@ -77,16 +81,13 @@ def _supports_mdadm():
             devices_re = re.compile(r'^ARRAY\s+(\S+) ')
             for line in devices_raw.stdout.readlines():
                 line = line.decode().strip()
-                if devices_re.match(line):
-                    return True
-            # FIXME: log
-            # if devices_raw.stderr:
-            #    print('STDERR: {}'.format(devices_raw.stderr
-            #                              .readline().decode().strip()))
-        except Exception:
-            # log error
-            pass
-    return False
+                raid_dev = devices_re.search(line)
+                if raid_dev:
+                    hookenv.log("Found md raid array {}".format(raid_dev.group(1)))
+                    found_raid_devices = True
+        except Exception as e:
+            hookenv.log("mdadm scan failed with {}".format(e))
+    return found_raid_devices
 
 
 if __name__ == '__main__':
diff --git a/src/lib/utils/tooling.py b/src/lib/utils/tooling.py
index 3b9b927..21794a5 100644
--- a/src/lib/utils/tooling.py
+++ b/src/lib/utils/tooling.py
@@ -29,26 +29,43 @@ TOOLING = {
         'filename': 'check_mdadm.py',
         'cronjob': 'cron_mdadm.py'
     },
+    'ipmi': {
+        'filename': 'ipmi/check_ipmi_sensor',
+        'sudoers': 'ipmi/check_ipmi_sensor_sudoer'
+    },
 }
 
 PLUGINS_DIR = '/usr/local/lib/nagios/plugins/'
 TOOLS_DIR = '/usr/local/bin/'
 CRONJOB_PATH = '/etc/cron.d/hw_health_{}'
+SUDOERS_DIR = '/etc/sudoers.d'
 
 
 def install_resource(tools, resource):
-    ntools = len(tools)
-    if ntools == 0:
+    """
+    Install hardware diagnostic tools from a charm resource
+
+    :param tools: set of tools to include
+    :param resource: resource object
+    :return: boolean, status of installation
+    """
+    if not isinstance(tools, set):
+        try:
+            tools = set(tools)
+        except TypeError as e:
+            hookenv.log("exception found with install_resources, {}".format(e))
+            return False
+    if not tools:
         return False
-    elif 'mdadm' in tools:
-        tools = [tool for tool in tools if tool != 'mdadm']
-        ntools -= 1
-        if ntools == 0:
+    elif tools & {'mdadm', 'ipmi'}:
+        tools = tools - {'mdadm', 'ipmi'}
+        if not tools:
             return True
 
     if not isinstance(resource, str) \
             or not resource.endswith('.zip') \
             or not os.path.exists(resource):
+        hookenv.log("The resource 'tools' does not end with .zip or does not exist")
         return False
 
     with tempfile.TemporaryDirectory() as tmpdir:
@@ -67,7 +84,7 @@ def install_resource(tools, resource):
                     shutil.copy2(filepath, TOOLS_DIR)
                     count += 1
 
-            return ntools == count and count > 0
+            return len(tools) == count and count > 0
 
         except zipfile.BadZipFile as error:
             hookenv.log('BadZipFile: {}'.format(error), hookenv.ERROR)
@@ -79,10 +96,14 @@ def install_resource(tools, resource):
     return False
 
 
-def install_tools(tools, method=None):
-    if not method:
-        method = 'snap'
+def install_tools(tools, method='snap'):
+    """
+    Install hardware management tools from a snap
 
+    :param tools: list of tools to install
+    :param method: default to snap if unspecified, other means have yet to be added
+    :return:
+    """
     count = 0
     for tool in tools:
         if tool in TOOLING and method in TOOLING[tool]:
@@ -139,6 +160,12 @@ def configure_tools(tools):
                     count += 1
                     hookenv.log(
                         'NRPE script for tool [{}] configured'.format(tool))
+            if 'sudoers' in TOOLING[tool]:
+                sudoers_path = _get_filepath(TOOLING[tool]['sudoers'])
+                if sudoers_path:
+                    shutil.copy2(sudoers_path, SUDOERS_DIR)
+                    count += 1
+                    hookenv.log('sudoers entry for tool [{}] added'.format(tool))
     return count > 0
 
 
@@ -159,7 +186,7 @@ def configure_nrpe_checks(tools):
     for tool in tools:
         if tool in TOOLING and 'filename' in TOOLING[tool]:
             scriptname = os.path.basename(TOOLING[tool]['filename'])
-            cmd = os.path.join(PLUGINS_DIR, scriptname)
+            cmd = os.path.join(PLUGINS_DIR, scriptname)  # TODO args to the command?
             if os.path.exists(cmd):
                 nrpe_setup.add_check(tool, '{} Hardware Health', cmd)
                 nrpe_setup.write()
diff --git a/src/reactive/hw_health.py b/src/reactive/hw_health.py
index 1a1402f..29cea0f 100644
--- a/src/reactive/hw_health.py
+++ b/src/reactive/hw_health.py
@@ -62,7 +62,8 @@ def install():
             set_flag('hw-health.installed')
             status.waiting('Preparing scripts installation')
         else:
-            status.blocked('Tools not found: {}'.format(', '.join(tools)))
+            missing_tools = tools - {'mdadm', 'ipmi'}
+            status.blocked('Tools not found: {}'.format(', '.join(missing_tools)))
 
 
 @when('hw-health.installed')
diff --git a/src/tests/unit/test_actions.py b/src/tests/unit/test_actions.py
new file mode 100644
index 0000000..aadca73
--- /dev/null
+++ b/src/tests/unit/test_actions.py
@@ -0,0 +1,31 @@
+# Copyright 2019 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import unittest.mock as mock
+
+from actions.actions import clear_sel
+
+
+class ClearSelTestCase(unittest.TestCase):
+
+    @mock.patch('subprocess.check_output')
+    @mock.patch('subprocess.check_call')
+    @mock.patch('charmhelpers.core.hookenv.log')
+    def test_clear_sel(self, mock_log, mock_check_call, mock_subprocess):
+        sel_output = "Unittest system event log output"
+        mock_subprocess.return_value = sel_output
+        mock_check_call.return_value = None
+        clear_sel()
+        mock_check_call.assert_called_once_with(['action-set', "cleared_log={}".format(sel_output)])
diff --git a/src/tests/unit/test_cron_mdadm.py b/src/tests/unit/test_cron_mdadm.py
index dffd65b..016745e 100644
--- a/src/tests/unit/test_cron_mdadm.py
+++ b/src/tests/unit/test_cron_mdadm.py
@@ -29,10 +29,13 @@ class TestCronMdadm(unittest.TestCase):
 
     @mock.patch('os.path.exists')
     @mock.patch('subprocess.Popen')
-    def test_get_devices_mdadm_exception(self, mdadm_details, path_exists):
+    @mock.patch('sys.exit')
+    def test_get_devices_mdadm_exception(self, mock_exit, mdadm_details,
+                                         path_exists):
         path_exists.return_value = True
         mdadm_details.side_effect = subprocess.CalledProcessError(
             1, 'unittest')
+        mock_exit.return_value = 0
         self.assertEqual(cron_mdadm.get_devices(), set())
 
     @mock.patch('cron_mdadm.generate_output')
diff --git a/src/tests/unit/test_hwdiscovery.py b/src/tests/unit/test_hwdiscovery.py
index 8a62247..7cb516e 100644
--- a/src/tests/unit/test_hwdiscovery.py
+++ b/src/tests/unit/test_hwdiscovery.py
@@ -5,8 +5,6 @@ import sys
 import unittest
 import unittest.mock as mock
 
-import charmhelpers.core.host  # noqa: F401
-
 sys.path.append('lib/utils')
 import hwdiscovery  # noqa: E402
 
@@ -15,7 +13,8 @@ class TestHWDiscovery(unittest.TestCase):
     @mock.patch('hwdiscovery._supports_mdadm')
     @mock.patch('hwdiscovery.dmidecode')
     @mock.patch('glob.glob')
-    def test_get_tools_megacli(self, driver, dmidecode, supports_mdadm):
+    @mock.patch('charmhelpers.core.hookenv.config')
+    def test_get_tools_megacli(self, mock_hookenv, driver, dmidecode, supports_mdadm):
         def side_effect_glob(value):
             if value == '/sys/bus/pci/drivers/megaraid_sas/00*':
                 return ['/...megaraid']
@@ -25,14 +24,16 @@ class TestHWDiscovery(unittest.TestCase):
         supports_mdadm.return_value = False
         dmidecode.return_value = 'unittest'
         driver.side_effect = side_effect_glob
+        mock_hookenv.return_value = True
         real = hwdiscovery.get_tools()
-        expected = set(['megacli'])
+        expected = set(['megacli', 'ipmi'])
         self.assertEqual(real, expected)
 
     @mock.patch('hwdiscovery._supports_mdadm')
     @mock.patch('hwdiscovery.dmidecode')
     @mock.patch('glob.glob')
-    def test_get_tools_sas2ircu(self, driver, dmidecode, supports_mdadm):
+    @mock.patch('charmhelpers.core.hookenv.config')
+    def test_get_tools_sas2ircu(self, mock_hookenv, driver, dmidecode, supports_mdadm):
         def side_effect_glob(value):
             if value == '/sys/bus/pci/drivers/mpt2sas/00*':
                 return ['/...mp2sas']
@@ -41,15 +42,17 @@ class TestHWDiscovery(unittest.TestCase):
 
         supports_mdadm.return_value = False
         dmidecode.return_value = 'unittest'
+        mock_hookenv.return_value = True
         driver.side_effect = side_effect_glob
         real = hwdiscovery.get_tools()
-        expected = set(['sas2ircu'])
+        expected = set(['sas2ircu', 'ipmi'])
         self.assertEqual(real, expected)
 
     @mock.patch('hwdiscovery._supports_mdadm')
     @mock.patch('hwdiscovery.dmidecode')
     @mock.patch('glob.glob')
-    def test_get_tools_sas3ircu(self, driver, dmidecode, supports_mdadm):
+    @mock.patch('charmhelpers.core.hookenv.config')
+    def test_get_tools_sas3ircu(self, mock_hookenv, driver, dmidecode, supports_mdadm):
         def side_effect_glob(value):
             if value not in ('/sys/bus/pci/drivers/megaraid_sas/00*',
                              '/sys/bus/pci/drivers/mpt2sas/00*'):
@@ -60,6 +63,7 @@ class TestHWDiscovery(unittest.TestCase):
         supports_mdadm.return_value = False
         dmidecode.return_value = 'unittest'
         driver.side_effect = side_effect_glob
+        mock_hookenv.return_value = False
         real = hwdiscovery.get_tools()
         expected = set(['sas3ircu'])
         self.assertEqual(real, expected)
@@ -67,10 +71,12 @@ class TestHWDiscovery(unittest.TestCase):
     @mock.patch('hwdiscovery._supports_mdadm')
     @mock.patch('hwdiscovery.dmidecode')
     @mock.patch('glob.glob')
-    def test_get_tools_hpe(self, driver, dmidecode, supports_mdadm):
+    @mock.patch('charmhelpers.core.hookenv.config')
+    def test_get_tools_hpe(self, mock_hookenv, driver, dmidecode, supports_mdadm):
         supports_mdadm.return_value = False
         dmidecode.return_value = 'HPE'
         driver.side_effect = lambda x: []
+        mock_hookenv.return_value = False
         real = hwdiscovery.get_tools()
         expected = set(['hpe'])
         self.assertEqual(real, expected)
@@ -78,10 +84,12 @@ class TestHWDiscovery(unittest.TestCase):
     @mock.patch('hwdiscovery._supports_mdadm')
     @mock.patch('hwdiscovery.dmidecode')
     @mock.patch('glob.glob')
-    def test_get_tools_hp(self, driver, dmidecode, supports_mdadm):
+    @mock.patch('charmhelpers.core.hookenv.config')
+    def test_get_tools_hp(self, mock_hookenv, driver, dmidecode, supports_mdadm):
         supports_mdadm.return_value = False
         dmidecode.return_value = 'HP'
         driver.side_effect = lambda x: []
+        mock_hookenv.return_value = False
         real = hwdiscovery.get_tools()
         expected = set(['hp'])
         self.assertEqual(real, expected)
@@ -89,9 +97,11 @@ class TestHWDiscovery(unittest.TestCase):
     @mock.patch('hwdiscovery._supports_mdadm')
     @mock.patch('hwdiscovery.dmidecode')
     @mock.patch('glob.glob')
-    def test_get_tools_mdadm(self, driver, dmidecode, supports_mdadm):
+    @mock.patch('charmhelpers.core.hookenv.config')
+    def test_get_tools_mdadm(self, mock_hookenv, driver, dmidecode, supports_mdadm):
         supports_mdadm.return_value = True
         dmidecode.return_value = 'unittest'
+        mock_hookenv.return_value = False
         real = hwdiscovery.get_tools()
         expected = set(['mdadm'])
         self.assertEqual(real, expected)
@@ -108,7 +118,8 @@ class TestHWDiscovery(unittest.TestCase):
 
     @mock.patch('os.path.exists')
     @mock.patch('subprocess.Popen')
-    def test_supports_mdadm_true(self, mdadm_details, path_exists):
+    @mock.patch('charmhelpers.core.hookenv.log')
+    def test_supports_mdadm_true(self, log, mdadm_details, path_exists):
         class Test_Popen(object):
             def __init__(cls):
                 data = b'ARRAY /dev/md0 metadata=1.2 name=node00:0'
@@ -116,6 +127,7 @@ class TestHWDiscovery(unittest.TestCase):
                 cls.stdout = io.BufferedReader(io.BytesIO(data))
 
         path_exists.return_value = True
+        log.return_value = True
         mdadm_details.return_value = Test_Popen()
         real = hwdiscovery._supports_mdadm()
         expected = True
diff --git a/src/tests/unit/test_tooling.py b/src/tests/unit/test_tooling.py
index f19dbde..5236f6e 100644
--- a/src/tests/unit/test_tooling.py
+++ b/src/tests/unit/test_tooling.py
@@ -4,8 +4,6 @@ import unittest
 import unittest.mock as mock
 import zipfile
 
-import shutil  # noqa: F401
-import tempfile  # noqa: F401
 
 sys.path.append('lib/utils')
 import tooling  # noqa: E402
@@ -66,7 +64,7 @@ class TestTooling(unittest.TestCase):
     @mock.patch('zipfile.ZipFile')
     def test_install_resource_zipfile_error(self, zfile, path_exists):
         path_exists.return_value = True
-        zfile.side_effect = zipfile.BadZipFile('ugly error')
+        zfile.side_effect = zipfile.BadZipFile('Intended error for unit test')
         actual = tooling.install_resource(['megacli'], '/tmp/test.zip')
         expected = False
         self.assertEqual(actual, expected)
diff --git a/src/tox.ini b/src/tox.ini
index ddf16bf..491fbe6 100644
--- a/src/tox.ini
+++ b/src/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = unit
+envlist = unit, pep8
 skipsdist = true
 
 [testenv:unit]
@@ -8,6 +8,14 @@ envdir={toxworkdir}/py3
 deps=
   charms.reactive
   nose
+  mock
 commands =
   {toxworkdir}/../tests/download_nagios_plugin3.py
   nosetests tests/unit
+
+[testenv:pep8]
+basepython = python3
+deps =
+  flake8
+commands = flake8 {posargs} --max-complexity=20 --max-line-length=120 reactive files unit_tests
+

Follow ups