nagios-charmers team mailing list archive
-
nagios-charmers team
-
Mailing list archive
-
Message #00282
[Merge] ~xavpaice/hw-health-charm:add_ipmi into hw-health-charm:master
Xav Paice has proposed merging ~xavpaice/hw-health-charm:add_ipmi into hw-health-charm:master.
Requested reviews:
Xav Paice (xavpaice)
Peter Sabaini (peter-sabaini)
For more details, see:
https://code.launchpad.net/~xavpaice/hw-health-charm/+git/hw-health-charm/+merge/363682
--
Your team Nagios Charm developers is subscribed to branch hw-health-charm:master.
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..0fbc388
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,4 @@
+[submodule "check_ipmi_sensor"]
+ path = check_ipmi_sensor_v3
+ url = git@xxxxxxxxxx:thomas-krenn/check_ipmi_sensor_v3.git
+ branch = master
diff --git a/check_ipmi_sensor_v3 b/check_ipmi_sensor_v3
new file mode 160000
index 0000000..268eb36
--- /dev/null
+++ b/check_ipmi_sensor_v3
@@ -0,0 +1 @@
+Subproject commit 268eb36f0c20b060fd04f4c9d1ff1fcd17d3269f
diff --git a/src/README b/src/README.md
similarity index 88%
rename from src/README
rename to src/README.md
index 2c5475b..971b86c 100644
--- a/src/README
+++ b/src/README.md
@@ -14,6 +14,8 @@ Currently supported hardware is:
InfoCollect)
* Linux software RAID (mdadm)
+
+ * IPMI as implemented by freeipmi
In the backlog, hp-health logic still needs to be backported to support
Hewlett-Packard equipment (HP Smart Array Controllers and MSA Controllers with
@@ -25,28 +27,29 @@ Furthermore, other hardware in the roadmap is:
# Usage
-Step by step instructions on using the charm:
+```
juju deploy ubuntu
juju deploy hw-health
juju deploy nrpe
juju add-relation ubuntu nrpe
juju add-relation ubuntu hw-health
juju add-relation hw-health nrpe
+```
Charmstore version already ships a resource. However, a new resource can be
attached:
- * Option 1: juju deploy hw-health --resource tools=/tmp/zipfile.zip
+ * Option 1: `juju deploy hw-health --resource tools=/tmp/zipfile.zip`
- * Option 2: juju attach-resource hw-health tools=/tmp/zipfile.zip
+ * Option 2: `juju attach-resource hw-health tools=/tmp/zipfile.zip`
In both cases format of zipfile.zip must be one of the following:
-``example
+```
zip /tmp/zipfile.zip megacli sas2ircu sas3ircu
zip /tmp/zipfile.zip megacli
etc.
-``
+```
## Known Limitations and Issues
diff --git a/src/actions.yaml b/src/actions.yaml
new file mode 100644
index 0000000..421c35e
--- /dev/null
+++ b/src/actions.yaml
@@ -0,0 +1,2 @@
+clear-sel:
+ description: Using ipmi-sel, clear the system event log
diff --git a/src/actions/__init__.py b/src/actions/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/actions/__init__.py
diff --git a/src/actions/actions.py b/src/actions/actions.py
new file mode 100755
index 0000000..820f76d
--- /dev/null
+++ b/src/actions/actions.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+#
+# Copyright 2016,2019 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import subprocess
+import sys
+
+from charmhelpers.core.hookenv import action_set, action_fail, log
+
+
+def clear_sel():
+ """
+ Action to clear the IPMI system event log.
+
+ Uses ipmi-sel --post-clear, clears the SEL log and stores the cleared entries
+ in action output.
+ """
+ command = ['/usr/sbin/ipmi-sel', '--post-clear']
+ try:
+ output = subprocess.check_output(command)
+ log("Action clear-sel completed, sel log cleared: {}".format(output))
+ action_set({"cleared_log": output})
+ except subprocess.CalledProcessError as e:
+ action_fail("Action failed with {}".format(e))
+
+
+# A dictionary of all the defined actions to callables (which take
+# parsed arguments).
+ACTIONS = {"clear-sel": clear_sel}
+
+
+def main(args):
+ action_name = os.path.basename(args[0])
+ try:
+ action = ACTIONS[action_name]
+ except KeyError:
+ return "Action {} undefined".format(action_name)
+ else:
+ try:
+ action(args)
+ except Exception as e:
+ action_fail(str(e))
+
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv))
diff --git a/src/actions/clear-sel b/src/actions/clear-sel
new file mode 120000
index 0000000..405a394
--- /dev/null
+++ b/src/actions/clear-sel
@@ -0,0 +1 @@
+actions.py
\ No newline at end of file
diff --git a/src/config.yaml b/src/config.yaml
index af0a62a..1a5bcfd 100644
--- a/src/config.yaml
+++ b/src/config.yaml
@@ -13,3 +13,7 @@ options:
description: |
Choose the tools to get deployed (hp, dell, supermicro, huawei) or leave
the charm to self discover the tools needed to run hardware health checks.
+ enable_ipmi:
+ type: boolean
+ default: True
+ description: Enable the use of freeipmi tools to monitor hardware status.
\ No newline at end of file
diff --git a/src/files/ipmi/check_ipmi_sensor b/src/files/ipmi/check_ipmi_sensor
new file mode 120000
index 0000000..575389d
--- /dev/null
+++ b/src/files/ipmi/check_ipmi_sensor
@@ -0,0 +1 @@
+../../../check_ipmi_sensor_v3/check_ipmi_sensor
\ No newline at end of file
diff --git a/src/files/ipmi/check_ipmi_sensor_sudoer b/src/files/ipmi/check_ipmi_sensor_sudoer
new file mode 100644
index 0000000..6571238
--- /dev/null
+++ b/src/files/ipmi/check_ipmi_sensor_sudoer
@@ -0,0 +1 @@
+nagios ALL=(root) NOPASSWD: /usr/sbin/ipmi-sensors, /usr/sbin/ipmi-sel
diff --git a/src/layer.yaml b/src/layer.yaml
index 237b87e..054b43e 100644
--- a/src/layer.yaml
+++ b/src/layer.yaml
@@ -1,4 +1,5 @@
includes:
+- "layer:apt"
- "layer:basic"
- "layer:status"
- "layer:nagios"
@@ -7,5 +8,9 @@ includes:
options:
status:
patch-hookenv: false
+ apt:
+ packages:
+ - "freeipmi"
+ - "libipc-run-perl"
repo: "lp:hw-health-charm"
is: "hw-health"
diff --git a/src/lib/utils/hwdiscovery.py b/src/lib/utils/hwdiscovery.py
index 1827ac6..c144fa4 100644
--- a/src/lib/utils/hwdiscovery.py
+++ b/src/lib/utils/hwdiscovery.py
@@ -5,7 +5,7 @@ import re
import subprocess
# import sys
-from charmhelpers.core import host
+from charmhelpers.core import host, hookenv
def get_tools():
@@ -46,6 +46,9 @@ def get_tools():
if _supports_mdadm():
tools.add('mdadm')
+ if hookenv.config('enable_ipmi'):
+ tools.add('ipmi')
+
return tools
@@ -68,6 +71,7 @@ def dmidecode(key):
def _supports_mdadm():
+ found_raid_devices = False
if os.path.exists('/sbin/mdadm'):
try:
devices_raw = subprocess.Popen(
@@ -77,16 +81,13 @@ def _supports_mdadm():
devices_re = re.compile(r'^ARRAY\s+(\S+) ')
for line in devices_raw.stdout.readlines():
line = line.decode().strip()
- if devices_re.match(line):
- return True
- # FIXME: log
- # if devices_raw.stderr:
- # print('STDERR: {}'.format(devices_raw.stderr
- # .readline().decode().strip()))
- except Exception:
- # log error
- pass
- return False
+ raid_dev = devices_re.search(line)
+ if raid_dev:
+ hookenv.log("Found md raid array {}".format(raid_dev.group(1)))
+ found_raid_devices = True
+ except Exception as e:
+ hookenv.log("mdadm scan failed with {}".format(e))
+ return found_raid_devices
if __name__ == '__main__':
diff --git a/src/lib/utils/tooling.py b/src/lib/utils/tooling.py
index 3b9b927..21794a5 100644
--- a/src/lib/utils/tooling.py
+++ b/src/lib/utils/tooling.py
@@ -29,26 +29,43 @@ TOOLING = {
'filename': 'check_mdadm.py',
'cronjob': 'cron_mdadm.py'
},
+ 'ipmi': {
+ 'filename': 'ipmi/check_ipmi_sensor',
+ 'sudoers': 'ipmi/check_ipmi_sensor_sudoer'
+ },
}
PLUGINS_DIR = '/usr/local/lib/nagios/plugins/'
TOOLS_DIR = '/usr/local/bin/'
CRONJOB_PATH = '/etc/cron.d/hw_health_{}'
+SUDOERS_DIR = '/etc/sudoers.d'
def install_resource(tools, resource):
- ntools = len(tools)
- if ntools == 0:
+ """
+ Install hardware diagnostic tools from a charm resource
+
+ :param tools: set of tools to include
+ :param resource: resource object
+ :return: boolean, status of installation
+ """
+ if not isinstance(tools, set):
+ try:
+ tools = set(tools)
+ except TypeError as e:
+ hookenv.log("exception found with install_resources, {}".format(e))
+ return False
+ if not tools:
return False
- elif 'mdadm' in tools:
- tools = [tool for tool in tools if tool != 'mdadm']
- ntools -= 1
- if ntools == 0:
+ elif tools & {'mdadm', 'ipmi'}:
+ tools = tools - {'mdadm', 'ipmi'}
+ if not tools:
return True
if not isinstance(resource, str) \
or not resource.endswith('.zip') \
or not os.path.exists(resource):
+ hookenv.log("The resource 'tools' does not end with .zip or does not exist")
return False
with tempfile.TemporaryDirectory() as tmpdir:
@@ -67,7 +84,7 @@ def install_resource(tools, resource):
shutil.copy2(filepath, TOOLS_DIR)
count += 1
- return ntools == count and count > 0
+ return len(tools) == count and count > 0
except zipfile.BadZipFile as error:
hookenv.log('BadZipFile: {}'.format(error), hookenv.ERROR)
@@ -79,10 +96,14 @@ def install_resource(tools, resource):
return False
-def install_tools(tools, method=None):
- if not method:
- method = 'snap'
+def install_tools(tools, method='snap'):
+ """
+ Install hardware management tools from a snap
+ :param tools: list of tools to install
+ :param method: default to snap if unspecified, other means have yet to be added
+ :return:
+ """
count = 0
for tool in tools:
if tool in TOOLING and method in TOOLING[tool]:
@@ -139,6 +160,12 @@ def configure_tools(tools):
count += 1
hookenv.log(
'NRPE script for tool [{}] configured'.format(tool))
+ if 'sudoers' in TOOLING[tool]:
+ sudoers_path = _get_filepath(TOOLING[tool]['sudoers'])
+ if sudoers_path:
+ shutil.copy2(sudoers_path, SUDOERS_DIR)
+ count += 1
+ hookenv.log('sudoers entry for tool [{}] added'.format(tool))
return count > 0
@@ -159,7 +186,7 @@ def configure_nrpe_checks(tools):
for tool in tools:
if tool in TOOLING and 'filename' in TOOLING[tool]:
scriptname = os.path.basename(TOOLING[tool]['filename'])
- cmd = os.path.join(PLUGINS_DIR, scriptname)
+ cmd = os.path.join(PLUGINS_DIR, scriptname) # TODO args to the command?
if os.path.exists(cmd):
nrpe_setup.add_check(tool, '{} Hardware Health', cmd)
nrpe_setup.write()
diff --git a/src/reactive/hw_health.py b/src/reactive/hw_health.py
index 1a1402f..29cea0f 100644
--- a/src/reactive/hw_health.py
+++ b/src/reactive/hw_health.py
@@ -62,7 +62,8 @@ def install():
set_flag('hw-health.installed')
status.waiting('Preparing scripts installation')
else:
- status.blocked('Tools not found: {}'.format(', '.join(tools)))
+ missing_tools = tools - {'mdadm', 'ipmi'}
+ status.blocked('Tools not found: {}'.format(', '.join(missing_tools)))
@when('hw-health.installed')
diff --git a/src/tests/unit/test_actions.py b/src/tests/unit/test_actions.py
new file mode 100644
index 0000000..aadca73
--- /dev/null
+++ b/src/tests/unit/test_actions.py
@@ -0,0 +1,31 @@
+# Copyright 2019 Canonical Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import unittest.mock as mock
+
+from actions.actions import clear_sel
+
+
+class ClearSelTestCase(unittest.TestCase):
+
+ @mock.patch('subprocess.check_output')
+ @mock.patch('subprocess.check_call')
+ @mock.patch('charmhelpers.core.hookenv.log')
+ def test_clear_sel(self, mock_log, mock_check_call, mock_subprocess):
+ sel_output = "Unittest system event log output"
+ mock_subprocess.return_value = sel_output
+ mock_check_call.return_value = None
+ clear_sel()
+ mock_check_call.assert_called_once_with(['action-set', "cleared_log={}".format(sel_output)])
diff --git a/src/tests/unit/test_cron_mdadm.py b/src/tests/unit/test_cron_mdadm.py
index dffd65b..016745e 100644
--- a/src/tests/unit/test_cron_mdadm.py
+++ b/src/tests/unit/test_cron_mdadm.py
@@ -29,10 +29,13 @@ class TestCronMdadm(unittest.TestCase):
@mock.patch('os.path.exists')
@mock.patch('subprocess.Popen')
- def test_get_devices_mdadm_exception(self, mdadm_details, path_exists):
+ @mock.patch('sys.exit')
+ def test_get_devices_mdadm_exception(self, mock_exit, mdadm_details,
+ path_exists):
path_exists.return_value = True
mdadm_details.side_effect = subprocess.CalledProcessError(
1, 'unittest')
+ mock_exit.return_value = 0
self.assertEqual(cron_mdadm.get_devices(), set())
@mock.patch('cron_mdadm.generate_output')
diff --git a/src/tests/unit/test_hwdiscovery.py b/src/tests/unit/test_hwdiscovery.py
index 8a62247..7cb516e 100644
--- a/src/tests/unit/test_hwdiscovery.py
+++ b/src/tests/unit/test_hwdiscovery.py
@@ -5,8 +5,6 @@ import sys
import unittest
import unittest.mock as mock
-import charmhelpers.core.host # noqa: F401
-
sys.path.append('lib/utils')
import hwdiscovery # noqa: E402
@@ -15,7 +13,8 @@ class TestHWDiscovery(unittest.TestCase):
@mock.patch('hwdiscovery._supports_mdadm')
@mock.patch('hwdiscovery.dmidecode')
@mock.patch('glob.glob')
- def test_get_tools_megacli(self, driver, dmidecode, supports_mdadm):
+ @mock.patch('charmhelpers.core.hookenv.config')
+ def test_get_tools_megacli(self, mock_hookenv, driver, dmidecode, supports_mdadm):
def side_effect_glob(value):
if value == '/sys/bus/pci/drivers/megaraid_sas/00*':
return ['/...megaraid']
@@ -25,14 +24,16 @@ class TestHWDiscovery(unittest.TestCase):
supports_mdadm.return_value = False
dmidecode.return_value = 'unittest'
driver.side_effect = side_effect_glob
+ mock_hookenv.return_value = True
real = hwdiscovery.get_tools()
- expected = set(['megacli'])
+ expected = set(['megacli', 'ipmi'])
self.assertEqual(real, expected)
@mock.patch('hwdiscovery._supports_mdadm')
@mock.patch('hwdiscovery.dmidecode')
@mock.patch('glob.glob')
- def test_get_tools_sas2ircu(self, driver, dmidecode, supports_mdadm):
+ @mock.patch('charmhelpers.core.hookenv.config')
+ def test_get_tools_sas2ircu(self, mock_hookenv, driver, dmidecode, supports_mdadm):
def side_effect_glob(value):
if value == '/sys/bus/pci/drivers/mpt2sas/00*':
return ['/...mp2sas']
@@ -41,15 +42,17 @@ class TestHWDiscovery(unittest.TestCase):
supports_mdadm.return_value = False
dmidecode.return_value = 'unittest'
+ mock_hookenv.return_value = True
driver.side_effect = side_effect_glob
real = hwdiscovery.get_tools()
- expected = set(['sas2ircu'])
+ expected = set(['sas2ircu', 'ipmi'])
self.assertEqual(real, expected)
@mock.patch('hwdiscovery._supports_mdadm')
@mock.patch('hwdiscovery.dmidecode')
@mock.patch('glob.glob')
- def test_get_tools_sas3ircu(self, driver, dmidecode, supports_mdadm):
+ @mock.patch('charmhelpers.core.hookenv.config')
+ def test_get_tools_sas3ircu(self, mock_hookenv, driver, dmidecode, supports_mdadm):
def side_effect_glob(value):
if value not in ('/sys/bus/pci/drivers/megaraid_sas/00*',
'/sys/bus/pci/drivers/mpt2sas/00*'):
@@ -60,6 +63,7 @@ class TestHWDiscovery(unittest.TestCase):
supports_mdadm.return_value = False
dmidecode.return_value = 'unittest'
driver.side_effect = side_effect_glob
+ mock_hookenv.return_value = False
real = hwdiscovery.get_tools()
expected = set(['sas3ircu'])
self.assertEqual(real, expected)
@@ -67,10 +71,12 @@ class TestHWDiscovery(unittest.TestCase):
@mock.patch('hwdiscovery._supports_mdadm')
@mock.patch('hwdiscovery.dmidecode')
@mock.patch('glob.glob')
- def test_get_tools_hpe(self, driver, dmidecode, supports_mdadm):
+ @mock.patch('charmhelpers.core.hookenv.config')
+ def test_get_tools_hpe(self, mock_hookenv, driver, dmidecode, supports_mdadm):
supports_mdadm.return_value = False
dmidecode.return_value = 'HPE'
driver.side_effect = lambda x: []
+ mock_hookenv.return_value = False
real = hwdiscovery.get_tools()
expected = set(['hpe'])
self.assertEqual(real, expected)
@@ -78,10 +84,12 @@ class TestHWDiscovery(unittest.TestCase):
@mock.patch('hwdiscovery._supports_mdadm')
@mock.patch('hwdiscovery.dmidecode')
@mock.patch('glob.glob')
- def test_get_tools_hp(self, driver, dmidecode, supports_mdadm):
+ @mock.patch('charmhelpers.core.hookenv.config')
+ def test_get_tools_hp(self, mock_hookenv, driver, dmidecode, supports_mdadm):
supports_mdadm.return_value = False
dmidecode.return_value = 'HP'
driver.side_effect = lambda x: []
+ mock_hookenv.return_value = False
real = hwdiscovery.get_tools()
expected = set(['hp'])
self.assertEqual(real, expected)
@@ -89,9 +97,11 @@ class TestHWDiscovery(unittest.TestCase):
@mock.patch('hwdiscovery._supports_mdadm')
@mock.patch('hwdiscovery.dmidecode')
@mock.patch('glob.glob')
- def test_get_tools_mdadm(self, driver, dmidecode, supports_mdadm):
+ @mock.patch('charmhelpers.core.hookenv.config')
+ def test_get_tools_mdadm(self, mock_hookenv, driver, dmidecode, supports_mdadm):
supports_mdadm.return_value = True
dmidecode.return_value = 'unittest'
+ mock_hookenv.return_value = False
real = hwdiscovery.get_tools()
expected = set(['mdadm'])
self.assertEqual(real, expected)
@@ -108,7 +118,8 @@ class TestHWDiscovery(unittest.TestCase):
@mock.patch('os.path.exists')
@mock.patch('subprocess.Popen')
- def test_supports_mdadm_true(self, mdadm_details, path_exists):
+ @mock.patch('charmhelpers.core.hookenv.log')
+ def test_supports_mdadm_true(self, log, mdadm_details, path_exists):
class Test_Popen(object):
def __init__(cls):
data = b'ARRAY /dev/md0 metadata=1.2 name=node00:0'
@@ -116,6 +127,7 @@ class TestHWDiscovery(unittest.TestCase):
cls.stdout = io.BufferedReader(io.BytesIO(data))
path_exists.return_value = True
+ log.return_value = True
mdadm_details.return_value = Test_Popen()
real = hwdiscovery._supports_mdadm()
expected = True
diff --git a/src/tests/unit/test_tooling.py b/src/tests/unit/test_tooling.py
index f19dbde..5236f6e 100644
--- a/src/tests/unit/test_tooling.py
+++ b/src/tests/unit/test_tooling.py
@@ -4,8 +4,6 @@ import unittest
import unittest.mock as mock
import zipfile
-import shutil # noqa: F401
-import tempfile # noqa: F401
sys.path.append('lib/utils')
import tooling # noqa: E402
@@ -66,7 +64,7 @@ class TestTooling(unittest.TestCase):
@mock.patch('zipfile.ZipFile')
def test_install_resource_zipfile_error(self, zfile, path_exists):
path_exists.return_value = True
- zfile.side_effect = zipfile.BadZipFile('ugly error')
+ zfile.side_effect = zipfile.BadZipFile('Intended error for unit test')
actual = tooling.install_resource(['megacli'], '/tmp/test.zip')
expected = False
self.assertEqual(actual, expected)
diff --git a/src/tox.ini b/src/tox.ini
index ddf16bf..491fbe6 100644
--- a/src/tox.ini
+++ b/src/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = unit
+envlist = unit, pep8
skipsdist = true
[testenv:unit]
@@ -8,6 +8,14 @@ envdir={toxworkdir}/py3
deps=
charms.reactive
nose
+ mock
commands =
{toxworkdir}/../tests/download_nagios_plugin3.py
nosetests tests/unit
+
+[testenv:pep8]
+basepython = python3
+deps =
+ flake8
+commands = flake8 {posargs} --max-complexity=20 --max-line-length=120 reactive files unit_tests
+
Follow ups