nagios-charmers team mailing list archive

Thread
Date

[Merge] ~aieri/charm-hw-health:lp1832906 into ~nagios-charmers/charm-hw-health:master

To: mp+385838@xxxxxxxxxxxxxxxxxx,Xav Paice <xav.paice@xxxxxxxxxxxxx>
From: Andrea Ieri <andrea.ieri@xxxxxxxxxxxxx>
Date: Tue, 16 Jun 2020 17:34:23 -0000
Reply-to: mp+385838@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

Andrea Ieri has proposed merging ~aieri/charm-hw-health:lp1832906 into ~nagios-charmers/charm-hw-health:master.

Commit message:
Fix issue with cron_mdadm.py which causes degraded state to not be reported

Requested reviews:
  Giuseppe Petralia (peppepetra86)
  Drew Freiberger (afreiberger)
  Stuart Bishop (stub)
  BootStack Reviewers (bootstack-reviewers)
  Xav Paice (xavpaice)

For more details, see:
https://code.launchpad.net/~aieri/charm-hw-health/+git/hw-health-charm/+merge/385838

    Fix issue with cron_mdadm.py which causes degraded state to not be reported

    A formatting assumption broke the detection of the degraded flag in the
    State section of each device report from `mdadm --detail <devices>`.
    This merge adds code to split the State flags, check for both the degraded
    and recovering states, and set the alert status based on their combination.

    It also adds direct detection of a "removed" member of the RAID array.

    Closes-Bug: 1832906

-- 
Your team Nagios Charm developers is subscribed to branch ~nagios-charmers/charm-hw-health:master.

diff --git a/src/README.md b/src/README.md
index 12a33f0..45b48ed 100644
--- a/src/README.md
+++ b/src/README.md
@@ -87,6 +87,31 @@ Zip file size: 1204457 bytes, number of entries: 3
 3 files, 3842044 bytes uncompressed, 1204005 bytes compressed:  68.7%
 ```
 
+Two more zip resources may be needed for functional tests to succeed:
+ * tools-checksum.zip replaces the megacli tool by an empty file.
+ * tools-missing.zip removes the megacli tool from the resource
+
+```
+$ zipinfo tools-checksum.zip
+Archive:  tools-checksum.zip
+Zip file size: 547860 bytes, number of entries: 3
+-rwxr-xr-x  3.0 unx        0 bx stor 19-Jan-16 11:35 megacli
+-rwxr-xr-x  3.0 unx   559164 bx defN 19-Jan-16 11:31 sas2ircu
+-rwxr-xr-x  3.0 unx   660560 bx defN 19-Jan-16 11:31 sas3ircu
+3 files, 1219724 bytes uncompressed, 547408 bytes compressed:  55.1%
+
+$ zipinfo tools-missing.zip
+Archive:  tools-missing.zip
+Zip file size: 547718 bytes, number of entries: 2
+-rwxr-xr-x  3.0 unx   559164 bx defN 19-Jan-16 11:31 sas2ircu
+-rwxr-xr-x  3.0 unx   660560 bx defN 19-Jan-16 11:31 sas3ircu
+2 files, 1219724 bytes uncompressed, 547408 bytes compressed:  55.1%
+```
+
+Note: vendor tools may be updated over time. The charm verifies that the shared
+binaries match a set of known checksums. If you feel a checksum is missing, please
+file a bug (see link below) and it will be added.
+
 # Configuration
 
 Manufacturer option needs to be left in auto mode.
diff --git a/src/files/ipmi/cron_ipmi_sensors.py b/src/files/ipmi/cron_ipmi_sensors.py
index a11f9fb..2b6cdd5 100644
--- a/src/files/ipmi/cron_ipmi_sensors.py
+++ b/src/files/ipmi/cron_ipmi_sensors.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: us-ascii -*-
 
 import os
 import subprocess
@@ -16,6 +15,16 @@ NAGIOS_ERRORS = {
 }
 
 
+def write_output_file(output):
+    try:
+        with open(TMP_OUTPUT_FILE, 'w') as fd:
+            fd.write(output)
+    except IOError as e:
+        print("Cannot write output file {}, error {}".format(TMP_OUTPUT_FILE, e))
+        sys.exit(1)
+    os.rename(TMP_OUTPUT_FILE, OUTPUT_FILE)
+
+
 def gather_metrics():
     # Check if a PID file exists
     if os.path.exists(CHECK_IPMI_PID):
@@ -40,18 +49,13 @@ def gather_metrics():
     if len(sys.argv) > 1:
         cmdline.extend(sys.argv[1:])
     try:
-        output = subprocess.check_output(cmdline)
+        output = subprocess.check_output(cmdline).decode('utf8')
+        write_output_file(output)
     except subprocess.CalledProcessError as error:
         output = error.stdout.decode(errors='ignore')
-        with open(TMP_OUTPUT_FILE, 'w') as fd:
-            fd.write('{}: {}'.format(NAGIOS_ERRORS[error.returncode], output))
-    try:
-        with open(TMP_OUTPUT_FILE, 'w') as fd:
-            fd.write(output)
-    except IOError as e:
-        print("Cannot write output file {}, error {}".format(TMP_OUTPUT_FILE, e))
-        sys.exit(1)
-    os.rename(TMP_OUTPUT_FILE, OUTPUT_FILE)
+        write_output_file('{}: {}'.format(NAGIOS_ERRORS[error.returncode], output))
+    except PermissionError as error:
+        write_output_file('UNKNOWN: {}'.format(error))
 
     # remove pid reference
     os.remove(CHECK_IPMI_PID)
diff --git a/src/files/mdadm/cron_mdadm.py b/src/files/mdadm/cron_mdadm.py
index 0b0b365..d57c44c 100755
--- a/src/files/mdadm/cron_mdadm.py
+++ b/src/files/mdadm/cron_mdadm.py
@@ -6,24 +6,28 @@ import shutil
 import subprocess
 import sys
 
-OUTPUT_FILE = "/var/lib/nagios/mdadm.out"
-TEMP_FILE = "/tmp/mdadm.out"
+OUTPUT_FILE = '/var/lib/nagios/mdadm.out'
+TEMP_FILE = '/tmp/mdadm.out'
 
 
 def get_devices():
-    if os.path.exists("/sbin/mdadm"):
+    if os.path.exists('/sbin/mdadm'):
         try:
-            devices_raw = subprocess.check_output(["/sbin/mdadm", "--detail", "--scan"])
-            devices_re = re.compile(r"^ARRAY\s+([^ ]+) ")
+            devices_raw = subprocess.check_output(
+                ['/sbin/mdadm', '--detail', '--scan']
+            )
+            devices_re = re.compile(r'^ARRAY\s+([^ ]+) ')
             devices = set()
-            for line in devices_raw.decode().split("\n"):
+            for line in devices_raw.decode().split('\n'):
                 line = line.strip()
                 device_re = devices_re.match(line)
                 if device_re is not None:
                     devices.add(device_re.group(1))
             return devices
         except subprocess.CalledProcessError as error:
-            rc = generate_output("CRITICAL: get_devices error - {}".format(error))
+            rc = generate_output(
+                'CRITICAL: get_devices error - {}'.format(error)
+            )
             if rc:
                 sys.exit(0)
     return set()
@@ -31,90 +35,150 @@ def get_devices():
 
 def generate_output(msg):
     try:
-        with open(TEMP_FILE, "w") as fd:
+        with open(TEMP_FILE, 'w') as fd:
             fd.write(msg)
         shutil.move(TEMP_FILE, OUTPUT_FILE)
         return True
     except Exception as error:
-        print("Unable to generate output file:", error)
+        print('Unable to generate output file:', error)
         return False
 
 
 def get_devices_stats(devices):
-    mdadm_detail = ["/sbin/mdadm", "--detail"]
+    mdadm_detail = ['/sbin/mdadm', '--detail']
     mdadm_detail.extend(sorted(devices))
-    devices_details_raw = subprocess.check_output(mdadm_detail)
-
-    devices_re = r"^(/\S+):$"
-    state_re = r"^\s*State\s+:\s+(\S+)$"
-    status_re = r"^\s*(Active|Working|Failed|Spare) Devices\s+:\s+(\d+)$"
+    try:
+        devices_details_raw = subprocess.check_output(mdadm_detail)
+    except subprocess.CalledProcessError as error:
+        return generate_output(
+            'WARNING: error executing mdadm: {}'.format(error)
+        )
+
+    devices_re = r'^(/\S+):$'
+    state_re = r'^\s*State\s+:\s+(.+)\s*$'
+    status_re = r'^\s*(Active|Working|Failed|Spare) Devices\s+:\s+(\d+)$'
+    rebuild_status_re = r'^\s*Rebuild Status\s+:\s+(\d+%\s+\S+)$'
+    removed_re = r'^\s*-\s+0\s+0\s+(\d+)\s+removed$'
+    #        4 8 162 3 spare rebuilding /dev/sdk2
+    rebuilding_re = r'^\s*\d+\s+\d+\s+\d+\s+\d+\s+\S+\s+rebuilding\s+(\S+)$'
 
     devices_cre = re.compile(devices_re)
     state_cre = re.compile(state_re)
     status_cre = re.compile(status_re)
+    rebuild_status_cre = re.compile(rebuild_status_re)
+    removed_cre = re.compile(removed_re)
+    rebuilding_cre = re.compile(rebuilding_re)
 
     device = None
     devices_stats = {}
-    for line in devices_details_raw.decode().split("\n"):
+    for line in devices_details_raw.decode().split('\n'):
         line = line.rstrip()
         m = devices_cre.match(line)
         if m:
             device = m.group(1)
             devices_stats[device] = {
-                "stats": {"Active": 0, "Working": 0, "Failed": 0, "Spare": 0},
-                "degraded": False,
+                'stats': {
+                    'Active': 0,
+                    'Working': 0,
+                    'Failed': 0,
+                    'Spare': 0,
+                },
+                'rebuild_status': '',
+                'degraded': False,
+                'recovering': False,
+                'removed': [],
+                'rebuilding': [],
             }
             continue
 
         m = state_cre.match(line)
         if m:
-            if "degraded" in m.group(1) and device:
-                devices_stats[device]["degraded"] = True
+            # format for State line can be "clean" or "clean, degraded" or "active, degraded, rebuilding", etc.
+            states = m.group(1).split(", ")
+            if 'degraded' in states and device:
+                devices_stats[device]['degraded'] = True
+            if 'recovering' in states and device:
+                devices_stats[device]['recovering'] = True
             continue
 
         m = status_cre.match(line)
         if m and device:
-            devices_stats[device]["stats"][m.group(1)] = int(m.group(2))
+            devices_stats[device]['stats'][m.group(1)] = int(m.group(2))
+            continue
+
+        m = removed_cre.match(line)
+        if m and device:
+            devices_stats[device]['removed'].append(m.group(1))
+            continue
+
+        m = rebuild_status_cre.match(line)
+        if m and device:
+            devices_stats[device]['rebuild_status'] = m.group(1)
+            continue
+
+        m = rebuilding_cre.match(line)
+        if m and device:
+            devices_stats[device]['rebuilding'].append(m.group(1))
+            continue
+
     return devices_stats
 
 
 def parse_output():
     devices = get_devices()
     if len(devices) == 0:
-        return generate_output("WARNING: unexpectedly checked no devices")
+        return generate_output('WARNING: unexpectedly checked no devices')
 
-    try:
-        devices_stats = get_devices_stats(devices)
-    except subprocess.CalledProcessError as error:
-        return generate_output("WARNING: error executing mdadm: {}".format(error))
+    devices_stats = get_devices_stats(devices)
+    if isinstance(devices_stats, bool):
+        # if the device_stats is boolean, generate_output was already called
+        return devices_stats
 
     msg = []
     critical = False
+    warning = False
     for device in devices_stats:
+        parts = []
         # Is device degraded?
-        if devices_stats[device]["degraded"]:
+        if devices_stats[device]['degraded'] and devices_stats[device]['recovering']:
+            warning = True
+            parts = ['{} recovering'.format(device)]
+        elif devices_stats[device]['degraded']:
             critical = True
-            parts = ["{} degraded".format(device)]
+            parts = ['{} degraded'.format(device)]
         else:
-            parts = ["{} ok".format(device)]
+            parts = ['{} ok'.format(device)]
 
         # If Failed drives are found, list counters (how many?)
-        failed_cnt = devices_stats[device]["stats"].get("Failed", 0)
+        failed_cnt = devices_stats[device]['stats'].get('Failed', 0)
         if failed_cnt > 0:
             critical = True
             dev_stats = [
-                "{}[{}]".format(status, devices_stats[device]["stats"][status])
-                for status in sorted(devices_stats[device]["stats"])
+                '{}[{}]'.format(status, devices_stats[device]['stats'][status])
+                for status in sorted(devices_stats[device]['stats'])
             ]
             parts.extend(dev_stats)
-        msg.append(", ".join(parts))
+
+        if len(devices_stats[device]['removed']) != 0:
+            critical = True
+            members = " and ".join(devices_stats[device]['removed'])
+            parts.append('RaidDevice(s) {} marked removed'.format(members))
+
+        if len(devices_stats[device]['rebuilding']) != 0:
+            rebuilding_members = " ".join(devices_stats[device]['rebuilding'])
+            rebuild_status = devices_stats[device]['rebuild_status']
+            parts.append('{} rebuilding ({})'.format(rebuilding_members, rebuild_status))
+
+        msg.append(', '.join(parts))
 
     if critical:
-        msg = "CRITICAL: {}".format("; ".join(msg))
+        msg = 'CRITICAL: {}'.format('; '.join(msg))
+    elif warning:
+        msg = 'WARNING: {}'.format('; '.join(msg))
     else:
-        msg = "OK: {}".format("; ".join(msg))
+        msg = 'OK: {}'.format('; '.join(msg))
     return generate_output(msg)
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     parse_output()
diff --git a/src/lib/hwhealth/tools.py b/src/lib/hwhealth/tools.py
index 0bf0121..970ac7d 100644
--- a/src/lib/hwhealth/tools.py
+++ b/src/lib/hwhealth/tools.py
@@ -303,7 +303,10 @@ class Sas3Ircu(VendorTool):
     """
     def __init__(self):
         super().__init__(shortname='sas3ircu')
-        self.checksums = ['f150eb37bb332668949a3eccf9636e0e03f874aecd17a39d586082c6be1386bd']
+        self.checksums = [
+            'f150eb37bb332668949a3eccf9636e0e03f874aecd17a39d586082c6be1386bd',
+            'd69967057992134df1b136f83bc775a641e32c4efc741def3ef6f6a25a9a14b5',
+        ]
 
 
 class Sas2Ircu(VendorTool):
@@ -323,7 +326,11 @@ class MegaCLI(VendorTool):
     """
     def __init__(self):
         super().__init__(shortname='megacli')
-        self.checksums = ['34f1a235543662615ee35f458317380b3f89fac0e415dee755e0dbc7c4cf6f92']
+        self.checksums = [
+            '34f1a235543662615ee35f458317380b3f89fac0e415dee755e0dbc7c4cf6f92',
+            '1c4effe33ee5db82227e05925dd629771fd49c7d2be2382d48c48a864452cdec',
+            '1a68e6646d1e3dfb7039f581be994500d0ed02de2f928e57399e86473d4c8662',
+        ]
 
 
 class Mdadm(VendorTool):
diff --git a/src/metadata.yaml b/src/metadata.yaml
index db91438..58c709a 100644
--- a/src/metadata.yaml
+++ b/src/metadata.yaml
@@ -1,6 +1,6 @@
 name: hw-health
 summary: Hardware Monitoring for Nagios
-maintainer: Nagios Charm Developers <nagios-charmers@xxxxxxxxxxxxxxxxxxx>
+maintainer: Llama (LMA) Charmers <llama-charmers@xxxxxxxxxxxxxxxx>
 description: |
   This addon installs hardware monitoring tools and configures Nagios checks
   for the system hardware and storage monitoring.
@@ -23,6 +23,7 @@ tags:
 - monitoring
 - hardware
 series:
+- focal
 - bionic
 - xenial
 subordinate: true
diff --git a/src/tests/download_nagios_plugin3.py b/src/tests/download_nagios_plugin3.py
index 173fa31..b454caf 100755
--- a/src/tests/download_nagios_plugin3.py
+++ b/src/tests/download_nagios_plugin3.py
@@ -18,7 +18,7 @@ def content():
 
 
 def main():
-    for i in glob('.tox/unit/lib/python3*'):
+    for i in glob('.tox/unit/lib/python3*/site-packages'):
         mod_path = os.path.join(i, MODULE_NAME)
         if os.path.isdir(i) and not os.path.exists(mod_path):
             open(mod_path, 'wb').write(content())
diff --git a/src/tests/functional/test_hwhealth.py b/src/tests/functional/test_hwhealth.py
index f16878f..51d407a 100644
--- a/src/tests/functional/test_hwhealth.py
+++ b/src/tests/functional/test_hwhealth.py
@@ -12,7 +12,11 @@ from hwhealth import tools        # noqa: E402
 
 # Treat all tests as coroutines
 pytestmark = pytest.mark.asyncio
-SERIES = ['xenial', 'bionic']
+SERIES = [
+    'focal',
+    'bionic',
+    'xenial',
+]
 JUJU_REPOSITORY = os.getenv('JUJU_REPOSITORY', '.')
 NRPECFG_DIR = '/etc/nagios/nrpe.d'
 
@@ -65,10 +69,17 @@ async def deploy_app(request, model):
     hw_health_checksum_app_name = 'hw-health-checksum-{}'.format(release)
 
     for principal_app in ['ubuntu', 'nagios']:
+        relname = series = release
+        if principal_app == "nagios" and release == "focal":
+            # NOTE(aluria): cs:nagios was not available in focal
+            # On the other hand, bionic testing would create nagios-bionic
+            # hence nagios-bionic2
+            relname = "bionic2"
+            series = "bionic"
         await model.deploy(
             principal_app,
-            application_name='{}-{}'.format(principal_app, release),
-            series=release,
+            application_name='{}-{}'.format(principal_app, relname),
+            series=series,
             channel=channel,
         )
     await model.deploy('ubuntu', application_name='ubuntu-checksum-{}'.format(release),
@@ -87,7 +98,7 @@ async def deploy_app(request, model):
         )
     await nrpe_app.add_relation(
         'monitors',
-        'nagios-{}:monitors'.format(release)
+        'nagios-{}:monitors'.format(relname)
     )
 
     # Attaching resources is not implemented yet in libjuju
diff --git a/src/tests/hw-health-samples/mdadm.output.critical b/src/tests/hw-health-samples/mdadm.output.critical.1
similarity index 100%
rename from src/tests/hw-health-samples/mdadm.output.critical
rename to src/tests/hw-health-samples/mdadm.output.critical.1
diff --git a/src/tests/hw-health-samples/mdadm.output.critical.2 b/src/tests/hw-health-samples/mdadm.output.critical.2
new file mode 100644
index 0000000..2220eae
--- /dev/null
+++ b/src/tests/hw-health-samples/mdadm.output.critical.2
@@ -0,0 +1,33 @@
+/dev/md1:
+           Version : 1.2
+     Creation Time : Wed Jun 26 18:18:55 2019
+        Raid Level : raid10
+        Array Size : 7812235264 (7450.33 GiB 7999.73 GB)
+     Used Dev Size : 3906117632 (3725.16 GiB 3999.86 GB)
+      Raid Devices : 4
+     Total Devices : 3
+       Persistence : Superblock is persistent
+
+     Intent Bitmap : Internal
+
+       Update Time : Mon Oct 28 17:27:54 2019
+             State : active, degraded 
+    Active Devices : 3
+   Working Devices : 3
+    Failed Devices : 0
+     Spare Devices : 0
+
+            Layout : near=2
+        Chunk Size : 512K
+
+Consistency Policy : bitmap
+
+              Name : CMFOSCHSTUP7411:1  (local to host CMFOSCHSTUP7411)
+              UUID : 3887c84d:ebd28589:5a4b5d9e:3c83a25d
+            Events : 491275
+
+    Number   Major   Minor   RaidDevice State
+       0       8       98        0      active sync set-A   /dev/sdg2
+       1       8      114        1      active sync set-B   /dev/sdh2
+       -       0        0        2      removed
+       3       8      162        3      active sync set-B   /dev/sdk2
diff --git a/src/tests/hw-health-samples/mdadm.output.warning b/src/tests/hw-health-samples/mdadm.output.warning
new file mode 100644
index 0000000..bba0d18
--- /dev/null
+++ b/src/tests/hw-health-samples/mdadm.output.warning
@@ -0,0 +1,33 @@
+/dev/md1:
+        Version : 1.2
+  Creation Time : Thu Oct 18 17:41:01 2018
+     Raid Level : raid10
+     Array Size : 7812235264 (7450.33 GiB 7999.73 GB)
+  Used Dev Size : 3906117632 (3725.16 GiB 3999.86 GB)
+   Raid Devices : 4
+  Total Devices : 4
+    Persistence : Superblock is persistent
+
+  Intent Bitmap : Internal
+
+    Update Time : Mon Oct 28 18:46:34 2019
+          State : active, degraded, recovering
+ Active Devices : 3
+Working Devices : 4
+ Failed Devices : 0
+  Spare Devices : 1
+
+         Layout : near=2
+     Chunk Size : 512K
+
+ Rebuild Status : 13% complete
+
+           Name : CMOOSCHSTUP7305:1 (local to host CMOOSCHSTUP7305)
+           UUID : fed3a645:1f742fd3:1685dda5:71794407
+         Events : 750593
+
+    Number Major Minor RaidDevice State
+       0 8 98 0 active sync set-A /dev/sdg2
+       1 8 114 1 active sync set-B /dev/sdh2
+       2 8 146 2 active sync set-A /dev/sdj2
+       4 8 162 3 spare rebuilding /dev/sdk2
diff --git a/src/tests/unit/test_check_mdadm.py b/src/tests/unit/test_check_mdadm.py
index 231ca4f..0314032 100644
--- a/src/tests/unit/test_check_mdadm.py
+++ b/src/tests/unit/test_check_mdadm.py
@@ -6,24 +6,23 @@ import unittest.mock as mock
 
 import nagios_plugin3
 
-sys.path.append('files/mdadm')
+sys.path.append(os.path.join(os.path.dirname(__file__), 'lib'))
+from samples import get_sample  # noqa: E402
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../../files/mdadm'))
 import check_mdadm  # noqa: E402
 
 
 class TestCheckMdadm(unittest.TestCase):
     def test_parse_output_crit(self):
-        check_mdadm.INPUT_FILE = os.path.join(
-            os.getcwd(), 'tests', 'hw-health-samples',
-            'mdadm.output.nrpe.critical')
+        check_mdadm.INPUT_FILE = get_sample('mdadm.output.nrpe.critical')
         expected = 'CRITICAL: critical msg'
         with self.assertRaises(nagios_plugin3.CriticalError) as context:
             check_mdadm.parse_output()
         self.assertTrue(expected in str(context.exception))
 
     def test_parse_output_warn(self):
-        check_mdadm.INPUT_FILE = os.path.join(
-            os.getcwd(), 'tests', 'hw-health-samples',
-            'mdadm.output.nrpe.warning')
+        check_mdadm.INPUT_FILE = get_sample('mdadm.output.nrpe.warning')
         expected = 'WARNING: warning msg'
         with self.assertRaises(nagios_plugin3.WarnError) as context:
             check_mdadm.parse_output()
@@ -31,8 +30,7 @@ class TestCheckMdadm(unittest.TestCase):
 
     @mock.patch('sys.stdout', new_callable=io.StringIO)
     def test_parse_output_ok(self, mock_print):
-        check_mdadm.INPUT_FILE = os.path.join(
-            os.getcwd(), 'tests', 'hw-health-samples', 'mdadm.output.nrpe.ok')
+        check_mdadm.INPUT_FILE = get_sample('mdadm.output.nrpe.ok')
         check_mdadm.parse_output()
         self.assertEqual(
             mock_print.getvalue(),
@@ -41,9 +39,7 @@ class TestCheckMdadm(unittest.TestCase):
 
     @mock.patch('sys.stdout', new_callable=io.StringIO)
     def test_parse_output_unknown_filenotfound(self, mock_print):
-        check_mdadm.INPUT_FILE = os.path.join(
-            os.getcwd(), 'tests', 'hw-health-samples',
-            'thisfiledoesnotexist')
+        check_mdadm.INPUT_FILE = get_sample('thisfiledoesnotexist')
         expected = 'UNKNOWN: file not found ({})'.format(
             check_mdadm.INPUT_FILE)
         with self.assertRaises(nagios_plugin3.UnknownError) as context:
@@ -52,9 +48,7 @@ class TestCheckMdadm(unittest.TestCase):
 
     @mock.patch('sys.stdout', new_callable=io.StringIO)
     def test_parse_output_unknown1(self, mock_print):
-        check_mdadm.INPUT_FILE = os.path.join(
-            os.getcwd(), 'tests', 'hw-health-samples',
-            'mdadm.output.nrpe.unknown.1')
+        check_mdadm.INPUT_FILE = get_sample('mdadm.output.nrpe.unknown.1')
         check_mdadm.parse_output()
         self.assertEqual(
             mock_print.getvalue(),
@@ -63,9 +57,7 @@ class TestCheckMdadm(unittest.TestCase):
 
     @mock.patch('sys.stdout', new_callable=io.StringIO)
     def test_parse_output_unknown2(self, mock_print):
-        check_mdadm.INPUT_FILE = os.path.join(
-            os.getcwd(), 'tests', 'hw-health-samples',
-            'mdadm.output.nrpe.unknown.2')
+        check_mdadm.INPUT_FILE = get_sample('mdadm.output.nrpe.unknown.2')
         check_mdadm.parse_output()
         self.assertEqual(
             mock_print.getvalue(),
diff --git a/src/tests/unit/test_check_megacli.py b/src/tests/unit/test_check_megacli.py
index 988075a..4207953 100644
--- a/src/tests/unit/test_check_megacli.py
+++ b/src/tests/unit/test_check_megacli.py
@@ -6,15 +6,17 @@ import unittest.mock as mock
 
 import nagios_plugin3
 
-sys.path.append('files/megacli')
+sys.path.append(os.path.join(os.path.dirname(__file__), 'lib'))
+from samples import get_sample  # noqa: E402
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../../files/megacli'))
 import check_megacli  # noqa: E402
 
 
 class TestCheckMegaCLI(unittest.TestCase):
     @mock.patch('sys.stdout', new_callable=io.StringIO)
     def test_parse_output(self, mock_print):
-        check_megacli.INPUT_FILE = os.path.join(
-            os.getcwd(), 'tests', 'hw-health-samples', 'megacli.output.1')
+        check_megacli.INPUT_FILE = get_sample('megacli.output.1')
         check_megacli.parse_output()
         actual = mock_print.getvalue()
         expected = 'OK: Optimal, ldrives[1], pdrives[4]\n'
@@ -22,8 +24,7 @@ class TestCheckMegaCLI(unittest.TestCase):
 
     @mock.patch('sys.stdout', new_callable=io.StringIO)
     def test_parse_output_critical_singledrive(self, mock_print):
-        check_megacli.INPUT_FILE = os.path.join(
-            os.getcwd(), 'tests', 'hw-health-samples', 'megacli.output.nrpe.critical.1')
+        check_megacli.INPUT_FILE = get_sample('megacli.output.nrpe.critical.1')
         expected = 'CRITICAL: adapter(0):ld(0):state(Degraded)'
         with self.assertRaises(nagios_plugin3.CriticalError) as context:
             check_megacli.parse_output()
@@ -31,8 +32,7 @@ class TestCheckMegaCLI(unittest.TestCase):
 
     @mock.patch('sys.stdout', new_callable=io.StringIO)
     def test_parse_output_critical_multiple(self, mock_print):
-        check_megacli.INPUT_FILE = os.path.join(
-            os.getcwd(), 'tests', 'hw-health-samples', 'megacli.output.nrpe.critical.2')
+        check_megacli.INPUT_FILE = get_sample('megacli.output.nrpe.critical.2')
         expected = ('CRITICAL: adapter(0):ld(0):state(Degraded);'
                     ' adapter(0):ld(4):state(Degraded)')
         with self.assertRaises(nagios_plugin3.CriticalError) as context:
diff --git a/src/tests/unit/test_check_nvme.py b/src/tests/unit/test_check_nvme.py
index 53466e4..097fd76 100644
--- a/src/tests/unit/test_check_nvme.py
+++ b/src/tests/unit/test_check_nvme.py
@@ -4,7 +4,10 @@ import sys
 import unittest
 import unittest.mock as mock
 
-sys.path.append('files/nvme')
+sys.path.append(os.path.join(os.path.dirname(__file__), 'lib'))
+from samples import get_sample  # noqa: E402
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../../files/nvme'))
 import check_nvme  # noqa: E402
 
 
@@ -14,8 +17,7 @@ class TestCheckNvme(unittest.TestCase):
     @mock.patch('sys.stdout', new_callable=io.StringIO)
     def test_parse_output(self, mock_print, mock_subprocess, mock_glob):
         mock_glob.return_value = ['/dev/nvme0']
-        input_file = os.path.join(os.getcwd(), 'tests', 'hw-health-samples',
-                                  'nvme.output.1')
+        input_file = get_sample('nvme.output.1')
         with open(input_file, 'r') as fd:
             mock_subprocess.return_value = fd.read().encode()
         check_nvme.parse_output()
diff --git a/src/tests/unit/test_check_sas2ircu.py b/src/tests/unit/test_check_sas2ircu.py
index 1d0dc80..5464889 100644
--- a/src/tests/unit/test_check_sas2ircu.py
+++ b/src/tests/unit/test_check_sas2ircu.py
@@ -4,16 +4,17 @@ import sys
 import unittest
 import unittest.mock as mock
 
-sys.path.append('files/sas2ircu')
+sys.path.append(os.path.join(os.path.dirname(__file__), 'lib'))
+from samples import get_sample  # noqa: E402
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../../files/sas2ircu'))
 import check_sas2ircu  # noqa: E402
 
 
 class TestCheckMegaCLI(unittest.TestCase):
     @mock.patch('sys.stdout', new_callable=io.StringIO)
     def test_parse_output(self, mock_print):
-        check_sas2ircu.INPUT_FILE = os.path.join(os.getcwd(), 'tests',
-                                                 'hw-health-samples',
-                                                 'sas2ircu.huawei.output.1')
+        check_sas2ircu.INPUT_FILE = get_sample('sas2ircu.huawei.output.1')
         check_sas2ircu.parse_output()
         actual = mock_print.getvalue()
         expected = 'OK: Ready[1:0,1:1,1:2,1:3,1:4,1:5,1:6,1:7]\n'
diff --git a/src/tests/unit/test_check_sas3ircu.py b/src/tests/unit/test_check_sas3ircu.py
index 5747de9..1379369 100644
--- a/src/tests/unit/test_check_sas3ircu.py
+++ b/src/tests/unit/test_check_sas3ircu.py
@@ -4,15 +4,17 @@ import sys
 import unittest
 import unittest.mock as mock
 
-sys.path.append('files/sas3ircu')
+sys.path.append(os.path.join(os.path.dirname(__file__), 'lib'))
+from samples import get_sample  # noqa: E402
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../../files/sas3ircu'))
 import check_sas3ircu  # noqa: E402
 
 
 class TestCheckMegaCLI(unittest.TestCase):
     @mock.patch('sys.stdout', new_callable=io.StringIO)
     def test_parse_output_ok(self, mock_print):
-        _filepath = os.path.join(os.getcwd(), 'tests', 'hw-health-samples',
-                                 'sas3ircu.supermicro.ok.output.1')
+        _filepath = get_sample('sas3ircu.supermicro.ok.output.1')
         data = check_sas3ircu.parse_output(_filepath)
         check_sas3ircu.eval_status(data)
         actual = mock_print.getvalue()
diff --git a/src/tests/unit/test_cron_mdadm.py b/src/tests/unit/test_cron_mdadm.py
index d124931..f52ea9c 100644
--- a/src/tests/unit/test_cron_mdadm.py
+++ b/src/tests/unit/test_cron_mdadm.py
@@ -5,7 +5,10 @@ import sys  # noqa: F401
 import unittest
 import unittest.mock as mock
 
-sys.path.append('files/mdadm')
+sys.path.append(os.path.join(os.path.dirname(__file__), 'lib'))
+from samples import get_sample  # noqa: E402
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../../files/mdadm'))
 import cron_mdadm  # noqa: E402
 
 
@@ -44,13 +47,11 @@ class TestCronMdadm(unittest.TestCase):
     def test_parse_output_ok(self, mock_print, mdadm_details, devices, genout):
         class Test_Popen(object):
             def __init__(cls):
-                test_output = os.path.join(
-                    os.getcwd(), 'tests', 'hw-health-samples', 'mdadm.output')
+                test_output = get_sample('mdadm.output')
                 cls.stdout = io.FileIO(test_output)
                 cls.wait = lambda: 0
 
-        test_output = os.path.join(
-            os.getcwd(), 'tests', 'hw-health-samples', 'mdadm.output')
+        test_output = get_sample('mdadm.output')
         with open(test_output, 'r') as fd:
             mdadm_details.return_value = ''.join(fd.readlines()).encode()
         devices.return_value = set(['/dev/md0', '/dev/md1', '/dev/md2'])
@@ -87,9 +88,7 @@ class TestCronMdadm(unittest.TestCase):
     @mock.patch('cron_mdadm.get_devices')
     @mock.patch('subprocess.check_output')
     def test_parse_output_degraded(self, mdadm_details, devices, genout):
-        test_output = os.path.join(
-            os.getcwd(), 'tests', 'hw-health-samples',
-            'mdadm.output.critical')
+        test_output = get_sample('mdadm.output.critical.1')
         with open(test_output, 'r') as fd:
             mdadm_details.return_value = ''.join(fd.readlines()).encode()
 
@@ -100,3 +99,31 @@ class TestCronMdadm(unittest.TestCase):
                     ' /dev/md2 ok')
         self.assertTrue(cron_mdadm.parse_output())
         genout.assert_called_once_with(expected)
+
+    @mock.patch('cron_mdadm.generate_output')
+    @mock.patch('cron_mdadm.get_devices')
+    @mock.patch('subprocess.check_output')
+    def test_parse_output_removed(self, mdadm_details, devices, genout):
+        test_output = get_sample('mdadm.output.critical.2')
+        with open(test_output, 'r') as fd:
+            mdadm_details.return_value = ''.join(fd.readlines()).encode()
+
+        devices.return_value = set(['/dev/md1'])
+        genout.return_value = True
+        expected = ('CRITICAL: /dev/md1 degraded, RaidDevice(s) 2 marked removed')
+        self.assertTrue(cron_mdadm.parse_output())
+        genout.assert_called_once_with(expected)
+
+    @mock.patch('cron_mdadm.generate_output')
+    @mock.patch('cron_mdadm.get_devices')
+    @mock.patch('subprocess.check_output')
+    def test_parse_output_rebuilding(self, mdadm_details, devices, genout):
+        test_output = get_sample('mdadm.output.warning')
+        with open(test_output, 'r') as fd:
+            mdadm_details.return_value = ''.join(fd.readlines()).encode()
+
+        devices.return_value = set(['/dev/md0', '/dev/md1', '/dev/md2'])
+        genout.return_value = True
+        expected = ('WARNING: /dev/md1 recovering, /dev/sdk2 rebuilding (13% complete)')
+        self.assertTrue(cron_mdadm.parse_output())
+        genout.assert_called_once_with(expected)
diff --git a/src/tests/unit/test_hwdiscovery.py b/src/tests/unit/test_hwdiscovery.py
index 6e5bf3d..43ea360 100644
--- a/src/tests/unit/test_hwdiscovery.py
+++ b/src/tests/unit/test_hwdiscovery.py
@@ -1,11 +1,13 @@
-import glob
 import os  # noqa: F401
 import subprocess  # noqa: F401
 import sys
 import unittest
 import unittest.mock as mock
 
-sys.path.append('lib/hwhealth')
+sys.path.append(os.path.join(os.path.dirname(__file__), 'lib'))
+from samples import get_sample  # noqa: E402
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../../lib/hwhealth'))
 import hwdiscovery  # noqa: E402
 from discovery.lshw import Hardware  # noqa: E402
 
@@ -83,8 +85,7 @@ class TestGetTools(unittest.TestCase):
             'lshw.supermicro.sas.02.json': set(['Nvme', 'Sas3Ircu']),
         }
 
-        for filename in glob.glob(os.path.join(
-              os.getcwd(), 'tests/hw-health-samples/lshw.*.json')):
+        for filename in get_sample('lshw.*.json'):
             mock_hwinfo.return_value = Hardware(filename)
             actual = hwdiscovery._get_tools()
             if os.path.basename(filename) in TOOLS:
diff --git a/src/tox.ini b/src/tox.ini
index 71aeb29..29b332a 100644
--- a/src/tox.ini
+++ b/src/tox.ini
@@ -4,6 +4,7 @@ skipsdist = true
 
 [testenv:unit]
 basepython=python3
+setenv = PYTHONPATH={toxinidir}/lib
 deps=
   charms.reactive
   nose