← Back to team overview

nagios-charmers team mailing list archive

[Merge] ~xavpaice/hw-health-charm:LP1838562 into hw-health-charm:master

 

Xav Paice has proposed merging ~xavpaice/hw-health-charm:LP1838562 into hw-health-charm:master.

Requested reviews:
  Nagios Charm developers (nagios-charmers)
Related bugs:
  Bug #1838562 in hw-health-charm: "cron_ipmi_sensors.py can get blocked if PID file not removed"
  https://bugs.launchpad.net/hw-health-charm/+bug/1838562

For more details, see:
https://code.launchpad.net/~xavpaice/hw-health-charm/+git/hw-health-charm/+merge/378128
-- 
Your team Nagios Charm developers is requested to review the proposed merge of ~xavpaice/hw-health-charm:LP1838562 into hw-health-charm:master.
diff --git a/src/files/ipmi/cron_ipmi_sensors.py b/src/files/ipmi/cron_ipmi_sensors.py
index a9538af..72d07eb 100644
--- a/src/files/ipmi/cron_ipmi_sensors.py
+++ b/src/files/ipmi/cron_ipmi_sensors.py
@@ -17,30 +17,41 @@ NAGIOS_ERRORS = {
 
 
 def gather_metrics():
-    # a child is already running
+    # Check if a PID file exists
     if os.path.exists(CHECK_IPMI_PID):
-        return
+        # is the PID valid?
+        with open(CHECK_IPMI_PID, 'r') as fd:
+            PID = fd.read()
+        if PID not in os.listdir('/proc'):
+            # PID file is invalid, remove it
+            os.remove(CHECK_IPMI_PID)
+        else:
+            return
 
     try:
         with open(CHECK_IPMI_PID, 'w') as fd:
             fd.write(str(os.getpid()))
-
-        cmdline = [CMD]
-        if len(sys.argv) > 1:
-            cmdline.extend(sys.argv[1:])
-
+    except IOError as e:
+        # unable to write PID file, can't lock
+        print("Cannot write lock file, error {}".format(e))
+        sys.exit(1)
+
+    cmdline = [CMD]
+    if len(sys.argv) > 1:
+        cmdline.extend(sys.argv[1:])
+    try:
         output = subprocess.check_output(cmdline)
-        with open(TMP_OUTPUT_FILE, 'w') as fd:
-            fd.write(output.decode(errors='ignore'))
-        os.rename(TMP_OUTPUT_FILE, OUTPUT_FILE)
     except subprocess.CalledProcessError as error:
         output = error.stdout.decode(errors='ignore')
         with open(TMP_OUTPUT_FILE, 'w') as fd:
             fd.write('{}: {}'.format(NAGIOS_ERRORS[error.returncode], output))
-        os.rename(TMP_OUTPUT_FILE, OUTPUT_FILE)
-    except PermissionError as error:
-        with (OUTPUT_FILE, 'w') as fd:
-            fd.write('UNKNOWN: {}'.format(error))
+    try:
+        with open(TMP_OUTPUT_FILE, 'w') as fd:
+            fd.write(output)
+    except IOError as e:
+        print("Cannot write output file, error {}".format(e))
+        sys.exit(1)
+    os.rename(TMP_OUTPUT_FILE, OUTPUT_FILE)
 
     # remove pid reference
     os.remove(CHECK_IPMI_PID)

Follow ups