nagios-charmers team mailing list archive
-
nagios-charmers team
-
Mailing list archive
-
Message #00775
[Merge] ~xavpaice/hw-health-charm:LP1838562 into hw-health-charm:master
Xav Paice has proposed merging ~xavpaice/hw-health-charm:LP1838562 into hw-health-charm:master.
Requested reviews:
Nagios Charm developers (nagios-charmers)
Related bugs:
Bug #1838562 in hw-health-charm: "cron_ipmi_sensors.py can get blocked if PID file not removed"
https://bugs.launchpad.net/hw-health-charm/+bug/1838562
For more details, see:
https://code.launchpad.net/~xavpaice/hw-health-charm/+git/hw-health-charm/+merge/378128
--
Your team Nagios Charm developers is requested to review the proposed merge of ~xavpaice/hw-health-charm:LP1838562 into hw-health-charm:master.
diff --git a/src/files/ipmi/cron_ipmi_sensors.py b/src/files/ipmi/cron_ipmi_sensors.py
index a9538af..72d07eb 100644
--- a/src/files/ipmi/cron_ipmi_sensors.py
+++ b/src/files/ipmi/cron_ipmi_sensors.py
@@ -17,30 +17,41 @@ NAGIOS_ERRORS = {
def gather_metrics():
- # a child is already running
+ # Check if a PID file exists
if os.path.exists(CHECK_IPMI_PID):
- return
+ # is the PID valid?
+ with open(CHECK_IPMI_PID, 'r') as fd:
+ PID = fd.read()
+ if PID not in os.listdir('/proc'):
+ # PID file is invalid, remove it
+ os.remove(CHECK_IPMI_PID)
+ else:
+ return
try:
with open(CHECK_IPMI_PID, 'w') as fd:
fd.write(str(os.getpid()))
-
- cmdline = [CMD]
- if len(sys.argv) > 1:
- cmdline.extend(sys.argv[1:])
-
+ except IOError as e:
+ # unable to write PID file, can't lock
+ print("Cannot write lock file, error {}".format(e))
+ sys.exit(1)
+
+ cmdline = [CMD]
+ if len(sys.argv) > 1:
+ cmdline.extend(sys.argv[1:])
+ try:
output = subprocess.check_output(cmdline)
- with open(TMP_OUTPUT_FILE, 'w') as fd:
- fd.write(output.decode(errors='ignore'))
- os.rename(TMP_OUTPUT_FILE, OUTPUT_FILE)
except subprocess.CalledProcessError as error:
output = error.stdout.decode(errors='ignore')
with open(TMP_OUTPUT_FILE, 'w') as fd:
fd.write('{}: {}'.format(NAGIOS_ERRORS[error.returncode], output))
- os.rename(TMP_OUTPUT_FILE, OUTPUT_FILE)
- except PermissionError as error:
- with (OUTPUT_FILE, 'w') as fd:
- fd.write('UNKNOWN: {}'.format(error))
+ try:
+ with open(TMP_OUTPUT_FILE, 'w') as fd:
+ fd.write(output)
+ except IOError as e:
+ print("Cannot write output file, error {}".format(e))
+ sys.exit(1)
+ os.rename(TMP_OUTPUT_FILE, OUTPUT_FILE)
# remove pid reference
os.remove(CHECK_IPMI_PID)
Follow-ups