livepatch-charmers team mailing list archive
-
livepatch-charmers team
-
Mailing list archive
-
Message #00210
[Merge] ~barryprice/canonical-livepatch-charm/+git/canonical-livepatch-charm:master into canonical-livepatch-charm:master
Barry Price has proposed merging ~barryprice/canonical-livepatch-charm/+git/canonical-livepatch-charm:master into canonical-livepatch-charm:master.
Commit message:
Clearer check output, and warn if the client output format changes (i.e. if checkState or patchState go away).
Requested reviews:
Livepatch charm developers (livepatch-charmers)
Related bugs:
Bug #1794327 in Canonical Livepatch Charm: "Nagios check can miss output changes"
https://bugs.launchpad.net/canonical-livepatch-charm/+bug/1794327
For more details, see:
https://code.launchpad.net/~barryprice/canonical-livepatch-charm/+git/canonical-livepatch-charm/+merge/355822
--
Your team, Livepatch charm developers, is requested to review the proposed merge of ~barryprice/canonical-livepatch-charm/+git/canonical-livepatch-charm:master into canonical-livepatch-charm:master.
diff --git a/files/check_canonical-livepatch.py b/files/check_canonical-livepatch.py
index 7878169..f9a0c24 100755
--- a/files/check_canonical-livepatch.py
+++ b/files/check_canonical-livepatch.py
@@ -5,72 +5,122 @@
import os
import nagios_plugin
from subprocess import check_output, call
+from yaml import safe_load
supported_archs = ['x86_64']
##############################################################################
-def check_package_installed():
+def check_snap_installed():
+ """Confirm the snap is installed, raise an error if not"""
cmd = ['snap', 'list', 'canonical-livepatch']
try:
check_output(cmd, universal_newlines=True)
except Exception:
- raise nagios_plugin.CriticalError("canonical-livepatch snap is not installed")
+ raise nagios_plugin.CriticalError('canonical-livepatch snap is not installed')
##############################################################################
-def check_vmlinuz():
- vmlinuz = '/vmlinuz'
- if os.path.exists(vmlinuz):
- full_kernel_path = os.path.realpath(vmlinuz)
- elif os.path.exists('/boot' + vmlinuz):
- vmlinuz = '/boot' + vmlinuz
- full_kernel_path = os.path.realpath(vmlinuz)
- else:
- return 'no /vmlinuz or /boot/vmlinuz'
- kernel_filename = os.path.basename(full_kernel_path)
- # remove 'vmlinuz'-' from start:
- kernel_version = '-'.join(kernel_filename.split('-', 1)[1:])
- # check for '-generic-pae' kernels that need two removes:
- if '-generic-pae' in kernel_version:
- kernel_version = '-'.join(kernel_version.split('-')[:-1])
- # remove e.g. '-generic' from end:
- kernel_version = '-'.join(kernel_version.split('-')[:-1])
- return kernel_version.strip()
+def parse_status():
+ """Load the cached status from disk, return it as a string"""
+ livepatch_output_path = '/var/lib/nagios/canonical-livepatch-status.txt'
+
+ with open(livepatch_output_path, 'r') as canonical_livepatch_log:
+ livepatch_status = canonical_livepatch_log.read()
+
+ return livepatch_status
+
+
+##############################################################################
+
+def check_enabled():
+ """Confirm machine is enabled, raise an error if not"""
+ livepatch_status = parse_status()
+ if 'Machine is not enabled' in livepatch_status:
+ raise nagios_plugin.CriticalError('Machine is not enabled.')
+
+
+##############################################################################
+
+def active_kernel_version():
+ """Return the active kernel version, from livepatch's perspective"""
+ livepatch_status = parse_status()
+ status_yaml = safe_load(livepatch_status)
+ for kernel in status_yaml.get('status'):
+ if kernel.get('running') is True:
+ return kernel.get('kernel')
##############################################################################
def check_status():
- livepatch_output_path = '/var/lib/nagios/canonical-livepatch-status.txt'
- err_lines = []
- wrn_lines = []
+ """Check the cached status, raise an error if we find any issues"""
+ livepatch_status = parse_status()
+ err = ''
- with open(livepatch_output_path, 'r') as canonical_livepatch_log:
- for line in canonical_livepatch_log:
- line = line.strip()
- if 'State:' in line:
- if 'apply-failed' in line:
- err_lines.append('Livepatch failed to apply patches.')
- elif 'check-failed' in line:
- err_lines.append('Livepatch failed to check the remote service for patches.')
- elif 'unknown' in line:
- err_lines.append('Livepatch reports an unknown error.')
- elif 'kernel-upgrade-required' in line:
- err_lines.append('A kernel upgrade (and reboot) is required.')
- elif 'Machine is not enabled' in line:
- err_lines.append('Machine is not enabled.')
-
- if err_lines:
- err = " ".join(err_lines)
+ status_yaml = safe_load(livepatch_status)
+
+ for kernel in status_yaml.get('status'):
+ if kernel.get('running') is True:
+ check_state = kernel.get('livepatch').get('checkState')
+ patch_state = kernel.get('livepatch').get('patchState')
+
+ check_state_errors = check_check_state(check_state)
+ patch_state_errors = check_patch_state(patch_state)
+
+ if check_state_errors:
+ err = err + ' '.join(check_state_errors)
+
+ if err != '':
+ err = err + ' '
+
+ if patch_state_errors:
+ err = err + ' '.join(patch_state_errors)
+
+ if err != '':
raise nagios_plugin.CriticalError(err)
- elif wrn_lines:
- wrn = " ".join(wrn_lines)
- raise nagios_plugin.WarnError(wrn)
+##############################################################################
+
+def check_check_state(check_state):
+ """Check for issues with checkState, including unexpected output"""
+ error_list = []
+ if check_state in ['checked', 'needs-check']:
+ pass
+ elif check_state == 'check-failed':
+ error_list.append('Livepatch failed to check the remote service for patches.')
+ else:
+ error_list.append('Unknown check state: {}'.format(check_state))
+
+ return error_list
+
+
+##############################################################################
+
+def check_patch_state(patch_state):
+ """Check for issues with patchState, including unexpected output"""
+ error_list = []
+ if patch_state in ['applied', 'nothing-to-apply']:
+ pass
+ elif patch_state == 'unapplied':
+ error_list.append('Livepatch has not applied the downloaded patches.')
+ elif patch_state == 'applied-with-bug':
+ error_list.append('Livepatch has detected a kernel bug while applying patches.')
+ elif patch_state == 'apply-failed':
+ error_list.append('Livepatch failed to apply patches.')
+ elif patch_state == 'kernel-upgrade-required':
+ error_list.append('A kernel upgrade (and reboot) is required.')
+ else:
+ error_list.append('Unknown patch state: {}'.format(patch_state))
+
+ return error_list
+
+
+##############################################################################
+
def lsb_release():
"""Return /etc/lsb-release in a dict"""
d = {}
@@ -81,6 +131,8 @@ def lsb_release():
return d
+##############################################################################
+
def init_is_systemd():
"""Return True if the host system uses systemd, False otherwise."""
if lsb_release()['DISTRIB_CODENAME'] == 'trusty':
@@ -88,6 +140,8 @@ def init_is_systemd():
return os.path.isdir('/run/systemd/system')
+##############################################################################
+
def is_container():
"""Determine whether unit is running in a container
@@ -112,9 +166,11 @@ def main():
elif is_container():
print("canonical-livepatch not needed in OS containers.")
else:
- nagios_plugin.try_check(check_package_installed)
+ nagios_plugin.try_check(check_snap_installed)
+ nagios_plugin.try_check(check_enabled)
nagios_plugin.try_check(check_status)
- print("OK - canonical-livepatch seems to be installed and working")
+ kernel_version = active_kernel_version()
+ print("OK - canonical-livepatch is active on kernel {}".format(kernel_version))
##############################################################################
Follow ups