← Back to team overview

cf-charmers team mailing list archive

[Merge] lp:~johnsca/charms/trusty/cloudfoundry/better-basic-reconciler-status into lp:~cf-charmers/charms/trusty/cloudfoundry/trunk

 

Cory Johns has proposed merging lp:~johnsca/charms/trusty/cloudfoundry/better-basic-reconciler-status into lp:~cf-charmers/charms/trusty/cloudfoundry/trunk.

Requested reviews:
  Cloud Foundry Charmers (cf-charmers)

For more details, see:
https://code.launchpad.net/~johnsca/charms/trusty/cloudfoundry/better-basic-reconciler-status/+merge/242372

Since the monit status is so unreliable, this branch switches the basic health check to use the self-reported Juju status until we have a check that introspects the services more deeply.
-- 
Your team Cloud Foundry Charmers is requested to review the proposed merge of lp:~johnsca/charms/trusty/cloudfoundry/better-basic-reconciler-status into lp:~cf-charmers/charms/trusty/cloudfoundry/trunk.
=== modified file 'cloudfoundry/health_checks.py'
--- cloudfoundry/health_checks.py	2014-08-24 21:36:50 +0000
+++ cloudfoundry/health_checks.py	2014-11-20 16:14:14 +0000
@@ -1,3 +1,4 @@
+from charmhelpers.core import hookenv
 from cloudfoundry import tasks
 
 
@@ -17,3 +18,21 @@
                     message='not all services running',
                     data={'services': summary})
     return result
+
+
+def status(service):
+    result = {
+        'name': 'monit_summary',
+        'health': 'pass',
+        'message': None,
+        'data': {},
+    }
+    status = hookenv.juju_status()
+    if status['status'] == 'error':
+        return dict(result, health='fail', message=status['message'])
+    elif status['status'] == 'blocked' and status['manual']:
+        return dict(result, health='fail', message='Blocked: %s' % status['blockers'])
+    elif status['status'] == 'up':
+        return result
+    else:
+        return dict(result, health='warn', message='Working (%s)' % status['status'])

=== modified file 'cloudfoundry/jobs.py'
--- cloudfoundry/jobs.py	2014-10-03 15:47:17 +0000
+++ cloudfoundry/jobs.py	2014-11-20 16:14:14 +0000
@@ -68,7 +68,7 @@
     service_def = service_data[charm_name]
     results = []
     health = 'pass'
-    checks = service_def.get('health', []) + [health_checks.monit_summary]
+    checks = service_def.get('health', []) + [health_checks.status]
     for health_check in checks:
         result = health_check(service_def)
         if result['health'] == 'fail':

=== modified file 'reconciler/app.py'
--- reconciler/app.py	2014-11-14 17:00:08 +0000
+++ reconciler/app.py	2014-11-20 16:14:14 +0000
@@ -83,13 +83,14 @@
         units = service.get('Units', {}) or {}
         for unit_name, unit in units.iteritems():
             unit_addr = unit.get('PublicAddress')
+            unit_state = unit.get('AgentState')
             if unit_addr:
                 loop = tornado.ioloop.IOLoop.instance()
                 loop.add_callback(check_health, service_name,
-                                  unit_name, unit_addr)
-
-
-def check_health(service_name, unit_name, unit_addr):
+                                  unit_name, unit_addr, unit_state)
+
+
+def check_health(service_name, unit_name, unit_addr, unit_state):
     service = health.setdefault(service_name, {
         'name': service_name,
         'health': 'unknown',
@@ -118,8 +119,13 @@
             unit['health'] = 'fail'
             unit['state'] = {'message': 'Unable to parse health: {}'.format(output)}
     except subprocess.CalledProcessError as e:
-        unit['health'] = 'warn'
         unit['state'] = {'message': 'Unable to retrieve health: {}'.format(e.output)}
+        if unit_state == 'started':
+            unit['health'] = 'pass'
+        elif unit_state == 'error':
+            unit['health'] = 'fail'
+        else:
+            unit['health'] = 'warn'
 
     units_fail = [u['health'] == 'fail' for u in service['units'].values()]
     units_not_pass = [u['health'] != 'pass' for u in service['units'].values()]


Follow ups