← Back to team overview

cf-charmers team mailing list archive

[Merge] lp:~whitmo/charms/trusty/cloudfoundry/cf-disco-check into lp:~cf-charmers/charms/trusty/cloudfoundry/trunk

 

Whit Morriss has proposed merging lp:~whitmo/charms/trusty/cloudfoundry/cf-disco-check into lp:~cf-charmers/charms/trusty/cloudfoundry/trunk.

Requested reviews:
  Cory Johns (johnsca): feedback

For more details, see:
https://code.launchpad.net/~whitmo/charms/trusty/cloudfoundry/cf-disco-check/+merge/244644

Basic implementation of health check using healthz & varz endpoint.  Only applied to DEA node currently.
-- 
Your team Cloud Foundry Charmers is subscribed to branch lp:~cf-charmers/charms/trusty/cloudfoundry/trunk.
=== modified file '.bzrignore'
--- .bzrignore	2014-11-13 16:22:27 +0000
+++ .bzrignore	2014-12-12 19:15:16 +0000
@@ -47,3 +47,7 @@
 # Generated test charms
 cloudfoundry-r*
 output-v*.yaml
+
+# Generated when running CATs
+src/
+pkg/

=== modified file 'cfdeploy'
--- cfdeploy	2014-12-03 23:14:23 +0000
+++ cfdeploy	2014-12-12 19:15:16 +0000
@@ -35,7 +35,7 @@
         try:
             subprocess.check_output(
                 ['sudo', 'apt-get', 'install', '-y', '-q', 'python-virtualenv'],
-                stderr=subprocess.STDOUT)
+                stderr=subprocess.STDOUT)
         except subprocess.CalledProcessError as e:
             print e.output
             raise
@@ -77,6 +77,7 @@
     parser.add_argument('-l', '--log', action='store_true',
                         help='Write debug log to cfdeploy.log')
     parser.add_argument('-c', '--constraints')
+    parser.add_argument('--to')
     parser.add_argument('-g', '--generate', action='store_true')
     parser.add_argument('admin_password')
 
@@ -152,6 +153,7 @@
     bar.next(message='Deploying Orchestrator')
     wait_for(30, 5, partial(deploy,
              constraints=options.constraints,
+             to=options.to,
              generate_dependents=options.generate,
              admin_password=options.admin_password))
     until(lambda: socket_open(reconciler_endpoint(), 8888),

=== modified file 'charmgen/generator.py'
--- charmgen/generator.py	2014-11-07 21:50:48 +0000
+++ charmgen/generator.py	2014-12-12 19:15:16 +0000
@@ -227,6 +227,10 @@
             if zone:
                 driver = zone[0]
                 for follower in zone[1:]:
+                    if follower not in services:
+                        # allow dangling references in placement to handle
+                        # removed services
+                        continue
                     with_references.add(driver)
                     if 'to' not in services[follower]:
                         services[follower]['to'] = [driver]

=== modified file 'charmgen/placements.yaml'
--- charmgen/placements.yaml	2014-11-11 03:52:11 +0000
+++ charmgen/placements.yaml	2014-12-12 19:15:16 +0000
@@ -5,7 +5,7 @@
         b: [router, nats, nats-sf]
         c: [login, uaa, mysql]
         d: [dea, hm, etcd]
-        e: [loggregator, loggregator-trafficcontrol]
+        e: [loggregator-trafficcontrol, loggregator, doppler]
         f: [haproxy, collector]
 local:
     __default__:

=== modified file 'cloudfoundry/contexts.py'
--- cloudfoundry/contexts.py	2014-11-11 04:02:53 +0000
+++ cloudfoundry/contexts.py	2014-12-12 19:15:16 +0000
@@ -226,7 +226,7 @@
 
 class LoginRelation(RelationContext):
     name = 'login'
-    interface = 'http'
+    interface = 'login'
     required_keys = []
     port = 8080
 
@@ -239,6 +239,8 @@
         data = self[self.name][0]
         return {
             'login.port': data['port'],
+            'login.saml.socket.connectionManagerTimeout': 10000,
+            'login.saml.socket.soTimeout': 10000,
         }
 
 
@@ -250,6 +252,11 @@
     def erb_mapping(self):
         return {
             'dea_next.directory_server_protocol': 'http',
+            # these are the default values, but since they're used in hm and
+            # cc and those specs don't give the default value (only dea_next),
+            # we have to manually specify them here
+            'dea_next.advertise_interval_in_seconds': 5,
+            'dea_next.heartbeat_interval_in_seconds': 10,
         }
 
 
@@ -296,8 +303,6 @@
             'traffic_controller.host': data[0]['host'],
             'traffic_controller.incoming_port': data[0]['port'],
             'traffic_controller.outgoing_port': data[0]['outgoing_port'],
-            'logger_endpoint.use_ssl': False,  # TODO: support SSL option
-            'logger_endpoint.port': 80,  # default is 443
         }
 
 
@@ -328,6 +333,48 @@
         }
 
 
+class DopplerRelation(RelationContext):
+    name = 'doppler'
+    interface = 'doppler'
+    required_keys = ['address', 'incoming_port', 'outgoing_port',
+                     'dropsonde_port', 'doppler_endpoint_secret']
+    incoming_port = 3457
+    outgoing_port = 8083
+    dropsonde_port = 3460
+
+    def get_shared_secrets(self):
+        secret_context = StoredContext('doppler-secrets.yml', {
+            'doppler_endpoint_secret': host.pwgen(20),
+        })
+        return secret_context
+
+    def provide_data(self):
+        secrets = self.get_shared_secrets()
+        return {
+            'address': hookenv.unit_get('private-address').encode('utf-8'),
+            'incoming_port': self.incoming_port,
+            'outgoing_port': self.outgoing_port,
+            'dropsonde_port': self.dropsonde_port,
+            'doppler_endpoint_secret': secrets['doppler_endpoint_secret']
+        }
+
+    def erb_mapping(self):
+        data = self[self.name]
+        return {
+            'doppler.zone': 'z1',
+            'doppler.blacklisted_syslog_ranges': [],
+            'doppler.incoming_port': data[0]['incoming_port'],
+            'doppler.outgoing_port': data[0]['outgoing_port'],
+            'doppler.dropsonde_incoming_port': data[0]['dropsonde_port'],
+            'doppler_endpoint.shared_secret': data[0]['doppler_endpoint_secret'],
+            'loggregator.doppler_port': data[0]['outgoing_port'],
+            'loggregator.incoming_port': data[0]['incoming_port'],
+            'loggregator.dropsonde_incoming_port': data[0]['dropsonde_port'],
+            'loggregator.dropsonde_outgoing_port': data[0]['outgoing_port'],
+            'loggregator_endpoint.shared_secret': data[0]['doppler_endpoint_secret'],
+        }
+
+
 class EtcdRelation(RelationContext):
     name = 'etcd'
     interface = 'etcd'
@@ -343,13 +390,15 @@
 class CloudControllerRelation(RelationContext):
     name = 'cc'
     interface = 'controller'
-    required_keys = ['hostname', 'port', 'user', 'password', 'db_encryption_key']
+    required_keys = ['hostname', 'port', 'user', 'password',
+                     'internal_api_password', 'db_encryption_key']
 
     def get_credentials(self):
         return StoredContext('api_credentials.yml', {
             'user': host.pwgen(7),
             'password': host.pwgen(7),
             'db_encryption_key': host.pwgen(7),
+            'internal_api_password': host.pwgen(7),
         })
 
     def provide_data(self):
@@ -357,6 +406,7 @@
         return {
             'user': creds['user'],
             'password': creds['password'],
+            'internal_api_password': creds['internal_api_password'],
             'db_encryption_key': creds['db_encryption_key'],
             'hostname': hookenv.unit_get('private-address').encode('utf-8'),
             'port': 9022,
@@ -369,6 +419,7 @@
             'cc.srv_api_uri': 'http://{}:{}'.format(data['hostname'], data['port']),
             'cc.bulk_api_user': data['user'],
             'cc.bulk_api_password': data['password'],
+            'cc.internal_api_password': data['internal_api_password'],
             'cc.staging_upload_user': 'ignored',  # FIXME: We need a staging cache set up
             'cc.staging_upload_password': 'ignored',
             'cc.db_encryption_key': data['db_encryption_key'],
@@ -439,7 +490,7 @@
 
 class RouterRelation(RelationContext):
     name = 'router'
-    interface = 'http'
+    interface = 'router'
     required_keys = ['address']
     port = 8000
     varz_port = 8084  # not currently used
@@ -545,6 +596,8 @@
             'app_domains': [d['domain'] for d in self[self.name]],
             'system_domain': domain,  # TODO: These should probably be config options
             'system_domain_organization': 'juju-org',
+            'logger_endpoint.use_ssl': False,  # TODO: support SSL option
+            'logger_endpoint.port': 80,  # default is 443
         }
 
 

=== modified file 'cloudfoundry/health_checks.py'
--- cloudfoundry/health_checks.py	2014-11-20 15:50:36 +0000
+++ cloudfoundry/health_checks.py	2014-12-12 19:15:16 +0000
@@ -36,3 +36,14 @@
         return result
     else:
         return dict(result, health='warn', message='Working (%s)' % status['status'])
+
+
+def healthz_check(ctype, service, healthz=tasks.healthz, varz=tasks.varz):
+    health = healthz(ctype) and 'pass' or 'fail'
+    result = {
+        'name': 'healthz',
+        'health': health,
+        'message': None,
+        'data': varz(ctype),
+    }
+    return result

=== modified file 'cloudfoundry/jobs.py'
--- cloudfoundry/jobs.py	2014-11-20 15:50:36 +0000
+++ cloudfoundry/jobs.py	2014-12-12 19:15:16 +0000
@@ -2,6 +2,7 @@
 from functools import partial
 from .path import path
 import yaml
+import itertools
 
 from charmhelpers.core import hookenv
 from charmhelpers.core import services
@@ -68,7 +69,13 @@
     service_def = service_data[charm_name]
     results = []
     health = 'pass'
-    checks = service_def.get('health', []) + [health_checks.status]
+    job_health = [job['health'] for job in service_def['jobs'] \
+                  if 'health' in job]
+    base_checks = service_def.get('health', [])
+    checks = itertools.chain(base_checks,
+                             itertools.chain(*job_health),
+                             [health_checks.status])
+
     for health_check in checks:
         result = health_check(service_def)
         if result['health'] == 'fail':

=== modified file 'cloudfoundry/releases.py'
--- cloudfoundry/releases.py	2014-11-05 18:22:07 +0000
+++ cloudfoundry/releases.py	2014-12-12 19:15:16 +0000
@@ -8,8 +8,6 @@
     ('uaa-v1', 'uaa'),
     ('login-v1', 'login'),
     ('nats-stream-forwarder-v1', 'nats-sf'),
-    ('loggregator-v1', 'loggregator'),
-    ('hm9000-v1', 'hm'),
     ('haproxy-v1', 'haproxy'),
     ('collector-v1', 'collector'),
 
@@ -25,15 +23,49 @@
     ('mysql:db', 'cc:db'),
     ('mysql:db', 'uaa:db'),
     ('etcd:client', 'hm:etcd'),
-    ('etcd:client', 'loggregator:etcd'),
     ('etcd:client', 'loggregator-trafficcontrol:etcd'),
 ]
 
+UTILITIES = {
+    "cf-disco": "cf-disco-v0.0.1-538768711c7545d87e60ad79ec3568d8c77c7db6"
+}
+
 COMMON_UPGRADES = []
 
-
 RELEASES = [
     {
+        "releases": (190, 190),
+        "topology": {
+            "services": COMMON_SERVICES + [
+                ('router-v3', 'router'),
+                ('cloud-controller-v3', 'cc'),
+                ('cloud-controller-clock-v3', 'cc-clock'),
+                ('cloud-controller-worker-v3', 'cc-worker'),
+                ('dea-v3', 'dea'),
+                ('loggregator-trafficcontroller-v3', 'loggregator-trafficcontrol'),
+                ('hm9000-v2', 'hm'),
+                ('doppler-v1', 'doppler'),
+            ],
+            "relations": COMMON_RELATIONS + [
+                ('etcd:client', 'cc:etcd'),
+                ('etcd:client', 'cc-worker:etcd'),
+                ('etcd:client', 'cc-clock:etcd'),
+                ('etcd:client', 'router:etcd'),
+                ('etcd:client', 'dea:etcd'),
+                ('etcd:client', 'doppler:etcd'),
+            ],
+            "expose": ['haproxy'],
+            "constraints": {
+                "__default__": "arch=amd64 instance-type=m3.medium",
+                "cc": "arch=amd64 root-disk=12G mem=12G",
+                "cc-worker": "arch=amd64 root-disk=10G",
+                "cc-clock": "arch=amd64 root-disk=10G",
+                "dea": "arch=amd64 mem=5G",
+            },
+        },
+        "upgrades": COMMON_UPGRADES
+    },
+    {
         "releases": (177, 180),
         "topology": {
             "services": COMMON_SERVICES + [
@@ -42,7 +74,9 @@
                 ('cloud-controller-clock-v2', 'cc-clock'),
                 ('cloud-controller-worker-v2', 'cc-worker'),
                 ('dea-v2', 'dea'),
+                ('loggregator-v1', 'loggregator'),
                 ('loggregator-trafficcontroller-v2', 'loggregator-trafficcontrol'),
+                ('hm9000-v1', 'hm'),
             ],
             "relations": COMMON_RELATIONS + [
                 ('etcd:client', 'cc:etcd'),
@@ -50,6 +84,7 @@
                 ('etcd:client', 'cc-clock:etcd'),
                 ('etcd:client', 'router:etcd'),
                 ('etcd:client', 'dea:etcd'),
+                ('etcd:client', 'loggregator:etcd'),
             ],
             "expose": ['haproxy'],
             "constraints": {
@@ -71,9 +106,13 @@
                 ('cloud-controller-clock-v1', 'cc-clock'),
                 ('cloud-controller-worker-v1', 'cc-worker'),
                 ('dea-v1', 'dea'),
+                ('loggregator-v1', 'loggregator'),
                 ('loggregator-trafficcontroller-v1', 'loggregator-trafficcontrol'),
-            ],
-            "relations": COMMON_RELATIONS,
+                ('hm9000-v1', 'hm'),
+            ],
+            "relations": COMMON_RELATIONS + [
+                ('etcd:client', 'loggregator:etcd'),
+            ],
             "expose": ['haproxy'],
             "constraints": {
                 "__default__": "arch=amd64",

=== modified file 'cloudfoundry/services.py'
--- cloudfoundry/services.py	2014-11-07 21:18:42 +0000
+++ cloudfoundry/services.py	2014-12-12 19:15:16 +0000
@@ -3,6 +3,10 @@
 import tasks
 import utils
 
+from functools import partial
+from .health_checks import healthz_check
+
+
 __all__ = ['SERVICES']
 
 
@@ -49,6 +53,27 @@
 
     },
 
+    'cloud-controller-clock-v3': {
+        'summary': "A shared clock",
+        'description': '',
+        'jobs': [
+            {'job_name': 'cloud_controller_clock',
+             'mapping': {'ccdb': mapper.ccdb},
+             'provided_data': [],
+             'required_data': [contexts.NatsRelation,
+                               contexts.CloudControllerRelation,
+                               contexts.UAARelation,
+                               contexts.CloudControllerDBRelation],
+             },
+            {'job_name': 'metron_agent',
+             'required_data': [contexts.LTCRelation,
+                               contexts.NatsRelation,
+                               contexts.DopplerRelation,
+                               contexts.EtcdRelation]},
+            ],
+
+    },
+
     'cloud-controller-v1': {
         'summary': 'CF Cloud Controller, the brains of the operation',
         'description': '',
@@ -89,6 +114,29 @@
         ]
     },
 
+    'cloud-controller-v3': {
+        'summary': 'CF Cloud Controller, the brains of the operation',
+        'description': '',
+        'jobs': [
+            {'job_name': 'cloud_controller_ng',
+             'mapping': {'db': mapper.ccdb},
+             'provided_data': [contexts.CloudControllerRelation,
+                               contexts.CloudControllerDBRelation],
+             'required_data': [contexts.NatsRelation,
+                               contexts.MysqlRelation,
+                               contexts.UAARelation,
+                               contexts.DEARelation,
+                               contexts.CloudControllerRelation.remote_view,
+                               ],
+             },
+            {'job_name': 'metron_agent',
+             'required_data': [contexts.LTCRelation,
+                               contexts.NatsRelation,
+                               contexts.DopplerRelation,
+                               contexts.EtcdRelation]},
+        ]
+    },
+
     'cloud-controller-worker-v1': {
         'summary': "Worker for cc",
         'description': '',
@@ -128,6 +176,27 @@
             ]
     },
 
+    'cloud-controller-worker-v3': {
+        'summary': "Worker for cc",
+        'description': '',
+        'jobs': [
+            {'job_name': 'cloud_controller_worker',
+             'mapping': {'ccdb': mapper.ccdb},
+             'provided_data': [],
+             'required_data': [contexts.NatsRelation,
+                               contexts.UAARelation,
+                               contexts.CloudControllerRelation,
+                               contexts.CloudControllerDBRelation,
+                               ],
+             },
+            {'job_name': 'metron_agent',
+             'required_data': [contexts.LTCRelation,
+                               contexts.NatsRelation,
+                               contexts.DopplerRelation,
+                               contexts.EtcdRelation]},
+            ]
+    },
+
     'dea-v1': {
         'summary': 'DEA runs CF apps in containers',
         'description': '',
@@ -199,6 +268,52 @@
 
     },
 
+    'dea-v3': {
+        'summary': 'DEA runs CF apps in containers',
+        'description': '',
+        'jobs': [
+            {
+                'job_name': 'dea_next',
+                'mapping': {},
+                'install': [
+                    utils.install_linux_image_extra,
+                    utils.apt_install(['quota']),
+                    utils.modprobe(['quota_v1', 'quota_v2'])
+                ],
+                'provided_data': [contexts.DEARelation],
+                'required_data': [
+                    contexts.NatsRelation,
+                    contexts.LTCRelation,
+                    contexts.DEARelation.remote_view,
+                    contexts.RouterRelation,
+                ],
+                'data_ready': [
+                    tasks.install_cfdisco,
+                    tasks.config_cfdisco,
+                    # tasks.enable_swapaccounting,
+                    tasks.patch_dea
+                ],
+                'health': [partial(healthz_check, "DEA")]
+            },
+            {
+                'job_name': 'dea_logging_agent',
+                'mapping': {},
+                'required_data': [contexts.NatsRelation,
+                                  contexts.LTCRelation,
+                                  contexts.EtcdRelation]
+            },
+            {'job_name': 'metron_agent',
+             'data_ready': [tasks.install_cfdisco,
+                            tasks.config_cfdisco],
+             'required_data': [contexts.LTCRelation,
+                               contexts.NatsRelation,
+                               contexts.DopplerRelation,
+                               contexts.EtcdRelation],
+             'health': [partial(healthz_check, "MetronAgent")]},
+        ]
+
+    },
+
     'nats-v1': {
         'service': 'nats',
         'summary': 'NATS message bus for CF',
@@ -259,6 +374,25 @@
 
     },
 
+    'router-v3': {
+        'service': 'router',
+        'summary': 'CF Router',
+        'jobs': [
+            {'job_name': 'gorouter',
+             'ports': [contexts.RouterRelation.port],
+             'mapping': {},
+             'provided_data': [contexts.RouterRelation],
+             'required_data': [contexts.NatsRelation,
+                               contexts.RouterRelation.remote_view]},
+            {'job_name': 'metron_agent',
+             'required_data': [contexts.LTCRelation,
+                               contexts.NatsRelation,
+                               contexts.DopplerRelation,
+                               contexts.EtcdRelation]},
+        ],
+
+    },
+
     'uaa-v1': {
         'service': 'uaa',
         'summary': 'CF Oauth2 for identity management service',
@@ -303,6 +437,21 @@
             }]
         },
 
+    'doppler-v1': {
+        'service': 'doppler',
+        'summary': 'successor of loggregator',
+        'description': 'Successor of loggregator.',
+        'jobs': [{
+            'job_name': 'doppler',
+            'mapping': {},
+            'provided_data': [contexts.DopplerRelation],
+            'required_data': [contexts.NatsRelation,
+                              contexts.EtcdRelation,
+                              contexts.LTCRelation,
+                              contexts.DopplerRelation.remote_view]
+            }]
+        },
+
     'loggregator-trafficcontroller-v1': {
         'service': 'loggregator-trafficcontroller',
         'summary': 'loggregator-trafficcontroller',
@@ -337,6 +486,23 @@
             ]
         },
 
+    'loggregator-trafficcontroller-v3': {
+        'service': 'loggregator-trafficcontroller',
+        'summary': 'loggregator-trafficcontroller',
+        'description': '',
+        'jobs': [
+            {'job_name': 'loggregator_trafficcontroller',
+             'ports': [contexts.LTCRelation.outgoing_port],
+             'mapping': {},
+             'provided_data': [contexts.LTCRelation],
+             'required_data': [contexts.DopplerRelation,
+                               contexts.LTCRelation.remote_view,
+                               contexts.NatsRelation,
+                               contexts.CloudControllerRelation,
+                               contexts.EtcdRelation]},
+            ]
+        },
+
     'hm9000-v1': {
         'service': 'hm9000',
         'summary': 'health monitor',
@@ -351,6 +517,21 @@
             }]
         },
 
+    'hm9000-v2': {
+        'service': 'hm9000',
+        'summary': 'health monitor',
+        'description': '',
+        'jobs': [{
+            'job_name': 'hm9000',
+            'mapping': {},
+            'provided_data': [],
+            'required_data': [contexts.NatsRelation,
+                              contexts.CloudControllerRelation,
+                              contexts.EtcdRelation,
+                              contexts.DEARelation]
+            }]
+        },
+
     'haproxy-v1': {
         'service': 'haproxy',
         'summary': 'loadbalance the routers',

=== modified file 'cloudfoundry/tasks.py'
--- cloudfoundry/tasks.py	2014-10-03 15:47:17 +0000
+++ cloudfoundry/tasks.py	2014-12-12 19:15:16 +0000
@@ -1,12 +1,14 @@
+import json
+import logging
 import os
 import re
+import shlex
 import shutil
+import stat
 import subprocess
-import yaml
-import stat
 import tempfile
 import textwrap
-import logging
+import yaml
 from urlparse import urlparse
 from functools import partial
 from charmhelpers.core import host
@@ -16,6 +18,7 @@
 from cloudfoundry import contexts
 from cloudfoundry import templating
 from cloudfoundry import utils
+from .releases import UTILITIES
 from .path import path
 
 logger = logging.getLogger(__name__)
@@ -41,7 +44,8 @@
     enable_monit_http_interface()
     subprocess.check_call(['gem', 'install', '--no-ri', '--no-rdoc', gem_file])
     subprocess.check_call([
-        'pip', 'install', '--use-wheel', '-f', './wheelhouse', '--pre', 'raindance'])
+        'pip', 'install', '--use-wheel', '-f', './wheelhouse', '--pre',
+        'raindance'])
 
 
 def install(service_def):
@@ -99,7 +103,8 @@
         return
     from raindance.package import PackageArchive
     pa = PackageArchive(url)
-    mirror = pa.build_mirror_section(ARTIFACTS_DIR, SOFTWARE, [(version, ARCH)], [job_name])
+    mirror = pa.build_mirror_section(ARTIFACTS_DIR,
+                                     SOFTWARE, [(version, ARCH)], [job_name])
     for filename in mirror:
         pass  # just need to iterate to force the (lazy) download
 
@@ -203,6 +208,75 @@
         subprocess.check_call(['patch', '-s', '-F4'], stdin=patch)
         os.unlink(fn)
 
+JUJU_VCAP_BIN = path("/var/vcap/juju/bin")
+
+
+def install_cfdisco(job_name, jujubin=JUJU_VCAP_BIN,
+                    discov=UTILITIES['cf-disco']):
+    orch = contexts.OrchestratorRelation()
+
+    urlbase = path(orch.get_first('artifacts_url'))
+    url = urlbase / ('utilities/%s' % discov)
+    sha1 = discov.rsplit("-", 1)[1]
+
+    outfile = jujubin / 'cf-disco'
+
+    from raindance.package import PackageArchive
+    verify = PackageArchive.verify_file
+
+    if not (outfile.exists() and verify(outfile, sha1)):
+        jujubin.makedirs_p()
+        PackageArchive.wget(url, outfile)
+        assert PackageArchive.verify_file(outfile, sha1), "Bad sha1 for %s" % outfile
+        outfile.chmod(755)
+
+
+JUJU_VCAP_ETC = path("/var/vcap/juju/etc")
+
+
+def config_cfdisco(job_name, jujuetc=JUJU_VCAP_ETC):
+    disco_conf = jujuetc / 'disco.json'
+    if not disco_conf.exists():
+        nats = contexts.NatsRelation()
+        nats_info = nats.erb_mapping()
+        nats_info = dict((k.replace('nats.', ''), v) for k, v in nats_info.items())
+        nats_info['addr'] = nats.get_first('address')
+
+        # @@ fix to use multiple nats servers
+        nats_uri_tmpt = "nats://{user}:{password}@{addr}:{port}"
+        nats_uri = nats_uri_tmpt.format(**nats_info)
+        privip = hookenv.unit_private_ip()
+
+        jujuetc.makedirs_p()
+        disco_conf.write_text(json.dumps(dict(nats_uri=nats_uri,
+                                              ip=privip)))
+
+
+def healthz(ctype, exe=JUJU_VCAP_BIN / 'cf-disco',
+            config=JUJU_VCAP_ETC / 'disco.json'):
+    conf = json.loads(config.text())
+    conf['type'] = ctype
+    tmplt = "{exe} {type} {ip} {nats_uri}"
+    conf['exe'] = exe
+    args = shlex.split(tmplt.format(**conf))
+    try:
+        return subprocess.check_call(args) == 0
+    except subprocess.CalledProcessError as e:
+        if e.returncode == 1:
+            return False
+        raise
+
+
+def varz(ctype, exe=JUJU_VCAP_BIN / 'cf-disco',
+         config=JUJU_VCAP_ETC / 'disco.json'):
+    conf = json.loads(config.text())
+    conf['type'] = ctype
+    tmplt = "{exe} --check='varz' {type} {ip} {nats_uri}"
+    conf['exe'] = exe
+    args = shlex.split(tmplt.format(**conf))
+    out = subprocess.check_output(args)
+    return json.loads(out)
+
 
 class JobTemplates(services.ManagerCallback):
     template_base_dir = TEMPLATES_BASE_DIR

=== modified file 'cloudfoundry/utils.py'
--- cloudfoundry/utils.py	2014-12-03 23:14:23 +0000
+++ cloudfoundry/utils.py	2014-12-12 19:15:16 +0000
@@ -23,8 +23,10 @@
 
 
 @contextmanager
-def cd(directory):
+def cd(directory, make=False):
     cwd = os.getcwd()
+    if not os.path.exists(directory) and make:
+        os.makedirs(directory)
     os.chdir(directory)
     try:
         yield
@@ -405,6 +407,7 @@
 def deploy(**config):
     status = get_client().status()
     constraints = config.pop('constraints', None)
+    to = config.pop('to', None)
     if 'cloudfoundry' in status['Services']:
         return True
     # create an up to date config
@@ -425,6 +428,10 @@
             '--repository=%s' % repo_path]
     if constraints:
         args.append('--constraints=%s' % constraints)
+
+    if to:
+        args.append('--to=%s' % to)
+
     args.append('local:trusty/cloudfoundry')
     juju = sh.check('juju', throw=False)
     if juju(*args) != 0:

=== modified file 'reconciler/app.py'
--- reconciler/app.py	2014-11-20 16:09:27 +0000
+++ reconciler/app.py	2014-12-12 19:15:16 +0000
@@ -135,6 +135,8 @@
         service['health'] = 'warn'
     else:
         service['health'] = 'pass'
+    if service_name == 'cloudfoundry' and db.error:  # XXX don't hard-code
+        service['health'] = 'fail'
 
 
 def get_current_state():

=== modified file 'reconciler/strategy.py'
--- reconciler/strategy.py	2014-11-11 18:22:53 +0000
+++ reconciler/strategy.py	2014-12-12 19:15:16 +0000
@@ -28,6 +28,7 @@
         self.strategy = Strategy(self.env)
         self.history = []
         self.exec_lock = threading.Lock()
+        self.error = False
 
     def reset(self):
         self.expected = {}
@@ -106,8 +107,12 @@
             return []
 
         # Service Deltas
-        self.strategy.extend(self.build_services())
-        self.strategy.extend(self.build_relations())
+        try:
+            self.strategy.extend(self.build_services())
+            self.strategy.extend(self.build_relations())
+        except Exception as e:
+            self.error = True
+            logging.error('Error building strategy: %s', e)
 
     def _changed(self):
         previous = hashlib.md5(json.dumps(self.previous or {}, sort_keys=True))
@@ -205,6 +210,10 @@
                     self.execute_strategy)
             else:
                 self._reset_strategy()
+            self.error = False
+        except Exception as e:
+            self.error = True
+            logging.error('Error executing strategy: %s', e)
         finally:
             self.exec_lock.release()
 

=== modified file 'tests/02-cats'
--- tests/02-cats	2014-10-01 18:50:57 +0000
+++ tests/02-cats	2014-12-12 19:15:16 +0000
@@ -1,3 +1,3 @@
 #!/bin/bash
 
-python tests/cats.py
+python tests/cats.py "$@"

=== modified file 'tests/cats.py'
--- tests/cats.py	2014-10-16 15:01:08 +0000
+++ tests/cats.py	2014-12-12 19:15:16 +0000
@@ -6,6 +6,7 @@
 import subprocess
 import sys
 import tempfile
+from functools import partial
 
 from cloudfoundry.releases import RELEASES
 from cloudfoundry.utils import (api,
@@ -26,6 +27,7 @@
     global options
     parser = argparse.ArgumentParser()
     parser.add_argument('-v', '--version', default="latest")
+    parser.add_argument('admin_password', default="password", nargs='?')
     options = parser.parse_args()
     if options.version == 'latest':
         options.version = RELEASES[0]['releases'][1]
@@ -50,10 +52,11 @@
 
 def get_cats(version):
     logging.info("Getting CATs from github")
-    if not os.path.exists('cf-acceptance-tests'):
-        sh.git('clone',
-               'https://github.com/cloudfoundry/cf-acceptance-tests.git')
-    with cd('cf-acceptance-tests'):
+    if not os.path.exists('src/github.com/cloudfoundry/cf-acceptance-tests'):
+        with cd('src/github.com/cloudfoundry', make=True):
+            sh.git('clone',
+                   'https://github.com/cloudfoundry/cf-acceptance-tests.git')
+    with cd('src/github.com/cloudfoundry/cf-acceptance-tests'):
         sh.git('fetch')
         sh.check('./bin/compile')
         # Switch to the branch of the revision in question
@@ -66,12 +69,13 @@
         sha = acceptance[0]['sha']
         print "Switching CATs to {} to test version {}".format(
             sha[:6], version)
-        sh.git('reset', '--hard', sha)
-
-
-def run_cats():
+        sh.git('checkout', sha)
+
+
+def run_cats(options):
     ep = endpoint()
-    with cd('cf-acceptance-tests'):
+    gopath = os.getcwd()
+    with cd('src/github.com/cloudfoundry/cf-acceptance-tests'):
         fd, fn = tempfile.mkstemp(suffix=".json")
         os.close(fd)
         with open(fn, 'w') as fp:
@@ -81,8 +85,8 @@
                 "api": api(),
                 "syslog_drain_port": 8082,
                 "syslog_ip_address": ep,
-                "password": "admin",
-                "admin_password": "admin",
+                "password": options.admin_password,
+                "admin_password": options.admin_password,
                 "user": "admin",
                 "admin_user": "admin",
                 "org": "juju-org",
@@ -94,7 +98,10 @@
             json.dump(defaults, fp, indent=2, sort_keys=True)
         logging.info("Running CATs %s", defaults)
         env = os.environ.copy()
-        env.update({'CONFIG': os.path.abspath(fn)})
+        env.update({
+            'CONFIG': os.path.abspath(fn),
+            'GOPATH': gopath,
+        })
         subprocess.call('./bin/test', shell=True, env=env)
 
 
@@ -103,9 +110,11 @@
     logging.basicConfig(level=logging.INFO)
     get_cats(options.version)
     bootstrap()
-    deploy()
-    wait_for(60 * 40, 30, cf_service, endpoint, login)
-    run_cats()
+    deploy(generate_dependents=True,
+           admin_password=options.admin_password)
+    wait_for(60 * 40, 30,
+             cf_service, endpoint, partial(login, options.admin_password))
+    run_cats(options)
     sys.exit(0)
 
 

=== modified file 'tests/test_tasks.py'
--- tests/test_tasks.py	2014-10-03 15:47:17 +0000
+++ tests/test_tasks.py	2014-12-12 19:15:16 +0000
@@ -1,11 +1,20 @@
 import unittest
 import mock
+import contextlib
+import tempfile
+import json
 
+from subprocess import CalledProcessError
 from charmhelpers.core import services
 from cloudfoundry.path import path
 from cloudfoundry import tasks
 
 
+def patch_set(*methods):
+    patches = [mock.patch(x) for x in methods]
+    return contextlib.nested(*patches)
+
+
 class TestTasks(unittest.TestCase):
     def setUp(self):
         self.charm_dir_patch = mock.patch(
@@ -161,3 +170,84 @@
         output = tasks._enable_swapaccounting(sample)
         self.assertIn("arg1 cgroup_enable=memory swapaccount=1", output)
         self.assertIn("recovery arg2 cgroup_enable=memory swapaccount=1", output)
+
+    def test_cfdisco_install(self):
+        patches = ['cloudfoundry.contexts.OrchestratorRelation',
+                   'raindance.package.PackageArchive.wget',
+                   'raindance.package.PackageArchive.verify_file']
+
+        bin_d = path(tempfile.mkdtemp(prefix='cf-tests-'))
+        with patch_set(*patches) as (om, wm, vm):
+            om().get_first.return_value = 'http://pkg/url'
+            vm.return_value = True
+            tasks.install_cfdisco("some_job",
+                                  jujubin=bin_d,
+                                  discov="bin-version")
+            assert wm.called
+            expect = mock.call(path(u'http://pkg/url/utilities/bin-version'),
+                               bin_d / 'cf-disco')
+            assert wm.call_args == expect
+            assert vm.called
+
+    def test_config_cfdisco(self):
+        patches = ('cloudfoundry.contexts.NatsRelation',
+                   'charmhelpers.core.hookenv.unit_private_ip')
+        with patch_set(*patches) as (cxt, ipm):
+            cxt().get_first.return_value = "10.0.0.1"
+            cxt().erb_mapping.return_value = {
+                'nats.user': 'user',
+                'nats.password': 'pw',
+                'nats.port': "1234"
+                }
+            local_ip = ipm.return_value = "10.0.0.10"
+            etc = path(tempfile.mkdtemp(prefix='cf-tests-'))
+            result = etc / 'disco.json'
+            tasks.config_cfdisco("JOB", etc)
+            assert result.exists()
+            outval = json.loads(result.text())
+            assert outval['ip'] == local_ip
+            assert outval['nats_uri'] == 'nats://user:pw@10.0.0.1:1234'
+
+    def test_check_healthz_ok(self):
+        config = path(tempfile.mkstemp()[1])
+        conf_txt = json.dumps(dict(type='COMP',
+                                   ip='10.0.0.1',
+                                   nats_uri='NATS_URI'))
+        config.write_text(conf_txt)
+        with mock.patch('subprocess.check_call') as cm:
+            cm.return_value = 0
+            assert tasks.healthz('CF-THANG', exe='cd', config=config) is True
+
+    def test_check_healthz_not_ok(self):
+        config = path(tempfile.mkstemp()[1])
+        conf_txt = json.dumps(dict(type='COMP',
+                                   ip='10.0.0.1',
+                                   nats_uri='NATS_URI'))
+
+        config.write_text(conf_txt)
+        with mock.patch('subprocess.check_call') as cm:
+            cm.side_effect = CalledProcessError(1, "CMD")
+            assert tasks.healthz('CF-THANG', exe='cd', config=config) is False
+
+    def test_healthz_raises(self):
+        config = path(tempfile.mkstemp()[1])
+        conf_txt = json.dumps(dict(type='COMP',
+                                   ip='10.0.0.1',
+                                   nats_uri='NATS_URI'))
+        config.write_text(conf_txt)
+        with mock.patch('subprocess.check_call') as cm:
+            cm.side_effect = CalledProcessError(2, "CMD")
+            with self.assertRaises(CalledProcessError):
+                tasks.healthz('CF-THANG', exe='cd', config=config)
+
+    def test_check_varz(self):
+        config = path(tempfile.mkstemp()[1])
+        conf_txt = json.dumps(dict(type='COMP',
+                                   ip='10.0.0.1',
+                                   nats_uri='NATS_URI'))
+
+        config.write_text(conf_txt)
+        with mock.patch('subprocess.check_output') as co:
+            co.return_value = "{}"
+            out = tasks.varz('CF-THANG', "exe", config)
+            assert out == {}


Follow ups