
[Merge] lp:~bigdata-dev/charm-helpers/distconfig into lp:~bigdata-dev/charm-helpers/framework

Kevin W Monroe has proposed merging lp:~bigdata-dev/charm-helpers/distconfig into lp:~bigdata-dev/charm-helpers/framework.

Requested reviews:
  Juju Big Data Development (bigdata-dev)

For more details, see:
https://code.launchpad.net/~bigdata-dev/charm-helpers/distconfig/+merge/253152

This separates the dist.yaml processing into a utils class (DistConfig) and refactors handlers/apache.py to use it. A usage sketch and an example dist.yaml are included below for reviewers.
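
Roughly, a charm hook would now wire the two pieces together like this (a sketch only; the hook code is illustrative and not part of this diff, and the install() signature is assumed from the surrounding hunks):

    from charmhelpers.contrib.bigdata.utils import DistConfig
    from charmhelpers.contrib.bigdata.handlers.apache import HadoopBase, HDFS

    # DistConfig replaces the old HadoopBase.load_dist_config(); it loads
    # and validates dist.yaml once, up front.
    dist = DistConfig(filename='dist.yaml',
                      required_keys=['vendor', 'hadoop_version', 'packages',
                                     'groups', 'users', 'dirs', 'ports'])
    hadoop = HadoopBase(dist)  # raises ValueError if a required dir is missing
    hadoop.install()           # users/dirs/packages now delegated to DistConfig
    hdfs = HDFS(hadoop)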
-- 
Your team Juju Big Data Development is requested to review the proposed merge of lp:~bigdata-dev/charm-helpers/distconfig into lp:~bigdata-dev/charm-helpers/framework.
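
For reference, a minimal dist.yaml that satisfies both the DistConfig key validation and the HadoopBase directory check might look like the following (all values are illustrative; hadoop_dir_base is a charm config option, as used elsewhere in apache.py):

    vendor: 'apache'
    hadoop_version: '2.4.1'
    packages:
        - 'openssl'
    groups:
        - 'hadoop'
    users:
        hdfs:
            groups: ['hadoop']
        mapred:
            groups: ['hadoop']
        yarn:
            groups: ['hadoop']
    dirs:
        hadoop:
            path: '{config[hadoop_dir_base]}/hadoop-2.4.1'
        hadoop_conf:
            path: '/etc/hadoop/conf'
        hdfs_log_dir:
            path: '/var/log/hadoop/hdfs'
            owner: 'hdfs'
            group: 'hadoop'
        yarn_log_dir:
            path: '/var/log/hadoop/yarn'
            owner: 'yarn'
            group: 'hadoop'
    ports:
        hdfs:
            master: [8020]
            secondarynamenode: [50090]
            slave: [50010]
        yarn:
            master: [8032]
            slave: [8042]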
=== modified file 'charmhelpers/contrib/bigdata/handlers/apache.py'
--- charmhelpers/contrib/bigdata/handlers/apache.py	2015-03-13 19:04:46 +0000
+++ charmhelpers/contrib/bigdata/handlers/apache.py	2015-03-17 05:14:30 +0000
@@ -19,23 +19,31 @@
 import time
 
 from path import Path
-import yaml
 
 import jujuresources
 
-from charmhelpers.core import host
 from charmhelpers.core import hookenv
 from charmhelpers.core import charmdata
-from charmhelpers import fetch
 from charmhelpers.contrib.bigdata import utils
 
 
 class HadoopBase(object):
-    def __init__(self):
+    def __init__(self, dist_config):
+        self.config = dist_config
         self.cpu_arch = check_output(['uname', '-p']).strip()
-        self.load_dist_config()
+
+        # verify config
+        required_dirs = ['hadoop', 'hadoop_conf', 'hdfs_log_dir',
+                         'yarn_log_dir']
+        missing_dirs = set(required_dirs) - set(self.config.dirs.keys())
+        if missing_dirs:
+            raise ValueError('dirs option in {} is missing required entr{}: {}'.format(
+                self.config.yaml_file,
+                'ies' if len(missing_dirs) > 1 else 'y',
+                ', '.join(missing_dirs)))
+
         self.client_spec = {
-            'hadoop': self.hadoop_version,
+            'hadoop': self.config.hadoop_version,
         }
 
     def spec(self):
@@ -50,47 +58,14 @@
         java_version = charmdata.kv.get('java.version')
         if java_version:
             return {
-                'vendor': self.vendor,
-                'hadoop': self.hadoop_version,
+                'vendor': self.config.vendor,
+                'hadoop': self.config.hadoop_version,
                 'java': java_version,
                 'arch': self.cpu_arch,
             }
         else:
             return None
 
-    def load_dist_config(self, filename='dist.yaml'):
-        self._dist_config = yaml.load(Path(filename).text())
-
-        # validate dist.yaml
-        required_opts = ['vendor', 'hadoop_version', 'dirs']
-        optional_opts = {'packages': [], 'groups': [], 'users': {}, 'ports': {}}
-        missing_opts = set(required_opts) - set(self._dist_config.keys())
-        if missing_opts:
-            raise ValueError('{} is missing required option{}: {}'.format(
-                filename,
-                's' if len(missing_opts) > 1 else '',
-                ', '.join(missing_opts)))
-        required_dirs = ['hadoop']
-        missing_dirs = set(required_dirs) - set(self._dist_config['dirs'].keys())
-        if missing_dirs:
-            raise ValueError('dirs option in {} is missing required entr{}: {}'.format(
-                filename,
-                'ies' if len(missing_dirs) > 1 else 'y',
-                ', '.join(missing_dirs)))
-
-        for opt in required_opts:
-            setattr(self, opt, self._dist_config[opt])
-        for opt in optional_opts:
-            setattr(self, opt, self._dist_config.get(opt, optional_opts[opt]))
-        self.managed_dirs = self.dirs
-        for name, details in self.managed_dirs.items():
-            details['path'] = self._expand_path(details['path'])
-        self.managed_dirs['hadoop_conf'] = {'path': Path('/etc/hadoop/conf')}
-        self.dirs = {name: details['path'] for name, details in self.managed_dirs.items()}
-
-    def _expand_path(self, path):
-        return Path(path.format(config=hookenv.config()))
-
     def verify_conditional_resources(self):
         conditional_resources = ['hadoop-%s' % self.cpu_arch]
         mirror_url = hookenv.config()['resources_mirror']
@@ -103,8 +78,9 @@
         if not force and self.is_installed():
             return
         self.configure_hosts_file()
-        self.manage_users()
-        self.manage_dirs()
+        self.config.add_users()
+        self.config.add_dirs()
+        self.config.add_packages()
         self.install_base_packages()
         self.setup_hadoop_config()
         self.configure_hadoop()
@@ -129,30 +105,8 @@
             hosts.insert(0, line)
             etc_hosts.write_lines(hosts)
 
-    def manage_users(self):
-        for group in self.groups:
-            host.add_group(group)
-        for username, details in self.users.items():
-            primary_group = None
-            groups = details.get('groups', [])
-            if groups:
-                primary_group = groups[0]
-            host.adduser(username, group=primary_group)
-            for group in groups:
-                host.add_user_to_group(username, group)
-
-    def manage_dirs(self):
-        for name, details in self.managed_dirs.items():
-            host.mkdir(
-                self._expand_path(details['path']),
-                owner=details.get('owner', 'root'),
-                group=details.get('group', 'root'),
-                perms=details.get('perms', 0o755))
-
     def install_base_packages(self):
         with utils.disable_firewall():
-            fetch.apt_update()
-            fetch.apt_install(self.packages)
             self.install_java()
             self.install_hadoop()
 
@@ -176,25 +130,28 @@
         charmdata.kv.set('java.version', java_version)
 
     def install_hadoop(self):
-        jujuresources.install('hadoop-%s' % self.cpu_arch, destination=self.dirs['hadoop'], skip_top_level=True)
+        jujuresources.install('hadoop-%s' %
+                              self.cpu_arch,
+                              destination=self.config.dirs['hadoop'],
+                              skip_top_level=True)
 
     def setup_hadoop_config(self):
         # copy default config into alternate dir
-        conf_dir = self.dirs['hadoop'] / 'etc/hadoop'
-        self.dirs['hadoop_conf'].rmtree_p()
-        conf_dir.copytree(self.dirs['hadoop_conf'])
-        (self.dirs['hadoop_conf'] / 'slaves').remove_p()
-        mapred_site = self.dirs['hadoop_conf'] / 'mapred-site.xml'
+        conf_dir = self.config.dirs['hadoop'] / 'etc/hadoop'
+        self.config.dirs['hadoop_conf'].rmtree_p()
+        conf_dir.copytree(self.config.dirs['hadoop_conf'])
+        (self.config.dirs['hadoop_conf'] / 'slaves').remove_p()
+        mapred_site = self.config.dirs['hadoop_conf'] / 'mapred-site.xml'
         if not mapred_site.exists():
-            (self.dirs['hadoop_conf'] / 'mapred-site.xml.template').copy(mapred_site)
+            (self.config.dirs['hadoop_conf'] / 'mapred-site.xml.template').copy(mapred_site)
 
     def configure_hadoop(self):
         config = hookenv.config()
 
         java_home = Path(charmdata.kv.get('java.home'))
         java_bin = java_home / 'bin'
-        hadoop_bin = self.dirs['hadoop'] / 'bin'
-        hadoop_sbin = self.dirs['hadoop'] / 'sbin'
+        hadoop_bin = self.config.dirs['hadoop'] / 'bin'
+        hadoop_sbin = self.config.dirs['hadoop'] / 'sbin'
         with utils.environment_edit_in_place('/etc/environment') as env:
             env['JAVA_HOME'] = java_home
             if java_bin not in env['PATH']:
@@ -203,37 +160,37 @@
                 env['PATH'] = ':'.join([env['PATH'], hadoop_bin])
             if hadoop_sbin not in env['PATH']:
                 env['PATH'] = ':'.join([env['PATH'], hadoop_sbin])
-            env['HADOOP_LIBEXEC_DIR'] = self.dirs['hadoop'] / 'libexec'
-            env['HADOOP_INSTALL'] = self.dirs['hadoop']
-            env['HADOOP_HOME'] = self.dirs['hadoop']
-            env['HADOOP_COMMON_HOME'] = self.dirs['hadoop']
-            env['HADOOP_HDFS_HOME'] = self.dirs['hadoop']
-            env['HADOOP_MAPRED_HOME'] = self.dirs['hadoop']
-            env['HADOOP_YARN_HOME'] = self.dirs['hadoop']
-            env['YARN_HOME'] = self.dirs['hadoop']
-            env['HADOOP_CONF_DIR'] = self.dirs['hadoop_conf']
-            env['YARN_CONF_DIR'] = self.dirs['hadoop_conf']
-            env['YARN_LOG_DIR'] = self.dirs['yarn_log_dir']
-            env['HDFS_LOG_DIR'] = self.dirs['hdfs_log_dir']
-            env['HADOOP_LOG_DIR'] = self.dirs['hdfs_log_dir']  # for hadoop 2.2.0 only
+            env['HADOOP_LIBEXEC_DIR'] = self.config.dirs['hadoop'] / 'libexec'
+            env['HADOOP_INSTALL'] = self.config.dirs['hadoop']
+            env['HADOOP_HOME'] = self.config.dirs['hadoop']
+            env['HADOOP_COMMON_HOME'] = self.config.dirs['hadoop']
+            env['HADOOP_HDFS_HOME'] = self.config.dirs['hadoop']
+            env['HADOOP_MAPRED_HOME'] = self.config.dirs['hadoop']
+            env['HADOOP_YARN_HOME'] = self.config.dirs['hadoop']
+            env['YARN_HOME'] = self.config.dirs['hadoop']
+            env['HADOOP_CONF_DIR'] = self.config.dirs['hadoop_conf']
+            env['YARN_CONF_DIR'] = self.config.dirs['hadoop_conf']
+            env['YARN_LOG_DIR'] = self.config.dirs['yarn_log_dir']
+            env['HDFS_LOG_DIR'] = self.config.dirs['hdfs_log_dir']
+            env['HADOOP_LOG_DIR'] = self.config.dirs['hdfs_log_dir']  # for hadoop 2.2.0 only
             env['MAPRED_LOG_DIR'] = '/var/log/hadoop/mapred'  # should be moved to config, but could
             env['MAPRED_PID_DIR'] = '/var/run/hadoop/mapred'  # be destructive for mapreduce operation
 
-        hadoop_env = self.dirs['hadoop_conf'] / 'hadoop-env.sh'
+        hadoop_env = self.config.dirs['hadoop_conf'] / 'hadoop-env.sh'
         utils.re_edit_in_place(hadoop_env, {
             r'export JAVA_HOME *=.*': 'export JAVA_HOME=%s' % java_home,
         })
 
-        core_site = self.dirs['hadoop_conf'] / 'core-site.xml'
+        core_site = self.config.dirs['hadoop_conf'] / 'core-site.xml'
         with utils.xmlpropmap_edit_in_place(core_site) as props:
             props["io.file.buffer.size"] = config["io_file_buffer_size"]
 
     def config_for_hue(self):
         # Set hue-specific properties in our hdfs|core-site.xml files
-        hdfs_site = self.dirs['hadoop_conf'] / 'hdfs-site.xml'
+        hdfs_site = self.config.dirs['hadoop_conf'] / 'hdfs-site.xml'
         with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
             props['dfs.webhdfs.enabled'] = "true"
-        core_site = self.dirs['hadoop_conf'] / 'core-site.xml'
+        core_site = self.config.dirs['hadoop_conf'] / 'core-site.xml'
         with utils.xmlpropmap_edit_in_place(core_site) as props:
             props['hadoop.proxyuser.hue.hosts'] = "*"
             props['hadoop.proxyuser.hue.groups'] = "*"
@@ -245,15 +202,14 @@
         :param str command: Command to run, prefixed with `bin/` or `sbin/`
         :param list args: Additional args to pass to the command
         """
-        return utils.run_as(user, self.dirs['hadoop'] / command, *args, **kwargs)
+        return utils.run_as(user,
+                            self.config.dirs['hadoop'] / command,
+                            *args, **kwargs)
 
 
 class HDFS(object):
     def __init__(self, hadoop_base):
         self.hadoop_base = hadoop_base
-        self.master_ports = hadoop_base.ports['hdfs']['master']
-        self.secondarynamenode_ports = hadoop_base.ports['hdfs']['secondarynamenode']
-        self.slave_ports = hadoop_base.ports['hdfs']['slave']
 
     def stop_namenode(self):
         self._hadoop_daemon('stop', 'namenode')
@@ -300,7 +256,7 @@
 
     def configure_secondarynamenode(self):
         """
-        Configure the Secondary Namenode when the apache-hadoop-hdfs-checkpoint
+        Configure the Secondary Namenode when the apache-hadoop-hdfs-secondary
         charm is deployed and related to apache-hadoop-hdfs-master.
 
         The only purpose of the secondary namenode is to perform periodic
@@ -314,7 +270,7 @@
             # Set generic hdfs config
             self.configure_hdfs_base(remote)
             # Set secondarynamenode-specific config
-            hdfs_site = self.hadoop_base.dirs['hadoop_conf'] / 'hdfs-site.xml'
+            hdfs_site = self.hadoop_base.config.dirs['hadoop_conf'] / 'hdfs-site.xml'
             with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
                 props['dfs.namenode.http-address'] = "{host}:50070".format(host=remote)
                 props['dfs.namenode.secondary.http-address'] = "{host}:50090".format(host=local)
@@ -334,10 +290,10 @@
 
     def configure_hdfs_base(self, host):
         config = hookenv.config()
-        core_site = self.hadoop_base.dirs['hadoop_conf'] / 'core-site.xml'
+        core_site = self.hadoop_base.config.dirs['hadoop_conf'] / 'core-site.xml'
         with utils.xmlpropmap_edit_in_place(core_site) as props:
             props['fs.defaultFS'] = "hdfs://{host}:8020".format(host=host)
-        hdfs_site = self.hadoop_base.dirs['hadoop_conf'] / 'hdfs-site.xml'
+        hdfs_site = self.hadoop_base.config.dirs['hadoop_conf'] / 'hdfs-site.xml'
         with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
             props['dfs.namenode.name.dir'] = Path(config['hadoop_dir_base']) / 'cache/hadoop/dfs/name'
             props['dfs.namenode.handler.count'] = config['dfs_namenode_handler_count']
@@ -374,7 +330,7 @@
 
     def register_slaves(self):
         slaves = charmdata.kv.get('relations.ready', {}).get('datanode', {})
-        slaves_file = self.hadoop_base.dirs['hadoop_conf'] / 'slaves'
+        slaves_file = self.hadoop_base.config.dirs['hadoop_conf'] / 'slaves'
         slaves_file.write_lines(
             [
                 '# DO NOT EDIT',
@@ -387,7 +343,8 @@
 
     def _hadoop_daemon(self, command, service):
         self.hadoop_base.run('hdfs', 'sbin/hadoop-daemon.sh',
-                             '--config', self.hadoop_base.dirs['hadoop_conf'],
+                             '--config',
+                             self.hadoop_base.config.dirs['hadoop_conf'],
                              command, service)
 
     def _hdfs(self, command, *args):
@@ -397,8 +354,6 @@
 class YARN(object):
     def __init__(self, hadoop_base):
         self.hadoop_base = hadoop_base
-        self.master_ports = hadoop_base.ports['yarn']['master']
-        self.slave_ports = hadoop_base.ports['yarn']['slave']
 
     def stop_resourcemanager(self):
         self._yarn_daemon('stop', 'resourcemanager')
@@ -434,7 +389,7 @@
         if not charmdata.kv.get('yarn.resourcemanager.configured'):
             host = self._local_host()
             self.configure_yarn_base(host)
-            yarn_site = self.hadoop_base.dirs['hadoop_conf'] / 'yarn-site.xml'
+            yarn_site = self.hadoop_base.config.dirs['hadoop_conf'] / 'yarn-site.xml'
             with utils.xmlpropmap_edit_in_place(yarn_site) as props:
                 props["yarn.nodemanager.address"] = "{host}:8050".format(host=host)
                 props["yarn.nodemanager.localizer.address"] = "{host}:8045".format(host=host)
@@ -462,7 +417,7 @@
 
     def configure_yarn_base(self, host):
         config = hookenv.config()
-        yarn_site = self.hadoop_base.dirs['hadoop_conf'] / 'yarn-site.xml'
+        yarn_site = self.hadoop_base.config.dirs['hadoop_conf'] / 'yarn-site.xml'
         with utils.xmlpropmap_edit_in_place(yarn_site) as props:
             props["yarn.resourcemanager.resource-tracker.address"] = "{host}:8031".format(host=host)
             props["yarn.resourcemanager.scheduler.address"] = "{host}:8030".format(host=host)
@@ -472,7 +427,7 @@
             props["yarn.nodemanager.aux-services"] = config["yarn_nodemanager_aux-services"]
             props["yarn.nodemanager.aux-services.mapreduce.shuffle.class"] = \
                 config["yarn_nodemanager_aux-services_mapreduce_shuffle_class"]
-        mapred_site = self.hadoop_base.dirs['hadoop_conf'] / 'mapred-site.xml'
+        mapred_site = self.hadoop_base.config.dirs['hadoop_conf'] / 'mapred-site.xml'
         with utils.xmlpropmap_edit_in_place(mapred_site) as props:
             props["mapreduce.jobhistory.address"] = "{host}:10020".format(host=host)
             props["mapreduce.jobhistory.webapp.address"] = "{host}:19888".format(host=host)
@@ -495,11 +450,13 @@
 
     def _yarn_daemon(self, command, service):
         self.hadoop_base.run('yarn', 'sbin/yarn-daemon.sh',
-                             '--config', self.hadoop_base.dirs['hadoop_conf'],
+                             '--config',
+                             self.hadoop_base.config.dirs['hadoop_conf'],
                              command, service)
 
     def _jobhistory_daemon(self, command, service):
         # TODO refactor job history to separate class
         self.hadoop_base.run('mapred', 'sbin/mr-jobhistory-daemon.sh',
-                             '--config', self.hadoop_base.dirs['hadoop_conf'],
+                             '--config',
+                             self.hadoop_base.config.dirs['hadoop_conf'],
                              command, service)

=== modified file 'charmhelpers/contrib/bigdata/utils.py'
--- charmhelpers/contrib/bigdata/utils.py	2015-03-13 17:52:52 +0000
+++ charmhelpers/contrib/bigdata/utils.py	2015-03-17 05:14:30 +0000
@@ -16,14 +16,134 @@
 
 import re
 import time
+import yaml
 from contextlib import contextmanager
 from subprocess import check_call, check_output, CalledProcessError
 from xml.etree import ElementTree as ET
 from xml.dom import minidom
 from distutils.util import strtobool
-
 from path import Path
 
+from charmhelpers.core import hookenv
+from charmhelpers.core import host
+from charmhelpers import fetch
+
+
+class DistConfig(object):
+    """This class processes distribution-specific configuration options.
+
+    Some configuration options are specific to the Hadoop distribution
+    (e.g. Apache, Hortonworks, MapR). These options are immutable and
+    must not change throughout the charm deployment lifecycle.
+
+    Helper methods are provided for keys that require action. Presently, this
+    includes adding/removing directories, dependent packages, and groups/users.
+    Other required keys may be listed when instantiating this class, but this
+    will only validate that those keys exist in the yaml; it will not provide
+    any helper functionality for unknown keys.
+
+    Attributes:
+        filename         File to process (default dist.yaml)
+        required_keys    A list of keys required to be present in the yaml
+
+    Example dist.yaml with supported keys:
+        vendor: '<name>'
+        hadoop_version: '<version>'
+        packages:
+            - '<package 1>'
+            - '<package 2>'
+        groups:
+            - '<name>'
+        users:
+            <user 1>:
+                groups: ['<primary>', '<group>', '<group>']
+            <user 2>:
+                groups: ['<primary>']
+        dirs:
+            <dir 1>:
+                path: '</path/to/dir>'
+                perms: 0777
+            <dir 2>:
+                path: '{config[<option>]}'  # value comes from config option
+                owner: '<user>'
+                group: '<group>'
+                perms: 0755
+        ports:
+            <service>:
+                <role 1>: [<port 1>, <port 2>]
+                <role 2>: [<port 1>]
+    """
+    def __init__(self, filename='dist.yaml', required_keys=()):
+        self.yaml_file = filename
+        self.dist_config = yaml.load(Path(self.yaml_file).text())
+
+        # validate dist.yaml
+        missing_keys = set(required_keys) - set(self.dist_config.keys())
+        if missing_keys:
+            raise ValueError('{} is missing required option{}: {}'.format(
+                filename,
+                's' if len(missing_keys) > 1 else '',
+                ', '.join(missing_keys)))
+
+        for opt in required_keys:
+            setattr(self, opt, self.dist_config[opt])
+
+        # If we have dirs, create a managed_dirs dict that contains the info
+        # we'll need to create each directory (owner/group/permissions).
+        if getattr(self, 'dirs', None):
+            self.managed_dirs = self.dirs
+            for name, details in self.managed_dirs.items():
+                # Change path refs to Paths to make future path ops easier
+                details['path'] = self._expand_path(details['path'])
+            # We only care about the dir path now, so reset dirs with only the
+            # name and path as key-value pairs.
+            self.dirs = {name: details['path'] for name, details in self.managed_dirs.items()}
+
+    def _expand_path(self, path):
+        return Path(path.format(config=hookenv.config()))
+
+    def add_dirs(self):
+        for name, details in self.managed_dirs.items():
+            host.mkdir(
+                details['path'],
+                owner=details.get('owner', 'root'),
+                group=details.get('group', 'root'),
+                perms=details.get('perms', 0o755))
+
+    def add_packages(self):
+        with disable_firewall():
+            fetch.apt_update()
+            fetch.apt_install(self.packages)
+
+    def add_users(self):
+        for group in self.groups:
+            host.add_group(group)
+        for username, details in self.users.items():
+            primary_group = None
+            groups = details.get('groups', [])
+            if groups:
+                primary_group = groups[0]
+            host.adduser(username, group=primary_group)
+            for group in groups:
+                host.add_user_to_group(username, group)
+
+    def remove_dirs(self):
+        # TODO: no removal function exists in CH, just log what we would do.
+        for name in self.dirs:
+            hookenv.log('noop: remove directory {0}'.format(name))
+
+    def remove_packages(self):
+        # TODO: no removal function exists in CH, just log what we would do.
+        for name in self.packages:
+            hookenv.log('noop: remove package {0}'.format(name))
+
+    def remove_users(self):
+        # TODO: no removal function exists in CH, just log what we would do.
+        for user in self.users:
+            hookenv.log('noop: remove user {0}'.format(user))
+        for group in self.groups:
+            hookenv.log('noop: remove group {0}'.format(group))
+
 
 @contextmanager
 def disable_firewall():

