bigdata-dev team mailing list archive: Message #00051
[Merge] lp:~bigdata-dev/charm-helpers/distconfig into lp:~bigdata-dev/charm-helpers/framework
Kevin W Monroe has proposed merging lp:~bigdata-dev/charm-helpers/distconfig into lp:~bigdata-dev/charm-helpers/framework.
Requested reviews:
Juju Big Data Development (bigdata-dev)
For more details, see:
https://code.launchpad.net/~bigdata-dev/charm-helpers/distconfig/+merge/253152
This separates the dist.yaml processing into a DistConfig class in contrib/bigdata/utils.py and refactors handlers/apache.py to use it.
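For context, here is a minimal sketch (not part of the diff below) of how a charm would be expected to wire the two pieces together after this change; the required_keys list shown is an illustrative assumption based on the new constructor signatures:

    # Hypothetical charm-side usage; the required_keys values are illustrative only.
    from charmhelpers.contrib.bigdata import utils
    from charmhelpers.contrib.bigdata.handlers import apache

    # DistConfig loads and validates dist.yaml once...
    dist_config = utils.DistConfig(
        filename='dist.yaml',
        required_keys=['vendor', 'hadoop_version', 'dirs',
                       'packages', 'groups', 'users', 'ports'])

    # ...and HadoopBase now receives it instead of parsing dist.yaml itself.
    hadoop = apache.HadoopBase(dist_config)
    hadoop.spec()  # e.g. report the vendor/hadoop/java/arch spec for this unit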
--
Your team Juju Big Data Development is requested to review the proposed merge of lp:~bigdata-dev/charm-helpers/distconfig into lp:~bigdata-dev/charm-helpers/framework.
=== modified file 'charmhelpers/contrib/bigdata/handlers/apache.py'
--- charmhelpers/contrib/bigdata/handlers/apache.py 2015-03-13 19:04:46 +0000
+++ charmhelpers/contrib/bigdata/handlers/apache.py 2015-03-17 05:14:30 +0000
@@ -19,23 +19,31 @@
import time
from path import Path
-import yaml
import jujuresources
-from charmhelpers.core import host
from charmhelpers.core import hookenv
from charmhelpers.core import charmdata
-from charmhelpers import fetch
from charmhelpers.contrib.bigdata import utils
class HadoopBase(object):
- def __init__(self):
+ def __init__(self, dist_config):
+ self.config = dist_config
self.cpu_arch = check_output(['uname', '-p']).strip()
- self.load_dist_config()
+
+ # verify config
+ required_dirs = ['hadoop', 'hadoop_conf', 'hdfs_log_dir',
+ 'yarn_log_dir']
+ missing_dirs = set(required_dirs) - set(self.config.dirs.keys())
+ if missing_dirs:
+ raise ValueError('dirs option in {} is missing required entr{}: {}'.format(
+ self.config.yaml_file,
+ 'ies' if len(missing_dirs) > 1 else 'y',
+ ', '.join(missing_dirs)))
+
self.client_spec = {
- 'hadoop': self.hadoop_version,
+ 'hadoop': self.config.hadoop_version,
}
def spec(self):
@@ -50,47 +58,14 @@
java_version = charmdata.kv.get('java.version')
if java_version:
return {
- 'vendor': self.vendor,
- 'hadoop': self.hadoop_version,
+ 'vendor': self.config.vendor,
+ 'hadoop': self.config.hadoop_version,
'java': java_version,
'arch': self.cpu_arch,
}
else:
return None
- def load_dist_config(self, filename='dist.yaml'):
- self._dist_config = yaml.load(Path(filename).text())
-
- # validate dist.yaml
- required_opts = ['vendor', 'hadoop_version', 'dirs']
- optional_opts = {'packages': [], 'groups': [], 'users': {}, 'ports': {}}
- missing_opts = set(required_opts) - set(self._dist_config.keys())
- if missing_opts:
- raise ValueError('{} is missing required option{}: {}'.format(
- filename,
- 's' if len(missing_opts) > 1 else '',
- ', '.join(missing_opts)))
- required_dirs = ['hadoop']
- missing_dirs = set(required_dirs) - set(self._dist_config['dirs'].keys())
- if missing_dirs:
- raise ValueError('dirs option in {} is missing required entr{}: {}'.format(
- filename,
- 'ies' if len(missing_dirs) > 1 else 'y',
- ', '.join(missing_dirs)))
-
- for opt in required_opts:
- setattr(self, opt, self._dist_config[opt])
- for opt in optional_opts:
- setattr(self, opt, self._dist_config.get(opt, optional_opts[opt]))
- self.managed_dirs = self.dirs
- for name, details in self.managed_dirs.items():
- details['path'] = self._expand_path(details['path'])
- self.managed_dirs['hadoop_conf'] = {'path': Path('/etc/hadoop/conf')}
- self.dirs = {name: details['path'] for name, details in self.managed_dirs.items()}
-
- def _expand_path(self, path):
- return Path(path.format(config=hookenv.config()))
-
def verify_conditional_resources(self):
conditional_resources = ['hadoop-%s' % self.cpu_arch]
mirror_url = hookenv.config()['resources_mirror']
@@ -103,8 +78,9 @@
if not force and self.is_installed():
return
self.configure_hosts_file()
- self.manage_users()
- self.manage_dirs()
+ self.config.add_users()
+ self.config.add_dirs()
+ self.config.add_packages()
self.install_base_packages()
self.setup_hadoop_config()
self.configure_hadoop()
@@ -129,30 +105,8 @@
hosts.insert(0, line)
etc_hosts.write_lines(hosts)
- def manage_users(self):
- for group in self.groups:
- host.add_group(group)
- for username, details in self.users.items():
- primary_group = None
- groups = details.get('groups', [])
- if groups:
- primary_group = groups[0]
- host.adduser(username, group=primary_group)
- for group in groups:
- host.add_user_to_group(username, group)
-
- def manage_dirs(self):
- for name, details in self.managed_dirs.items():
- host.mkdir(
- self._expand_path(details['path']),
- owner=details.get('owner', 'root'),
- group=details.get('group', 'root'),
- perms=details.get('perms', 0o755))
-
def install_base_packages(self):
with utils.disable_firewall():
- fetch.apt_update()
- fetch.apt_install(self.packages)
self.install_java()
self.install_hadoop()
@@ -176,25 +130,28 @@
charmdata.kv.set('java.version', java_version)
def install_hadoop(self):
- jujuresources.install('hadoop-%s' % self.cpu_arch, destination=self.dirs['hadoop'], skip_top_level=True)
+ jujuresources.install('hadoop-%s' %
+ self.cpu_arch,
+ destination=self.config.dirs['hadoop'],
+ skip_top_level=True)
def setup_hadoop_config(self):
# copy default config into alternate dir
- conf_dir = self.dirs['hadoop'] / 'etc/hadoop'
- self.dirs['hadoop_conf'].rmtree_p()
- conf_dir.copytree(self.dirs['hadoop_conf'])
- (self.dirs['hadoop_conf'] / 'slaves').remove_p()
- mapred_site = self.dirs['hadoop_conf'] / 'mapred-site.xml'
+ conf_dir = self.config.dirs['hadoop'] / 'etc/hadoop'
+ self.config.dirs['hadoop_conf'].rmtree_p()
+ conf_dir.copytree(self.config.dirs['hadoop_conf'])
+ (self.config.dirs['hadoop_conf'] / 'slaves').remove_p()
+ mapred_site = self.config.dirs['hadoop_conf'] / 'mapred-site.xml'
if not mapred_site.exists():
- (self.dirs['hadoop_conf'] / 'mapred-site.xml.template').copy(mapred_site)
+ (self.config.dirs['hadoop_conf'] / 'mapred-site.xml.template').copy(mapred_site)
def configure_hadoop(self):
config = hookenv.config()
java_home = Path(charmdata.kv.get('java.home'))
java_bin = java_home / 'bin'
- hadoop_bin = self.dirs['hadoop'] / 'bin'
- hadoop_sbin = self.dirs['hadoop'] / 'sbin'
+ hadoop_bin = self.config.dirs['hadoop'] / 'bin'
+ hadoop_sbin = self.config.dirs['hadoop'] / 'sbin'
with utils.environment_edit_in_place('/etc/environment') as env:
env['JAVA_HOME'] = java_home
if java_bin not in env['PATH']:
@@ -203,37 +160,37 @@
env['PATH'] = ':'.join([env['PATH'], hadoop_bin])
if hadoop_sbin not in env['PATH']:
env['PATH'] = ':'.join([env['PATH'], hadoop_sbin])
- env['HADOOP_LIBEXEC_DIR'] = self.dirs['hadoop'] / 'libexec'
- env['HADOOP_INSTALL'] = self.dirs['hadoop']
- env['HADOOP_HOME'] = self.dirs['hadoop']
- env['HADOOP_COMMON_HOME'] = self.dirs['hadoop']
- env['HADOOP_HDFS_HOME'] = self.dirs['hadoop']
- env['HADOOP_MAPRED_HOME'] = self.dirs['hadoop']
- env['HADOOP_YARN_HOME'] = self.dirs['hadoop']
- env['YARN_HOME'] = self.dirs['hadoop']
- env['HADOOP_CONF_DIR'] = self.dirs['hadoop_conf']
- env['YARN_CONF_DIR'] = self.dirs['hadoop_conf']
- env['YARN_LOG_DIR'] = self.dirs['yarn_log_dir']
- env['HDFS_LOG_DIR'] = self.dirs['hdfs_log_dir']
- env['HADOOP_LOG_DIR'] = self.dirs['hdfs_log_dir'] # for hadoop 2.2.0 only
+ env['HADOOP_LIBEXEC_DIR'] = self.config.dirs['hadoop'] / 'libexec'
+ env['HADOOP_INSTALL'] = self.config.dirs['hadoop']
+ env['HADOOP_HOME'] = self.config.dirs['hadoop']
+ env['HADOOP_COMMON_HOME'] = self.config.dirs['hadoop']
+ env['HADOOP_HDFS_HOME'] = self.config.dirs['hadoop']
+ env['HADOOP_MAPRED_HOME'] = self.config.dirs['hadoop']
+ env['HADOOP_YARN_HOME'] = self.config.dirs['hadoop']
+ env['YARN_HOME'] = self.config.dirs['hadoop']
+ env['HADOOP_CONF_DIR'] = self.config.dirs['hadoop_conf']
+ env['YARN_CONF_DIR'] = self.config.dirs['hadoop_conf']
+ env['YARN_LOG_DIR'] = self.config.dirs['yarn_log_dir']
+ env['HDFS_LOG_DIR'] = self.config.dirs['hdfs_log_dir']
+ env['HADOOP_LOG_DIR'] = self.config.dirs['hdfs_log_dir'] # for hadoop 2.2.0 only
env['MAPRED_LOG_DIR'] = '/var/log/hadoop/mapred' # should be moved to config, but could
env['MAPRED_PID_DIR'] = '/var/run/hadoop/mapred' # be destructive for mapreduce operation
- hadoop_env = self.dirs['hadoop_conf'] / 'hadoop-env.sh'
+ hadoop_env = self.config.dirs['hadoop_conf'] / 'hadoop-env.sh'
utils.re_edit_in_place(hadoop_env, {
r'export JAVA_HOME *=.*': 'export JAVA_HOME=%s' % java_home,
})
- core_site = self.dirs['hadoop_conf'] / 'core-site.xml'
+ core_site = self.config.dirs['hadoop_conf'] / 'core-site.xml'
with utils.xmlpropmap_edit_in_place(core_site) as props:
props["io.file.buffer.size"] = config["io_file_buffer_size"]
def config_for_hue(self):
# Set hue-specific properties in our hdfs|core-site.xml files
- hdfs_site = self.dirs['hadoop_conf'] / 'hdfs-site.xml'
+ hdfs_site = self.config.dirs['hadoop_conf'] / 'hdfs-site.xml'
with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
props['dfs.webhdfs.enabled'] = "true"
- core_site = self.dirs['hadoop_conf'] / 'core-site.xml'
+ core_site = self.config.dirs['hadoop_conf'] / 'core-site.xml'
with utils.xmlpropmap_edit_in_place(core_site) as props:
props['hadoop.proxyuser.hue.hosts'] = "*"
props['hadoop.proxyuser.hue.groups'] = "*"
@@ -245,15 +202,14 @@
:param str command: Command to run, prefixed with `bin/` or `sbin/`
:param list args: Additional args to pass to the command
"""
- return utils.run_as(user, self.dirs['hadoop'] / command, *args, **kwargs)
+ return utils.run_as(user,
+ self.config.dirs['hadoop'] / command,
+ *args, **kwargs)
class HDFS(object):
def __init__(self, hadoop_base):
self.hadoop_base = hadoop_base
- self.master_ports = hadoop_base.ports['hdfs']['master']
- self.secondarynamenode_ports = hadoop_base.ports['hdfs']['secondarynamenode']
- self.slave_ports = hadoop_base.ports['hdfs']['slave']
def stop_namenode(self):
self._hadoop_daemon('stop', 'namenode')
@@ -300,7 +256,7 @@
def configure_secondarynamenode(self):
"""
- Configure the Secondary Namenode when the apache-hadoop-hdfs-checkpoint
+ Configure the Secondary Namenode when the apache-hadoop-hdfs-secondary
charm is deployed and related to apache-hadoop-hdfs-master.
The only purpose of the secondary namenode is to perform periodic
@@ -314,7 +270,7 @@
# Set generic hdfs config
self.configure_hdfs_base(remote)
# Set secondarynamenode-specific config
- hdfs_site = self.hadoop_base.dirs['hadoop_conf'] / 'hdfs-site.xml'
+ hdfs_site = self.hadoop_base.config.dirs['hadoop_conf'] / 'hdfs-site.xml'
with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
props['dfs.namenode.http-address'] = "{host}:50070".format(host=remote)
props['dfs.namenode.secondary.http-address'] = "{host}:50090".format(host=local)
@@ -334,10 +290,10 @@
def configure_hdfs_base(self, host):
config = hookenv.config()
- core_site = self.hadoop_base.dirs['hadoop_conf'] / 'core-site.xml'
+ core_site = self.hadoop_base.config.dirs['hadoop_conf'] / 'core-site.xml'
with utils.xmlpropmap_edit_in_place(core_site) as props:
props['fs.defaultFS'] = "hdfs://{host}:8020".format(host=host)
- hdfs_site = self.hadoop_base.dirs['hadoop_conf'] / 'hdfs-site.xml'
+ hdfs_site = self.hadoop_base.config.dirs['hadoop_conf'] / 'hdfs-site.xml'
with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
props['dfs.namenode.name.dir'] = Path(config['hadoop_dir_base']) / 'cache/hadoop/dfs/name'
props['dfs.namenode.handler.count'] = config['dfs_namenode_handler_count']
@@ -374,7 +330,7 @@
def register_slaves(self):
slaves = charmdata.kv.get('relations.ready', {}).get('datanode', {})
- slaves_file = self.hadoop_base.dirs['hadoop_conf'] / 'slaves'
+ slaves_file = self.hadoop_base.config.dirs['hadoop_conf'] / 'slaves'
slaves_file.write_lines(
[
'# DO NOT EDIT',
@@ -387,7 +343,8 @@
def _hadoop_daemon(self, command, service):
self.hadoop_base.run('hdfs', 'sbin/hadoop-daemon.sh',
- '--config', self.hadoop_base.dirs['hadoop_conf'],
+ '--config',
+ self.hadoop_base.config.dirs['hadoop_conf'],
command, service)
def _hdfs(self, command, *args):
@@ -397,8 +354,6 @@
class YARN(object):
def __init__(self, hadoop_base):
self.hadoop_base = hadoop_base
- self.master_ports = hadoop_base.ports['yarn']['master']
- self.slave_ports = hadoop_base.ports['yarn']['slave']
def stop_resourcemanager(self):
self._yarn_daemon('stop', 'resourcemanager')
@@ -434,7 +389,7 @@
if not charmdata.kv.get('yarn.resourcemanager.configured'):
host = self._local_host()
self.configure_yarn_base(host)
- yarn_site = self.hadoop_base.dirs['hadoop_conf'] / 'yarn-site.xml'
+ yarn_site = self.hadoop_base.config.dirs['hadoop_conf'] / 'yarn-site.xml'
with utils.xmlpropmap_edit_in_place(yarn_site) as props:
props["yarn.nodemanager.address"] = "{host}:8050".format(host=host)
props["yarn.nodemanager.localizer.address"] = "{host}:8045".format(host=host)
@@ -462,7 +417,7 @@
def configure_yarn_base(self, host):
config = hookenv.config()
- yarn_site = self.hadoop_base.dirs['hadoop_conf'] / 'yarn-site.xml'
+ yarn_site = self.hadoop_base.config.dirs['hadoop_conf'] / 'yarn-site.xml'
with utils.xmlpropmap_edit_in_place(yarn_site) as props:
props["yarn.resourcemanager.resource-tracker.address"] = "{host}:8031".format(host=host)
props["yarn.resourcemanager.scheduler.address"] = "{host}:8030".format(host=host)
@@ -472,7 +427,7 @@
props["yarn.nodemanager.aux-services"] = config["yarn_nodemanager_aux-services"]
props["yarn.nodemanager.aux-services.mapreduce.shuffle.class"] = \
config["yarn_nodemanager_aux-services_mapreduce_shuffle_class"]
- mapred_site = self.hadoop_base.dirs['hadoop_conf'] / 'mapred-site.xml'
+ mapred_site = self.hadoop_base.config.dirs['hadoop_conf'] / 'mapred-site.xml'
with utils.xmlpropmap_edit_in_place(mapred_site) as props:
props["mapreduce.jobhistory.address"] = "{host}:10020".format(host=host)
props["mapreduce.jobhistory.webapp.address"] = "{host}:19888".format(host=host)
@@ -495,11 +450,13 @@
def _yarn_daemon(self, command, service):
self.hadoop_base.run('yarn', 'sbin/yarn-daemon.sh',
- '--config', self.hadoop_base.dirs['hadoop_conf'],
+ '--config',
+ self.hadoop_base.config.dirs['hadoop_conf'],
command, service)
def _jobhistory_daemon(self, command, service):
# TODO refactor job history to separate class
self.hadoop_base.run('mapred', 'sbin/mr-jobhistory-daemon.sh',
- '--config', self.hadoop_base.dirs['hadoop_conf'],
+ '--config',
+ self.hadoop_base.config.dirs['hadoop_conf'],
command, service)
=== modified file 'charmhelpers/contrib/bigdata/utils.py'
--- charmhelpers/contrib/bigdata/utils.py 2015-03-13 17:52:52 +0000
+++ charmhelpers/contrib/bigdata/utils.py 2015-03-17 05:14:30 +0000
@@ -16,14 +16,134 @@
import re
import time
+import yaml
from contextlib import contextmanager
from subprocess import check_call, check_output, CalledProcessError
from xml.etree import ElementTree as ET
from xml.dom import minidom
from distutils.util import strtobool
-
from path import Path
+from charmhelpers.core import hookenv
+from charmhelpers.core import host
+from charmhelpers import fetch
+
+
+class DistConfig(object):
+ """This class processes distribution-specific configuration options.
+
+ Some configuration options are specific to the Hadoop distribution
+ (e.g. Apache, Hortonworks, MapR). These options are immutable and
+ must not change throughout the charm deployment lifecycle.
+
+ Helper methods are provided for keys that require action. Presently, this
+ includes adding/removing directories, dependent packages, and groups/users.
+ Other required keys may be listed when instantiating this class, but this
+ will only validate that those keys exist in the yaml; it will not provide
+ any helper functionality for unknown keys.
+
+ Attributes:
+ filename: File to process (default: dist.yaml)
+ required_keys: A list of keys required to be present in the yaml
+
+ Example dist.yaml with supported keys:
+ vendor: '<name>'
+ hadoop_version: '<version>'
+ packages:
+ - '<package 1>'
+ - '<package 2>'
+ groups:
+ - '<name>'
+ users:
+ <user 1>:
+ groups: ['<primary>', '<group>', '<group>']
+ <user 2>:
+ groups: ['<primary>']
+ dirs:
+ <dir 1>:
+ path: '</path/to/dir>'
+ perms: 0777
+ <dir 2>:
+ path: '{config[<option>]}' # value comes from config option
+ owner: '<user>'
+ group: '<group>'
+ perms: 0755
+ ports:
+ <service>:
+ <role 1>: [<port 1>, <port 2>]
+ <role 2>: [<port 1>]
+ """
+ def __init__(self, filename='dist.yaml', required_keys=None):
+ self.yaml_file = filename
+ self.dist_config = yaml.load(Path(self.yaml_file).text())
+
+ # validate dist.yaml
+ missing_keys = set(required_keys) - set(self.dist_config.keys())
+ if missing_keys:
+ raise ValueError('{} is missing required option{}: {}'.format(
+ filename,
+ 's' if len(missing_keys) > 1 else '',
+ ', '.join(missing_keys)))
+
+ for opt in required_keys:
+ setattr(self, opt, self.dist_config[opt])
+
+ # If we have dirs, create a managed_dirs dict that contains the info
+ # we'll need to create each directory (owner/group/permissions).
+ if self.dirs:
+ self.managed_dirs = self.dirs
+ for name, details in self.managed_dirs.items():
+ # Change path refs to Paths to make future path ops easier
+ details['path'] = self._expand_path(details['path'])
+ # We only care about the dir path now, so reset dirs with only the
+ # name and path as key-value pairs.
+ self.dirs = {name: details['path'] for name, details in self.managed_dirs.items()}
+
+ def _expand_path(self, path):
+ return Path(path.format(config=hookenv.config()))
+
+ def add_dirs(self):
+ for name, details in self.managed_dirs.items():
+ host.mkdir(
+ details['path'],
+ owner=details.get('owner', 'root'),
+ group=details.get('group', 'root'),
+ perms=details.get('perms', 0o755))
+
+ def add_packages(self):
+ with disable_firewall():
+ fetch.apt_update()
+ fetch.apt_install(self.packages)
+
+ def add_users(self):
+ for group in self.groups:
+ host.add_group(group)
+ for username, details in self.users.items():
+ primary_group = None
+ groups = details.get('groups', [])
+ if groups:
+ primary_group = groups[0]
+ host.adduser(username, group=primary_group)
+ for group in groups:
+ host.add_user_to_group(username, group)
+
+ def remove_dirs(self):
+ # TODO: no removal function exists in CH, just log what we would do.
+ for name in self.dirs:
+ hookenv.log('noop: remove directory {0}'.format(name))
+
+ def remove_packages(self):
+ # TODO: no removal function exists in CH, just log what we would do.
+ for name in self.packages:
+ hookenv.log('noop: remove package {0}'.format(name))
+
+ def remove_users(self):
+ # TODO: no removal function exists in CH, just log what we would do.
+ for user in self.users:
+ hookenv.log('noop: remove user {0}'.format(user))
+ for group in self.groups:
+ hookenv.log('noop: remove group {0}'.format(group))
+
@contextmanager
def disable_firewall():
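A note for reviewers: the '{config[<option>]}' placeholders documented in the DistConfig docstring are resolved by _expand_path() with str.format() against the charm config. A standalone sketch of that behaviour, using the hadoop_dir_base option with a made-up value for illustration:

    from path import Path

    charm_config = {'hadoop_dir_base': '/usr/lib/hadoop'}  # stand-in for hookenv.config()

    def expand_path(path):
        # mirrors DistConfig._expand_path: format '{config[...]}' against the config dict
        return Path(path.format(config=charm_config))

    print(expand_path('{config[hadoop_dir_base]}/hdfs/logs'))
    # -> /usr/lib/hadoop/hdfs/logs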