
bigdata-dev team mailing list archive

[Merge] lp:~bigdata-dev/charms/trusty/apache-flume-hdfs/trunk into lp:charms/trusty/apache-flume-hdfs

 

Kevin W Monroe has proposed merging lp:~bigdata-dev/charms/trusty/apache-flume-hdfs/trunk into lp:charms/trusty/apache-flume-hdfs.

Requested reviews:
  Juju Big Data Development (bigdata-dev)

For more details, see:
https://code.launchpad.net/~bigdata-dev/charms/trusty/apache-flume-hdfs/trunk/+merge/271902
-- 
Your team Juju Big Data Development is requested to review the proposed merge of lp:~bigdata-dev/charms/trusty/apache-flume-hdfs/trunk into lp:charms/trusty/apache-flume-hdfs.
=== modified file 'README.md'
--- README.md	2015-08-25 05:52:44 +0000
+++ README.md	2015-09-22 03:36:37 +0000
@@ -53,17 +53,16 @@
 unit, locate an event, and cat it:
 
     juju ssh flume-hdfs/0
-    hdfs dfs -ls /user/flume/<event_dir>  # <-- find a date
-    hdfs dfs -ls /user/flume/<event_dir>/yy-mm-dd  # <-- find an hour
-    hdfs dfs -ls /user/flume/<event_dir>/yy-mm-dd/HH  # <-- find an event
-    hdfs dfs -cat /user/flume/<event_dir>/yy-mm-dd/HH/FlumeData.<id>
+    hdfs dfs -ls /user/flume/<event_dir>               # <-- find a date
+    hdfs dfs -ls /user/flume/<event_dir>/<yyyy-mm-dd>  # <-- find an event
+    hdfs dfs -cat /user/flume/<event_dir>/<yyyy-mm-dd>/FlumeData.<id>
 
 This process works well for data serialized in `text` format (the default).
 For data serialized in `avro` format, you'll need to copy the file locally
 and use the `dfs -text` command. For example, replace the `dfs -cat` command
 from above with the following to view files stored in `avro` format:
 
-    hdfs dfs -copyToLocal /user/flume/<event_dir>/yy-mm-dd/HH/FlumeData.<id> /home/ubuntu/myFile.txt
+    hdfs dfs -copyToLocal /user/flume/<event_dir>/<yyyy-mm-dd>/FlumeData.<id> /home/ubuntu/myFile.txt
     hdfs dfs -text file:///home/ubuntu/myFile.txt
 
 

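For illustration: with the hour-level directory dropped, a day's events now sit directly under the date directory. A hypothetical listing (the event_dir name and FlumeData ids below are placeholders, not output from this charm):

    /user/flume/my_events/2015-09-22/FlumeData.1442889600123
    /user/flume/my_events/2015-09-22/FlumeData.1442893200456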
=== added directory 'actions'
=== added file 'actions.yaml'
--- actions.yaml	1970-01-01 00:00:00 +0000
+++ actions.yaml	2015-09-22 03:36:37 +0000
@@ -0,0 +1,6 @@
+start-flume:
+    description: Start the Flume-HDFS daemon
+stop-flume:
+    description: Stop the Flume-HDFS daemon
+restart-flume:
+    description: Restart the Flume-HDFS daemon
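For reference, these actions would be invoked with the Juju 1.x action CLI that was current for trusty charms; a minimal sketch, assuming the deployed unit is named flume-hdfs/0:

    juju action do flume-hdfs/0 restart-flume   # queues the action, prints an action id
    juju action fetch <action-id>               # check the action's outcome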

=== added file 'actions/restart-flume'
--- actions/restart-flume	1970-01-01 00:00:00 +0000
+++ actions/restart-flume	2015-09-22 03:36:37 +0000
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+ps -ef | grep "[f]lume-ng" | awk '{print $2}' | xargs -r kill -9
+sleep 2
+su flume -c "/usr/lib/flume-hdfs/bin/flume-ng agent -c /etc/flume-hdfs/conf -f /etc/flume-hdfs-conf/flume.conf -n a1"

=== added file 'actions/start-flume'
--- actions/start-flume	1970-01-01 00:00:00 +0000
+++ actions/start-flume	2015-09-22 03:36:37 +0000
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+su flume -c "/usr/lib/flume-hdfs/bin/flume-ng agent -c /etc/flume-hdfs/conf -f /etc/flume-hdfs-conf/flume.conf -n a1"

=== added file 'actions/stop-flume'
--- actions/stop-flume	1970-01-01 00:00:00 +0000
+++ actions/stop-flume	2015-09-22 03:36:37 +0000
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+ps -ef | grep "[f]lume-ng" | awk '{print $2}' | xargs -r kill -9
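A note on the process cleanup in stop-flume and restart-flume: bracketing the first character of the pattern ("[f]lume-ng") keeps grep from matching its own command line, and `xargs -r` skips the kill entirely when no agent is running. If pkill is available (it is on trusty), an equivalent sketch would be:

    # alternative: match the full command line; pkill never matches itself
    pkill -9 -f flume-ng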

=== modified file 'config.yaml'
--- config.yaml	2015-08-25 05:52:44 +0000
+++ config.yaml	2015-09-22 03:36:37 +0000
@@ -6,16 +6,42 @@
         URL from which to fetch resources (e.g., Hadoop binaries) instead
         of Launchpad.
     channel_capacity:
-      type: string
-      default: '1000'
+      type: int
+      default: 1000
       description: |
         The maximum number of events stored in the channel.
     channel_transaction_capacity:
-      type: string
-      default: '100'
+      type: int
+      default: 100
       description: |
         The maximum number of events the channel will take from a source or
         give to a sink per transaction.
+    dfs_replication:
+      type: int
+      default: 3
+      description: |
+        The DFS replication value. The default (3) matches the replication
+        used by the NameNode provided by apache-hadoop-hdfs-master, but may be
+        overridden for this service.
+    roll_count:
+      type: int
+      default: 0
+      description: |
+        Number of events written to file before it is rolled. A value of 0 (the
+        default) means never roll based on number of events.
+    roll_interval:
+      type: int
+      default: 300
+      description: |
+        Number of seconds to wait before rolling the current file. Default
+        will roll the file after 5 minutes. A value of 0 means never roll
+        based on a time interval.
+    roll_size:
+      type: string
+      default: '10000000'
+      description: |
+        File size to trigger roll, in bytes. Default will roll the file once
+        it reaches 10 MB. A value of 0 means never roll based on file size.
     protocol:
       type: string
       default: 'avro'
@@ -35,7 +61,7 @@
         Specify the serializer used when the sink writes to HDFS. Either
         'avro_event' or 'text' are supported.
     source_port:
-      type: string
-      default: '4141'
+      type: int
+      default: 4141
       description: |
         Port on which the agent source is listening.
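All of these options are settable at runtime through standard charm configuration. As a sketch using the Juju 1.x syntax current for this charm (service name flume-hdfs assumed):

    # example: roll files every 10 minutes or at 64 MB, whichever comes first
    juju set flume-hdfs roll_interval=600 roll_size=67108864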

=== added file 'resources/python/jujuresources-0.2.11.tar.gz'
Binary files resources/python/jujuresources-0.2.11.tar.gz	1970-01-01 00:00:00 +0000 and resources/python/jujuresources-0.2.11.tar.gz	2015-09-22 03:36:37 +0000 differ
=== removed file 'resources/python/jujuresources-0.2.9.tar.gz'
Binary files resources/python/jujuresources-0.2.9.tar.gz	2015-07-22 14:25:27 +0000 and resources/python/jujuresources-0.2.9.tar.gz	1970-01-01 00:00:00 +0000 differ
=== modified file 'templates/flume.conf.j2'
--- templates/flume.conf.j2	2015-07-28 20:18:23 +0000
+++ templates/flume.conf.j2	2015-09-22 03:36:37 +0000
@@ -14,16 +14,19 @@
 
 # channel properties
 a1.channels.c1.type = memory
-a1.channels.c1.capacity = {{ config['channel_capacity']}}
-a1.channels.c1.transactionCapacity = {{ config['channel_transaction_capacity']}}
+a1.channels.c1.capacity = {{ config['channel_capacity'] }}
+a1.channels.c1.transactionCapacity = {{ config['channel_transaction_capacity'] }}
 
 # sink properties
 a1.sinks.k1.type = hdfs
 a1.sinks.k1.channel = c1
 a1.sinks.k1.hdfs.fileType = DataStream
 a1.sinks.k1.hdfs.writeFormat = Text
-a1.sinks.k1.hdfs.path = /user/flume/%{event_dir}/%y-%m-%d/%H
+a1.sinks.k1.hdfs.path = /user/flume/%{event_dir}/%Y-%m-%d
 a1.sinks.k1.hdfs.useLocalTimeStamp = true
+a1.sinks.k1.hdfs.rollCount = {{ config['roll_count'] }}
+a1.sinks.k1.hdfs.rollInterval = {{ config['roll_interval'] }}
+a1.sinks.k1.hdfs.rollSize = {{ config['roll_size'] }}
 a1.sinks.k1.serializer = {{ config['sink_serializer'] }}
 a1.sinks.k1.serializer.compressionCodec = {{ config['sink_compression'] }}
 {% if config['sink_serializer'] == 'avro_event' %}
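For context, with the default config values above, the rendered sink section of flume.conf would include the following (derived from this template and the defaults in config.yaml; shown for illustration):

    a1.sinks.k1.hdfs.path = /user/flume/%{event_dir}/%Y-%m-%d
    a1.sinks.k1.hdfs.rollCount = 0
    a1.sinks.k1.hdfs.rollInterval = 300
    a1.sinks.k1.hdfs.rollSize = 10000000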

