← Back to team overview

graphite-dev team mailing list archive

[Merge] lp:~ziggy/graphite/whitelist into lp:graphite

 

ziggy has proposed merging lp:~ziggy/graphite/whitelist into lp:graphite.

Requested reviews:
  graphite-dev (graphite-dev)

For more details, see:
https://code.launchpad.net/~ziggy/graphite/whitelist/+merge/45710

We would be completely overwhelmed by the volume of metrics flowing into Graphite if we accepted everything that came our way.  I can't control what is sent to Graphite, but I can control what is accepted into Graphite.  To implement that, we've created whitelist and blacklist functionality that, respectively, allows or denies a metric's entry into Graphite.  This code is based on code that has been running in production for months, handling several thousand metrics per minute after filtering.

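Each list is a file with one regular expression per line.  The blacklist is checked first: if a metric matches a regex in the blacklist, it is dropped.  If a whitelist is configured, a metric is accepted by carbon only if it also matches a regex in the whitelist.  A list that is not configured in carbon.conf is not consulted at all.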

For example, a whitelist consisting of the line:
^server\.load\.
will allow any metric that starts with "server.load." into carbon, unless it also matches a regex in the blacklist.

To test, uncomment the WHITELIST and/or BLACKLIST lines in carbon.conf and add one or more regular expressions to graphite/conf/whitelist and/or graphite/conf/blacklist.  Restart carbon for your changes to take effect.  Metrics which are rejected are logged in listener.log.
-- 
https://code.launchpad.net/~ziggy/graphite/whitelist/+merge/45710
Your team graphite-dev is requested to review the proposed merge of lp:~ziggy/graphite/whitelist into lp:graphite.
=== modified file 'carbon/bin/carbon-cache.py'
--- carbon/bin/carbon-cache.py	2010-12-31 03:43:36 +0000
+++ carbon/bin/carbon-cache.py	2011-01-10 16:23:26 +0000
@@ -57,16 +57,6 @@
   sys.exit(1)
 
 
-# Import application components
-from carbon.conf import settings
-from carbon.log import logToStdout, logToDir
-from carbon.listeners import MetricLineReceiver, MetricPickleReceiver, CacheQueryHandler, startListener
-from carbon.cache import MetricCache
-from carbon.writer import startWriter
-from carbon.instrumentation import startRecordingCacheMetrics
-from carbon.events import metricReceived
-
-
 # Parse command line options
 parser = optparse.OptionParser(usage='%prog [options] <start|stop|status>')
 parser.add_option('--debug', action='store_true', help='Run in the foreground, log to stdout')
@@ -131,9 +121,16 @@
   print "Pidfile %s already exists, is %s already running?" % (options.pidfile, program)
   raise SystemExit(1)
 
-
-# Read config (we want failures to occur before daemonizing)
+# Import application components
+from carbon.conf import settings
+# Read config before daemonizing (so failures surface early); some modules imported below need settings to be loaded at import time
 settings.readFrom(options.config, 'cache')
+from carbon.log import logToStdout, logToDir
+from carbon.listeners import MetricLineReceiver, MetricPickleReceiver, CacheQueryHandler, startListener
+from carbon.cache import MetricCache
+from carbon.writer import startWriter
+from carbon.instrumentation import startRecordingCacheMetrics
+from carbon.events import metricReceived
 
 use_amqp = settings.get("ENABLE_AMQP", False)
 if use_amqp:

=== added file 'carbon/conf/blacklist'
=== modified file 'carbon/conf/carbon.conf.example'
--- carbon/conf/carbon.conf.example	2010-12-31 07:19:18 +0000
+++ carbon/conf/carbon.conf.example	2011-01-10 16:23:26 +0000
@@ -40,6 +40,11 @@
 # degrade performance if logging on the same volume as the whisper data is stored.
 LOG_UPDATES = True
 
+# Adding regular expressions to the whitelist makes carbon accept only matching metrics
+#WHITELIST = /opt/graphite/conf/whitelist
+
+# Adding regular expressions to the blacklist prevents carbon from accepting matching metrics
+#BLACKLIST = /opt/graphite/conf/blacklist
 
 # Enable AMQP if you want to receve metrics using an amqp broker
 # ENABLE_AMQP = False

=== added file 'carbon/conf/whitelist'
=== modified file 'carbon/lib/carbon/listeners.py'
--- carbon/lib/carbon/listeners.py	2010-06-06 16:24:30 +0000
+++ carbon/lib/carbon/listeners.py	2011-01-10 16:23:26 +0000
@@ -7,12 +7,19 @@
 from carbon.instrumentation import increment
 from carbon.events import metricReceived
 from carbon import log
+from carbon.conf import settings
+import regexlist
 
 try:
   import cPickle as pickle
 except ImportError:
   import pickle
 
+# Load whitelist and blacklist
+if 'WHITELIST' in settings:
+	whitelist = regexlist.RegexList(settings.WHITELIST)
+if 'BLACKLIST' in settings:
+	blacklist = regexlist.RegexList(settings.BLACKLIST)
 
 class LoggingMixin:
   def connectionMade(self):
@@ -38,6 +45,14 @@
       log.listener('invalid line received from client %s, ignoring' % self.peerAddr)
       return
 
+    if 'BLACKLIST' in settings and blacklist.in_list(metric):
+      log.listener('ignoring metric %s which is blacklisted' % metric)
+      return
+
+    if 'WHITELIST' in settings and not whitelist.in_list(metric):
+      log.listener('ignoring metric %s which is not whitelisted' % metric)
+      return
+
     increment('metricsReceived')
     metricReceived(metric, datapoint)
 
@@ -56,10 +71,22 @@
       try:
         datapoint = ( float(datapoint[0]), float(datapoint[1]) ) #force proper types
       except:
-        continue
-
-      if datapoint[1] == datapoint[1]: # filter out NaN values
-        metricReceived(metric, datapoint)
+        log.listener('invalid pickle received from client %s, ignoring' % self.peerAddr)
+        continue
+
+      if 'BLACKLIST' in settings and blacklist.in_list(metric):
+        log.listener('ignoring metric %s which is blacklisted' % metric)
+        continue
+
+      if 'WHITELIST' in settings and not whitelist.in_list(metric):
+        log.listener('ignoring metric %s which is not whitelisted' % metric)
+        continue
+
+      if datapoint[1] != datapoint[1]: # filter out NaN values
+        log.listener('ingoring metric with NaN value, %s' % metric)
+        continue
+
+      metricReceived(metric, datapoint)
 
     increment('metricsReceived', len(datapoints))
 


Follow ups