From c75daf892bb6d7146a79639783e37f9d6d855dc0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ulrik=20Holm=C3=A9n?=
Date: Sun, 27 Apr 2014 01:07:15 +0200
Subject: [PATCH 1/4] Added collector for metrics from flume

---
 collectors/0/flume.py | 101 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100755 collectors/0/flume.py

diff --git a/collectors/0/flume.py b/collectors/0/flume.py
new file mode 100755
index 00000000..c4d68f7b
--- /dev/null
+++ b/collectors/0/flume.py
@@ -0,0 +1,101 @@
+#!/usr/bin/python
+# This file is part of tcollector.
+# Copyright (C) 2011-2013 The tcollector Authors.
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version. This program is distributed in the hope that it
+# will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+# General Public License for more details. You should have received a copy
+# of the GNU Lesser General Public License along with this program. If not,
+# see <http://www.gnu.org/licenses/>.
+
+"""flume collector"""
+# Get metrics from flume. Tested with flume-ng 1.4.0 only
+
+import errno
+import httplib
+try:
+    import json
+except ImportError:
+    json = None # Handled gracefully in main. Not available by default in <2.6
+import socket
+import sys
+import time
+
+from collectors.lib import utils
+
+# Need to config flume-ng to spit out metrics over http
+# See http://flume.apache.org/FlumeUserGuide.html#reporting-metrics-from-custom-components
+
+COLLECTION_INTERVAL = 15 # seconds
+DEFAULT_TIMEOUT = 10.0 # seconds
+FLUME_HOST = "localhost"
+FLUME_PORT = 34545
+
+# Exclude values that are not really metrics
+EXCLUDE = [ 'StartTime', 'StopTime', 'Type' ]
+
+def err(msg):
+    print >>sys.stderr, msg
+
+class FlumeError(RuntimeError):
+    """Exception raised if we don't get a 200 OK from Flume Monitor Server."""
+
+    def __init__(self, resp):
+        RuntimeError.__init__(self, str(resp))
+        self.resp = resp
+
+def request(server, uri):
+    """Does a GET request of the given uri on the given HTTPConnection."""
+    server.request("GET", uri)
+    resp = server.getresponse()
+    if resp.status != httplib.OK:
+        raise FlumeError(resp)
+    return json.loads(resp.read())
+
+
+def flume_metrics(server):
+    return request(server, "/metrics")
+
+def main(argv):
+    utils.drop_privileges()
+    socket.setdefaulttimeout(DEFAULT_TIMEOUT)
+    server = httplib.HTTPConnection(FLUME_HOST, FLUME_PORT)
+    try:
+        server.connect()
+    except socket.error, (erno, e):
+        if erno == errno.ECONNREFUSED:
+            return 13 # No Flume Monitoring Service, ask tcollector to not respawn us.
+        raise
+    if json is None:
+        err("This collector requires the `json' Python module.")
+        return 1
+
+    def printmetric(metric, value, **tags):
+        if tags:
+            tags = " " + " ".join("%s=%s" % (name, value)
+                                  for name, value in tags.iteritems())
+        else:
+            tags = ""
+        print ("flume.%s %d %s %s" % (metric, ts, value, tags))
+
+    while True:
+        # Get the metrics
+        ts = int(time.time()) # In case last call took a while.
+        stats = flume_metrics(server)
+
+        for metric in stats:
+            (component, name) = metric.split(".")
+            tags = {component.lower(): name}
+            for key,value in stats[metric].items():
+                if key not in EXCLUDE:
+                    printmetric(key.lower(), value, **tags)
+
+        time.sleep(COLLECTION_INTERVAL)
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))

From 4ab418df78b7bbfe1d7c8223f59b82537352d6a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ulrik=20Holm=C3=A9n?=
Date: Sun, 27 Apr 2014 01:19:53 +0200
Subject: [PATCH 2/4] Added a .gitignore to skip files that are generated when deb packages are built

---
 .gitignore | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..cdfd061a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+debian/files
+debian/files
+debian/tcollector.debhelper.log
+debian/tcollector.postinst.debhelper
+debian/tcollector.postrm.debhelper
+debian/tcollector.prerm.debhelper
+debian/tcollector.substvars
+debian/tcollector/
+

From 0482555025e76a8803dea63ea35dbd65f435f271 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ulrik=20Holm=C3=A9n?=
Date: Sun, 27 Apr 2014 01:22:15 +0200
Subject: [PATCH 3/4] Cleaned up and made the link for setting up JSON metrics on flume point to the right URL

---
 collectors/0/flume.py | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/collectors/0/flume.py b/collectors/0/flume.py
index c4d68f7b..d3315df7 100755
--- a/collectors/0/flume.py
+++ b/collectors/0/flume.py
@@ -1,19 +1,19 @@
 #!/usr/bin/python
-# This file is part of tcollector.
-# Copyright (C) 2011-2013 The tcollector Authors.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or (at your
-# option) any later version. This program is distributed in the hope that it
-# will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
-# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
-# General Public License for more details. You should have received a copy
-# of the GNU Lesser General Public License along with this program. If not,
-# see <http://www.gnu.org/licenses/>.
-
-"""flume collector"""
-# Get metrics from flume. Tested with flume-ng 1.4.0 only
+
+"""
+    flume stats collector
+
+Connect to flume agents over http and gather metrics
+and make them suitable for OpenTSDB to consume
+
+Need to config flume-ng to spit out json formatted metrics over http
+See http://flume.apache.org/FlumeUserGuide.html#json-reporting
+
+Tested with flume-ng 1.4.0 only. So far
+
+Based on the elasticsearch collector
+
+"""
 
 import errno
 import httplib
@@ -27,23 +27,20 @@
 
 from collectors.lib import utils
 
-# Need to config flume-ng to spit out metrics over http
-# See http://flume.apache.org/FlumeUserGuide.html#reporting-metrics-from-custom-components
 
 COLLECTION_INTERVAL = 15 # seconds
 DEFAULT_TIMEOUT = 10.0 # seconds
 FLUME_HOST = "localhost"
 FLUME_PORT = 34545
 
-# Exclude values that are not really metrics
+# Exclude values that are not really metrics and totally pointless to keep track of
 EXCLUDE = [ 'StartTime', 'StopTime', 'Type' ]
 
 def err(msg):
     print >>sys.stderr, msg
 
 class FlumeError(RuntimeError):
-    """Exception raised if we don't get a 200 OK from Flume Monitor Server."""
-
+    """Exception raised if we don't get a 200 OK from Flume webserver."""
     def __init__(self, resp):
         RuntimeError.__init__(self, str(resp))
         self.resp = resp
@@ -68,7 +65,7 @@ def main(argv):
         server.connect()
     except socket.error, (erno, e):
         if erno == errno.ECONNREFUSED:
-            return 13 # No Flume Monitoring Service, ask tcollector to not respawn us.
+            return 13 # No Flume server available, ask tcollector to not respawn us.
         raise
     if json is None:
         err("This collector requires the `json' Python module.")

From 37efc982912c42b5cb5a75bd629f76fb96f51a25 Mon Sep 17 00:00:00 2001
From: Jonathan Creasy
Date: Wed, 24 Sep 2014 10:48:05 -0500
Subject: [PATCH 4/4] Added configuration for proposed Flume collector

---
 collectors/0/flume.py        | 27 +++++++++++++++++++++++++++
 collectors/etc/flume_conf.py | 14 ++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 collectors/etc/flume_conf.py

diff --git a/collectors/0/flume.py b/collectors/0/flume.py
index d3315df7..711ae143 100755
--- a/collectors/0/flume.py
+++ b/collectors/0/flume.py
@@ -27,6 +27,10 @@
 
 from collectors.lib import utils
 
+try:
+    from collectors.etc import flume_conf
+except ImportError:
+    flume_conf = None
 
 COLLECTION_INTERVAL = 15 # seconds
 DEFAULT_TIMEOUT = 10.0 # seconds
@@ -58,6 +62,29 @@ def flume_metrics(server):
     return request(server, "/metrics")
 
 def main(argv):
+    """Poll Flume's JSON metrics endpoint forever and print OpenTSDB data points."""
+    # The overrides below must rebind the module-level defaults; without
+    # this declaration they become locals that are unbound when skipped.
+    global COLLECTION_INTERVAL, DEFAULT_TIMEOUT, FLUME_HOST, FLUME_PORT
+
+    # Exit status 13 asks tcollector to not respawn this collector.
+    if not (flume_conf and flume_conf.enabled() and flume_conf.get_settings()):
+        sys.exit(13)
+
+    settings = flume_conf.get_settings()
+
+    if (settings['default_timeout']):
+        DEFAULT_TIMEOUT = settings['default_timeout']
+
+    if (settings['collection_interval']):
+        COLLECTION_INTERVAL = settings['collection_interval']
+
+    if (settings['flume_host']):
+        FLUME_HOST = settings['flume_host']
+
+    if (settings['flume_port']):
+        FLUME_PORT = settings['flume_port']
+
     utils.drop_privileges()
     socket.setdefaulttimeout(DEFAULT_TIMEOUT)
     server = httplib.HTTPConnection(FLUME_HOST, FLUME_PORT)
diff --git a/collectors/etc/flume_conf.py b/collectors/etc/flume_conf.py
new file mode 100644
index 00000000..e1acd861
--- /dev/null
+++ b/collectors/etc/flume_conf.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python
+
+def enabled():
+    """Return True to run the Flume collector; it is off by default."""
+    return False
+
+def get_settings():
+    """Flume Connection Details"""
+    return {
+        'flume_host': "localhost", # Flume Host to Connect to
+        'flume_port': 34545, # Flume Port to connect to
+        'collection_interval': 15, # seconds, How often to collect metric data
+        'default_timeout': 10.0 # seconds
+    }