diff --git a/collectors/0/couchbase.py b/collectors/0/couchbase.py
index 46dc2e17..572a0b92 100755
--- a/collectors/0/couchbase.py
+++ b/collectors/0/couchbase.py
@@ -21,154 +21,162 @@
COLLECTION_INTERVAL = CONFIG['collection_interval']
COUCHBASE_INITFILE = CONFIG['couchbase_initfile']
-KEYS = frozenset( [
- 'bucket_active_conns',
- 'cas_hits',
- 'cas_misses',
- 'cmd_get',
- 'cmd_set',
- 'curr_connections',
- 'curr_conns_on_port_11209',
- 'curr_conns_on_port_11210',
- 'ep_queue_size',
- 'ep_num_value_ejects',
- 'ep_num_eject_failures',
- 'ep_oom_errors',
- 'ep_tmp_oom_errors',
- 'get_hits',
- 'get_misses',
- 'mem_used',
- 'total_connections',
- 'total_heap_bytes',
- 'total_free_bytes',
- 'total_allocated_bytes',
- 'total_fragmentation_bytes',
- 'tcmalloc_current_thread_cache_bytes',
- 'tcmalloc_max_thread_cache_bytes',
- 'tcmalloc_unmapped_bytes',
- ] )
+KEYS = frozenset([
+ 'bucket_active_conns',
+ 'cas_hits',
+ 'cas_misses',
+ 'cmd_get',
+ 'cmd_set',
+ 'curr_connections',
+ 'curr_conns_on_port_11209',
+ 'curr_conns_on_port_11210',
+ 'ep_queue_size',
+ 'ep_num_value_ejects',
+ 'ep_num_eject_failures',
+ 'ep_oom_errors',
+ 'ep_tmp_oom_errors',
+ 'get_hits',
+ 'get_misses',
+ 'mem_used',
+ 'total_connections',
+ 'total_heap_bytes',
+ 'total_free_bytes',
+ 'total_allocated_bytes',
+ 'total_fragmentation_bytes',
+ 'tcmalloc_current_thread_cache_bytes',
+ 'tcmalloc_max_thread_cache_bytes',
+ 'tcmalloc_unmapped_bytes',
+])
+
def find_couchbase_pid():
- """Find out the pid of couchbase"""
- if not os.path.isfile(COUCHBASE_INITFILE):
- return
-
- try:
- fd = open(COUCHBASE_INITFILE)
- for line in fd:
- if line.startswith("exec"):
- init_script = line.split()[1]
- fd.close()
- except IOError:
- utils.err("Check permission of file (%s)" % COUCHBASE_INITFILE)
- return
-
- try:
- fd = open(init_script)
- for line in fd:
- if line.startswith("PIDFILE"):
- pid_file = line.split("=")[1].rsplit()[0]
- fd.close()
- except IOError:
- utils.err("Check permission of file (%s)" % init_script)
- return
-
- try:
- fd = open(pid_file)
- pid = fd.read()
- fd.close()
- except IOError:
- utils.err("Couchbase-server is not running, since no pid file exists")
- sys.exit(13)
-
- return pid.split()[0]
+ """Find out the pid of couchbase"""
+ if not os.path.isfile(COUCHBASE_INITFILE):
+ return
+
+ try:
+ fd = open(COUCHBASE_INITFILE)
+ for line in fd:
+ if line.startswith("exec"):
+ init_script = line.split()[1]
+ fd.close()
+ except IOError:
+ utils.err("Check permission of file (%s)" % COUCHBASE_INITFILE)
+ return
+
+ try:
+ fd = open(init_script)
+ for line in fd:
+ if line.startswith("PIDFILE"):
+ pid_file = line.split("=")[1].rsplit()[0]
+ fd.close()
+ except IOError:
+ utils.err("Check permission of file (%s)" % init_script)
+ return
+
+ try:
+ fd = open(pid_file)
+ pid = fd.read()
+ fd.close()
+ except IOError:
+ utils.err("Couchbase-server is not running, since no pid file exists")
+ sys.exit(13)
+
+ return pid.split()[0]
+
def find_conf_file(pid):
- """Returns config file for couchbase-server."""
- try:
- fd = open('/proc/%s/cmdline' % pid)
- except IOError as e:
- utils.err("Couchbase (pid %s) went away ? %s" % (pid, e))
- return
- try:
- config = fd.read().split("config_path")[1].split("\"")[1]
- return config
- finally:
- fd.close()
+ """Returns config file for couchbase-server."""
+ try:
+ fd = open('/proc/%s/cmdline' % pid)
+ except IOError as e:
+ utils.err("Couchbase (pid %s) went away ? %s" % (pid, e))
+ return
+ try:
+ config = fd.read().split("config_path")[1].split("\"")[1]
+ return config
+ finally:
+ fd.close()
+
def find_bindir_path(config_file):
- """Returns the bin directory path"""
- try:
- fd = open(config_file)
- except IOError as e:
- utils.err("Error for Config file (%s): %s" % (config_file, e))
- return None
- try:
- for line in fd:
- if line.startswith("{path_config_bindir"):
- return line.split(",")[1].split("\"")[1]
- finally:
- fd.close()
+ """Returns the bin directory path"""
+ try:
+ fd = open(config_file)
+ except IOError as e:
+ utils.err("Error for Config file (%s): %s" % (config_file, e))
+ return None
+ try:
+ for line in fd:
+ if line.startswith("{path_config_bindir"):
+ return line.split(",")[1].split("\"")[1]
+ finally:
+ fd.close()
+
def list_bucket(bin_dir):
- """Returns the list of memcached or membase buckets"""
- buckets = []
- if not os.path.isfile("%s/couchbase-cli" % bin_dir):
+ """Returns the list of memcached or membase buckets"""
+ buckets = []
+ if not os.path.isfile("%s/couchbase-cli" % bin_dir):
+ return buckets
+ cli = ("%s/couchbase-cli" % bin_dir)
+ try:
+ buck = subprocess.check_output([cli, "bucket-list", "--cluster",
+ "localhost:8091"])
+ except subprocess.CalledProcessError:
+ return buckets
+    regex = re.compile(r"[\s\w]+:[\s\w]+$")
+ for i in buck.splitlines():
+ if not regex.match(i):
+ buckets.append(i)
return buckets
- cli = ("%s/couchbase-cli" % bin_dir)
- try:
- buck = subprocess.check_output([cli, "bucket-list", "--cluster",
- "localhost:8091"])
- except subprocess.CalledProcessError:
- return buckets
- regex = re.compile("[\s\w]+:[\s\w]+$")
- for i in buck.splitlines():
- if not regex.match(i):
- buckets.append(i)
- return buckets
+
def collect_stats(bin_dir, bucket):
- """Returns statistics related to a particular bucket"""
- if not os.path.isfile("%s/cbstats" % bin_dir):
- return
- cli = ("%s/cbstats" % bin_dir)
- try:
- ts = time.time()
- stats = subprocess.check_output([cli, "localhost:11211", "-b", bucket,
- "all"])
- except subprocess.CalledProcessError:
- return
- for stat in stats.splitlines():
- metric = stat.split(":")[0].lstrip(" ")
- value = stat.split(":")[1].lstrip(" \t")
- if metric in KEYS:
- print("couchbase.%s %i %s bucket=%s" % (metric, ts, value, bucket))
+ """Returns statistics related to a particular bucket"""
+ if not os.path.isfile("%s/cbstats" % bin_dir):
+ return
+ cli = ("%s/cbstats" % bin_dir)
+ try:
+ ts = time.time()
+ stats = subprocess.check_output([cli, "localhost:11211", "-b", bucket,
+ "all"])
+ except subprocess.CalledProcessError:
+ return
+ for stat in stats.splitlines():
+ metric = stat.split(":")[0].lstrip(" ")
+ value = stat.split(":")[1].lstrip(" \t")
+ if metric in KEYS:
+ print("couchbase.%s %i %s bucket=%s" % (metric, ts, value, bucket))
+
def main():
- utils.drop_privileges()
- pid = find_couchbase_pid()
- if not pid:
- utils.err("Error: Either couchbase-server is not running or file (%s)"
- " doesn't exist" % COUCHBASE_INITFILE)
- return 13
-
- conf_file = find_conf_file(pid)
- if not conf_file:
- utils.err("Error: Can't find config file (%s)" % conf_file)
- return 13
-
- bin_dir = find_bindir_path(conf_file)
- if not bin_dir:
- utils.err("Error: Can't find bindir path in config file")
- return 13
-
- while True:
- # Listing bucket everytime so as to start collecting datapoints
- # of any new bucket.
- buckets = list_bucket(bin_dir)
- for b in buckets:
- collect_stats(bin_dir, b)
- time.sleep(COLLECTION_INTERVAL)
+ utils.drop_privileges()
+ pid = find_couchbase_pid()
+ if not pid:
+ utils.err("Error: Either couchbase-server is not running or file (%s)"
+ " doesn't exist" % COUCHBASE_INITFILE)
+ return 13 # ask tcollector to not respawn us
+
+ conf_file = find_conf_file(pid)
+ if not conf_file:
+ utils.err("Error: Can't find config file (%s)" % conf_file)
+ return 13
+
+ bin_dir = find_bindir_path(conf_file)
+ if not bin_dir:
+ utils.err("Error: Can't find bindir path in config file")
+ return 13
+
+ while True:
+        # List buckets every time so we start collecting datapoints
+        # for any new bucket.
+ buckets = list_bucket(bin_dir)
+ for b in buckets:
+ collect_stats(bin_dir, b)
+ sys.stdout.flush()
+ time.sleep(COLLECTION_INTERVAL)
+
if __name__ == "__main__":
- sys.exit(main())
+ sys.exit(main())
diff --git a/collectors/0/dfstat.py b/collectors/0/dfstat.py
index fe4d7f2f..57b0b565 100755
--- a/collectors/0/dfstat.py
+++ b/collectors/0/dfstat.py
@@ -39,115 +39,115 @@
# File system types to ignore
FSTYPE_IGNORE = frozenset([
- "cgroup",
- "debugfs",
- "devtmpfs",
- "nfs",
- "rpc_pipefs",
- "rootfs",
+ "cgroup",
+ "debugfs",
+ "devtmpfs",
+ "nfs",
+ "rpc_pipefs",
+ "rootfs",
])
+
def main():
- """dfstats main loop"""
- try:
- f_mounts = open("/proc/mounts", "r")
- except IOError as e:
- utils.err("error: can't open /proc/mounts: %s" % e)
- return 13 # Ask tcollector to not respawn us
-
- utils.drop_privileges()
-
- while True:
- devices = []
- f_mounts.seek(0)
- ts = int(time.time())
-
- for line in f_mounts:
- # Docs come from the fstab(5)
- # fs_spec # Mounted block special device or remote filesystem
- # fs_file # Mount point
- # fs_vfstype # File system type
- # fs_mntops # Mount options
- # fs_freq # Dump(8) utility flags
- # fs_passno # Order in which filesystem checks are done at reboot time
- try:
- fs_spec, fs_file, fs_vfstype, fs_mntops, fs_freq, fs_passno = line.split(None)
- except ValueError as e:
- utils.err("error: can't parse line at /proc/mounts: %s" % e)
- continue
-
- if fs_spec == "none":
- continue
- elif fs_vfstype in FSTYPE_IGNORE or fs_vfstype.startswith("fuse."):
- continue
- # startswith(tuple) avoided to preserve support of Python 2.4
- elif fs_file.startswith("/dev") or fs_file.startswith("/sys") or \
- fs_file.startswith("/proc") or fs_file.startswith("/lib") or \
- fs_file.startswith("net:") or fs_file.startswith("/var/lib/kubelet"):
- continue
-
- # keep /dev/xxx device with shorter fs_file (remove mount binds)
- device_found = False
- if fs_spec.startswith("/dev"):
+ """dfstats main loop"""
+ try:
+ f_mounts = open("/proc/mounts", "r")
+ except IOError as e:
+ utils.err("error: can't open /proc/mounts: %s" % e)
+ return 13 # Ask tcollector to not respawn us
+
+ utils.drop_privileges()
+
+ while True:
+ devices = []
+ f_mounts.seek(0)
+ ts = int(time.time())
+
+ for line in f_mounts:
+ # Docs come from the fstab(5)
+ # fs_spec # Mounted block special device or remote filesystem
+ # fs_file # Mount point
+ # fs_vfstype # File system type
+ # fs_mntops # Mount options
+ # fs_freq # Dump(8) utility flags
+ # fs_passno # Order in which filesystem checks are done at reboot time
+ try:
+ fs_spec, fs_file, fs_vfstype, fs_mntops, fs_freq, fs_passno = line.split(None)
+ except ValueError as e:
+ utils.err("error: can't parse line at /proc/mounts: %s" % e)
+ continue
+
+ if fs_spec == "none":
+ continue
+ elif fs_vfstype in FSTYPE_IGNORE or fs_vfstype.startswith("fuse."):
+ continue
+ # startswith(tuple) avoided to preserve support of Python 2.4
+ elif fs_file.startswith("/dev") or fs_file.startswith("/sys") or \
+ fs_file.startswith("/proc") or fs_file.startswith("/lib") or \
+ fs_file.startswith("net:") or fs_file.startswith("/var/lib/kubelet"):
+ continue
+
+ # keep /dev/xxx device with shorter fs_file (remove mount binds)
+ device_found = False
+ if fs_spec.startswith("/dev"):
+ for device in devices:
+ if fs_spec == device[0]:
+ device_found = True
+ if len(fs_file) < len(device[1]):
+ device[1] = fs_file
+ break
+ if not device_found:
+ devices.append([fs_spec, fs_file, fs_vfstype])
+ else:
+ devices.append([fs_spec, fs_file, fs_vfstype])
+
for device in devices:
- if fs_spec == device[0]:
- device_found = True
- if len(fs_file) < len(device[1]):
- device[1] = fs_file
- break
- if not device_found:
- devices.append([fs_spec, fs_file, fs_vfstype])
- else:
- devices.append([fs_spec, fs_file, fs_vfstype])
-
-
- for device in devices:
- fs_spec, fs_file, fs_vfstype = device
- try:
- r = os.statvfs(fs_file)
- except OSError as e:
- utils.err("can't get info for mount point: %s: %s" % (fs_file, e))
- continue
-
- used = r.f_blocks - r.f_bfree
-
- # conditional expression avoided to preserve support of Python 2.4
- # percent_used = 100 if r.f_blocks == 0 else used * 100.0 / (used + r.f_bavail)
- if r.f_blocks == 0:
- percent_used = 100
- else:
- percent_used = used * 100.0 / (used + r.f_bavail)
-
- print("df.bytes.total %d %s mount=%s fstype=%s"
- % (ts, r.f_frsize * r.f_blocks, fs_file, fs_vfstype))
- print("df.bytes.used %d %s mount=%s fstype=%s"
- % (ts, r.f_frsize * used, fs_file, fs_vfstype))
- print("df.bytes.percentused %d %s mount=%s fstype=%s"
- % (ts, percent_used, fs_file, fs_vfstype))
- print("df.bytes.free %d %s mount=%s fstype=%s"
- % (ts, r.f_frsize * r.f_bavail, fs_file, fs_vfstype))
-
- used = r.f_files - r.f_ffree
-
- # percent_used = 100 if r.f_files == 0 else used * 100.0 / r.f_files
- if r.f_files == 0:
- percent_used = 100
- else:
- percent_used = used * 100.0 / r.f_files
-
- print("df.inodes.total %d %s mount=%s fstype=%s"
- % (ts, r.f_files, fs_file, fs_vfstype))
- print("df.inodes.used %d %s mount=%s fstype=%s"
- % (ts, used, fs_file, fs_vfstype))
- print("df.inodes.percentused %d %s mount=%s fstype=%s"
- % (ts, percent_used, fs_file, fs_vfstype))
- print("df.inodes.free %d %s mount=%s fstype=%s"
- % (ts, r.f_ffree, fs_file, fs_vfstype))
-
- sys.stdout.flush()
- time.sleep(COLLECTION_INTERVAL)
+ fs_spec, fs_file, fs_vfstype = device
+ try:
+ r = os.statvfs(fs_file)
+ except OSError as e:
+ utils.err("can't get info for mount point: %s: %s" % (fs_file, e))
+ continue
+
+ used = r.f_blocks - r.f_bfree
+
+ # conditional expression avoided to preserve support of Python 2.4
+ # percent_used = 100 if r.f_blocks == 0 else used * 100.0 / (used + r.f_bavail)
+ if r.f_blocks == 0:
+ percent_used = 100
+ else:
+ percent_used = used * 100.0 / (used + r.f_bavail)
+
+ print("df.bytes.total %d %s mount=%s fstype=%s"
+ % (ts, r.f_frsize * r.f_blocks, fs_file, fs_vfstype))
+ print("df.bytes.used %d %s mount=%s fstype=%s"
+ % (ts, r.f_frsize * used, fs_file, fs_vfstype))
+ print("df.bytes.percentused %d %s mount=%s fstype=%s"
+ % (ts, percent_used, fs_file, fs_vfstype))
+ print("df.bytes.free %d %s mount=%s fstype=%s"
+ % (ts, r.f_frsize * r.f_bavail, fs_file, fs_vfstype))
+
+ used = r.f_files - r.f_ffree
+
+ # percent_used = 100 if r.f_files == 0 else used * 100.0 / r.f_files
+ if r.f_files == 0:
+ percent_used = 100
+ else:
+ percent_used = used * 100.0 / r.f_files
+
+ print("df.inodes.total %d %s mount=%s fstype=%s"
+ % (ts, r.f_files, fs_file, fs_vfstype))
+ print("df.inodes.used %d %s mount=%s fstype=%s"
+ % (ts, used, fs_file, fs_vfstype))
+ print("df.inodes.percentused %d %s mount=%s fstype=%s"
+ % (ts, percent_used, fs_file, fs_vfstype))
+ print("df.inodes.free %d %s mount=%s fstype=%s"
+ % (ts, r.f_ffree, fs_file, fs_vfstype))
+
+ sys.stdout.flush()
+ time.sleep(COLLECTION_INTERVAL)
if __name__ == "__main__":
- sys.stdin.close()
- sys.exit(main())
+ sys.stdin.close()
+ sys.exit(main())
diff --git a/collectors/0/docker.py b/collectors/0/docker.py
index 422b0386..e137c113 100755
--- a/collectors/0/docker.py
+++ b/collectors/0/docker.py
@@ -17,13 +17,13 @@
CONFIG = docker_conf.get_config()
COLLECTION_INTERVAL = CONFIG['interval']
-CGROUP_PATH =CONFIG['cgroup_path']
+CGROUP_PATH = CONFIG['cgroup_path']
ENABLED = docker_conf.enabled()
DOCKER_SOCK = CONFIG['socket_path']
if not ENABLED:
- sys.stderr.write("Docker collector is not enabled")
- sys.exit(13)
+ utils.err("Docker collector is not enabled")
+ sys.exit(13)
# proc_names example:
# $ cat cpuacct.stat
@@ -54,6 +54,7 @@
),
}
+
def getnameandimage(containerid):
# Retrieve container json configuration file
@@ -61,7 +62,7 @@ def getnameandimage(containerid):
sock.settimeout(5)
try:
r = sock.connect_ex(DOCKER_SOCK)
- if (r != 0):
+ if r != 0:
print("Can not connect to %s" % (DOCKER_SOCK), file=sys.stderr)
else:
message = 'GET /containers/' + containerid + '/json HTTP/1.1\r\nHost: http\n\n'
@@ -92,6 +93,7 @@ def getnameandimage(containerid):
except socket.timeout as e:
print("Socket: %s" % (e,), file=sys.stderr)
+
def senddata(datatosend, containerid):
if datatosend:
datatosend += " containerid="+containerid
@@ -102,6 +104,7 @@ def senddata(datatosend, containerid):
print("docker.%s" % datatosend)
sys.stdout.flush()
+
def readdockerstats(path, containerid):
# update containername and containerimage if needed
@@ -164,20 +167,21 @@ def readdockerstats(path, containerid):
senddata("%s %d %s" % (datatosend, ts, count), containerid)
f_stat.close()
+
def main():
"""docker_cpu main loop"""
global containernames
global containerimages
utils.drop_privileges()
- cache=0
+ cache = 0
while True:
# Connect to Docker socket to get informations about containers every 4 times
- if (cache == 0):
+ if cache == 0:
containernames={}
containerimages={}
cache += 1
- if (cache == 4):
+ if cache == 4:
cache = 0
if os.path.isdir(CGROUP_PATH):
@@ -207,5 +211,6 @@ def main():
readdockerstats(CGROUP_PATH + "/lxc/"+level1, level1)
time.sleep(COLLECTION_INTERVAL)
+
if __name__ == "__main__":
sys.exit(main())
diff --git a/collectors/0/docker_engine.py b/collectors/0/docker_engine.py
index 9ea4b812..d82d6993 100755
--- a/collectors/0/docker_engine.py
+++ b/collectors/0/docker_engine.py
@@ -16,6 +16,7 @@
from __future__ import print_function
import sys
+from collectors.lib import utils
from collectors.etc import docker_engine_conf
from collectors.lib.docker_engine.docker_metrics import DockerMetrics
@@ -26,8 +27,8 @@
def main():
if not ENABLED:
- sys.stderr.write("Docker-engine collector is not enabled")
- sys.exit(13)
+ utils.err("Docker-engine collector is not enabled")
+ return 13 # ask tcollector to not respawn us
"""docker_cpu main loop"""
cli = DockerMetrics(METRICS_PATH)
diff --git a/collectors/0/elasticsearch.py b/collectors/0/elasticsearch.py
index 8329be75..afd6b056 100755
--- a/collectors/0/elasticsearch.py
+++ b/collectors/0/elasticsearch.py
@@ -16,10 +16,7 @@
# Tested with ES 0.16.5, 0.17.x, 0.90.1 .
import errno
-try:
- import json
-except ImportError:
- json = None # Handled gracefully in main. Not available by default in <2.6
+import json
import socket
import sys
import threading
@@ -28,212 +25,213 @@
from collectors.lib import utils
from collectors.etc import elasticsearch_conf
-
-try:
- from http.client import HTTPConnection, OK
-except ImportError:
- from httplib import HTTPConnection, OK
-
+from http.client import HTTPConnection, OK
COLLECTION_INTERVAL = 15 # seconds
-DEFAULT_TIMEOUT = 10.0 # seconds
+DEFAULT_TIMEOUT = 10.0 # seconds
# regexes to separate differences in version numbers
PRE_VER1 = re.compile(r'^0\.')
VER1 = re.compile(r'^1\.')
STATUS_MAP = {
- "green": 0,
- "yellow": 1,
- "red": 2,
+ "green": 0,
+ "yellow": 1,
+ "red": 2,
}
class ESError(RuntimeError):
- """Exception raised if we don't get a 200 OK from ElasticSearch."""
+ """Exception raised if we don't get a 200 OK from ElasticSearch."""
- def __init__(self, resp):
- RuntimeError.__init__(self, str(resp))
- self.resp = resp
+ def __init__(self, resp):
+ RuntimeError.__init__(self, str(resp))
+ self.resp = resp
-def request(server, uri, json_in = True):
- """Does a GET request of the given uri on the given HTTPConnection."""
- server.request("GET", uri)
- resp = server.getresponse()
- if resp.status != OK:
- raise ESError(resp)
- if json_in:
- return json.loads(resp.read())
- else:
- return resp.read()
+def request(server, uri, json_in=True):
+ """Does a GET request of the given uri on the given HTTPConnection."""
+ server.request("GET", uri)
+ resp = server.getresponse()
+ if resp.status != OK:
+ raise ESError(resp)
+ if json_in:
+ return json.loads(resp.read())
+ else:
+ return resp.read()
def cluster_health(server):
- return request(server, "/_cluster/health")
+ return request(server, "/_cluster/health")
def cluster_stats(server):
- return request(server, "/_cluster/stats")
+ return request(server, "/_cluster/stats")
def cluster_master_node(server):
- return request(server, "/_cat/master", json_in = False).split()[0]
+ return request(server, "/_cat/master", json_in=False).split()[0]
def index_stats(server):
- return request(server, "/_cat/indices?v&bytes=b", json_in = False)
+ return request(server, "/_cat/indices?v&bytes=b", json_in=False)
def node_status(server):
- return request(server, "/")
+ return request(server, "/")
def node_stats(server, version):
- # API changed in v1.0
- if PRE_VER1.match(version):
- url = "/_cluster/nodes/_local/stats"
- # elif VER1.match(version):
- # url = "/_nodes/_local/stats"
- else:
- url = "/_nodes/_local/stats"
- return request(server, url)
+ # API changed in v1.0
+ if PRE_VER1.match(version):
+ url = "/_cluster/nodes/_local/stats"
+ # elif VER1.match(version):
+ # url = "/_nodes/_local/stats"
+ else:
+ url = "/_nodes/_local/stats"
+ return request(server, url)
+
def printmetric(metric, ts, value, tags):
- # Warning, this should be called inside a lock
- if tags:
- tags = " " + " ".join("%s=%s" % (name.replace(" ",""), value.replace(" ",""))
- for name, value in tags.items())
- else:
- tags = ""
- # Convert any bool values to int, as opentsdb only accepts int or float.
- if isinstance(value, bool):
- value = int(value)
- print("%s %d %s %s"
- % (metric, ts, value, tags))
+ # Warning, this should be called inside a lock
+ if tags:
+ tags = " " + " ".join("%s=%s" % (name.replace(" ", ""), value.replace(" ", ""))
+ for name, value in tags.items())
+ else:
+ tags = ""
+ # Convert any bool values to int, as opentsdb only accepts int or float.
+ if isinstance(value, bool):
+ value = int(value)
+ print("%s %d %s %s"
+ % (metric, ts, value, tags))
+
def _traverse(metric, stats, ts, tags):
- """
- Recursively traverse the json tree and print out leaf numeric values
- Please make sure you call this inside a lock and don't add locking
- inside this function
- """
- #print metric,stats,ts,tags
- if isinstance(stats,dict):
- if "timestamp" in stats:
- ts = stats["timestamp"] / 1000 # ms -> s
- for key in stats.keys():
- if key != "timestamp":
- _traverse(metric + "." + key, stats[key], ts, tags)
- if isinstance(stats, (list, set, tuple)):
- count = 0
- for value in stats:
- _traverse(metric + "." + str(count), value, ts, tags)
- count += 1
- if utils.is_numeric(stats) and not isinstance(stats, bool):
- if isinstance(stats, int):
- stats = int(stats)
- printmetric(metric, ts, stats, tags)
- return
+ """
+ Recursively traverse the json tree and print out leaf numeric values
+ Please make sure you call this inside a lock and don't add locking
+ inside this function
+ """
+ # print metric,stats,ts,tags
+ if isinstance(stats, dict):
+ if "timestamp" in stats:
+ ts = stats["timestamp"] / 1000 # ms -> s
+ for key in stats.keys():
+ if key != "timestamp":
+ _traverse(metric + "." + key, stats[key], ts, tags)
+ if isinstance(stats, (list, set, tuple)):
+ count = 0
+ for value in stats:
+ _traverse(metric + "." + str(count), value, ts, tags)
+ count += 1
+ if utils.is_numeric(stats) and not isinstance(stats, bool):
+ if isinstance(stats, int):
+ stats = int(stats)
+ printmetric(metric, ts, stats, tags)
+ return
+
def _collect_indices(server, metric, tags, lock):
- ts = int(time.time())
- rawtable = index_stats(server).split("\n")
- header = rawtable.pop(0).strip()
- headerlist = [x.strip() for x in header.split()]
- for line in rawtable:
- # Copy the cluster tag
- newtags = {"cluster": tags["cluster"]}
- # Now parse each input
- values = line.split()
- count = 0
- for value in values:
- try:
- value = float(value)
- if int(value) == value:
- value = int(value)
- # now print value
- with lock:
- printmetric(metric + ".cluster.byindex." + headerlist[count], ts, value, newtags)
- except ValueError:
- # add this as a tag
- newtags[headerlist[count]] = value
- count += 1
+ ts = int(time.time())
+ rawtable = index_stats(server).split("\n")
+ header = rawtable.pop(0).strip()
+ headerlist = [x.strip() for x in header.split()]
+ for line in rawtable:
+ # Copy the cluster tag
+ newtags = {"cluster": tags["cluster"]}
+ # Now parse each input
+ values = line.split()
+ count = 0
+ for value in values:
+ try:
+ value = float(value)
+ if int(value) == value:
+ value = int(value)
+ # now print value
+ with lock:
+ printmetric(metric + ".cluster.byindex." + headerlist[count], ts, value, newtags)
+ except ValueError:
+ # add this as a tag
+ newtags[headerlist[count]] = value
+ count += 1
+
def _collect_master(server, nodeid, metric, tags, lock):
- ts = int(time.time())
- chealth = cluster_health(server)
- if "status" in chealth:
+ ts = int(time.time())
+ chealth = cluster_health(server)
+ if "status" in chealth:
+ with lock:
+ printmetric(metric + ".cluster.status", ts,
+ STATUS_MAP.get(chealth["status"], -1), tags)
with lock:
- printmetric(metric + ".cluster.status", ts,
- STATUS_MAP.get(chealth["status"], -1), tags)
- with lock:
- _traverse(metric + ".cluster", chealth, ts, tags)
+ _traverse(metric + ".cluster", chealth, ts, tags)
+
+ ts = int(time.time()) # In case last call took a while.
+ cstats = cluster_stats(server)
+ with lock:
+ _traverse(metric + ".cluster", cstats, ts, tags)
- ts = int(time.time()) # In case last call took a while.
- cstats = cluster_stats(server)
- with lock:
- _traverse(metric + ".cluster", cstats, ts, tags)
def _collect_server(server, version, lock):
- ts = int(time.time())
- rootmetric = "elasticsearch"
- nstats = node_stats(server, version)
- cluster_name = nstats["cluster_name"]
- nodeid, nstats = nstats["nodes"].popitem()
- node_name = nstats["name"]
- tags = {"cluster": cluster_name, "node": node_name}
- #tags.update(nstats["attributes"])
-
- if nodeid == cluster_master_node(server):
- is_master = 1
- else:
- is_master = 0
- with lock:
- printmetric(rootmetric + ".is_master", ts, is_master, tags)
- if is_master:
- _collect_master(server, nodeid, rootmetric, tags, lock)
-
- _collect_indices(server, rootmetric, tags, lock)
-
- with lock:
- _traverse(rootmetric, nstats, ts, tags)
+ ts = int(time.time())
+ rootmetric = "elasticsearch"
+ nstats = node_stats(server, version)
+ cluster_name = nstats["cluster_name"]
+ nodeid, nstats = nstats["nodes"].popitem()
+ node_name = nstats["name"]
+ tags = {"cluster": cluster_name, "node": node_name}
+ # tags.update(nstats["attributes"])
+
+ if nodeid == cluster_master_node(server):
+ is_master = 1
+ else:
+ is_master = 0
+ with lock:
+ printmetric(rootmetric + ".is_master", ts, is_master, tags)
+ if is_master:
+ _collect_master(server, nodeid, rootmetric, tags, lock)
+
+ _collect_indices(server, rootmetric, tags, lock)
+
+ with lock:
+ _traverse(rootmetric, nstats, ts, tags)
def main(argv):
- utils.drop_privileges()
- socket.setdefaulttimeout(DEFAULT_TIMEOUT)
- servers = []
-
- if json is None:
- utils.err("This collector requires the `json' Python module.")
- return 1
-
- for conf in elasticsearch_conf.get_servers():
- server = HTTPConnection( *conf )
- try:
- server.connect()
- except socket.error as exc:
- if exc.errno == errno.ECONNREFUSED:
- continue
- raise
- servers.append( server )
-
- if len( servers ) == 0:
- return 13 # No ES running, ask tcollector to not respawn us.
-
- lock = threading.Lock()
- while True:
- threads = []
- for server in servers:
- status = node_status(server)
- version = status["version"]["number"]
- t = threading.Thread(target = _collect_server, args = (server, version, lock))
- t.start()
- threads.append(t)
- for thread in threads:
- thread.join()
- time.sleep(COLLECTION_INTERVAL)
+ utils.drop_privileges()
+ socket.setdefaulttimeout(DEFAULT_TIMEOUT)
+ servers = []
+
+ if json is None:
+ utils.err("This collector requires the `json' Python module.")
+ return 13
+
+ for conf in elasticsearch_conf.get_servers():
+ server = HTTPConnection(*conf)
+ try:
+ server.connect()
+ except socket.error as exc:
+ if exc.errno == errno.ECONNREFUSED:
+ continue
+ raise
+ servers.append(server)
+
+ if len(servers) == 0:
+ return 13 # No ES running, ask tcollector to not respawn us.
+
+ lock = threading.Lock()
+ while True:
+ threads = []
+ for server in servers:
+ status = node_status(server)
+ version = status["version"]["number"]
+ t = threading.Thread(target=_collect_server, args=(server, version, lock))
+ t.start()
+ threads.append(t)
+ for thread in threads:
+ thread.join()
+ time.sleep(COLLECTION_INTERVAL)
+
if __name__ == "__main__":
- sys.exit(main(sys.argv))
+ sys.exit(main(sys.argv))
diff --git a/collectors/0/flume.py b/collectors/0/flume.py
index d7202cca..521d1ff8 100755
--- a/collectors/0/flume.py
+++ b/collectors/0/flume.py
@@ -31,108 +31,102 @@
from __future__ import print_function
import errno
-try:
- import json
-except ImportError:
- json = None # Handled gracefully in main. Not available by default in <2.6
+import json
import socket
import sys
import time
+from http.client import HTTPConnection, OK
from collectors.lib import utils
try:
- from collectors.etc import flume_conf
-except ImportError:
- flume_conf = None
-
-try:
- from http.client import HTTPConnection, OK
+ from collectors.etc import flume_conf
except ImportError:
- from httplib import HTTPConnection, OK
+ flume_conf = None
COLLECTION_INTERVAL = 15 # seconds
-DEFAULT_TIMEOUT = 10.0 # seconds
+DEFAULT_TIMEOUT = 10.0 # seconds
FLUME_HOST = "localhost"
FLUME_PORT = 34545
# Exclude values that are not really metrics and totally pointless to keep track of
-EXCLUDE = [ 'StartTime', 'StopTime', 'Type' ]
+EXCLUDE = ['StartTime', 'StopTime', 'Type']
-def err(msg):
- print(msg, file=sys.stderr)
class FlumeError(RuntimeError):
- """Exception raised if we don't get a 200 OK from Flume webserver."""
- def __init__(self, resp):
- RuntimeError.__init__(self, str(resp))
- self.resp = resp
+ """Exception raised if we don't get a 200 OK from Flume webserver."""
+
+ def __init__(self, resp):
+ RuntimeError.__init__(self, str(resp))
+ self.resp = resp
+
def request(server, uri):
- """Does a GET request of the given uri on the given HTTPConnection."""
- server.request("GET", uri)
- resp = server.getresponse()
- if resp.status != OK:
- raise FlumeError(resp)
- return json.loads(resp.read())
+ """Does a GET request of the given uri on the given HTTPConnection."""
+ server.request("GET", uri)
+ resp = server.getresponse()
+ if resp.status != OK:
+ raise FlumeError(resp)
+ return json.loads(resp.read())
def flume_metrics(server):
- return request(server, "/metrics")
+ return request(server, "/metrics")
+
def main(argv):
- if not (flume_conf and flume_conf.enabled() and flume_conf.get_settings()):
- sys.exit(13)
-
- settings = flume_conf.get_settings()
-
- if (settings['default_timeout']):
- DEFAULT_TIMEOUT = settings['default_timeout']
-
- if (settings['default_timeout']):
- COLLECTION_INTERVAL = settings['collection_interval']
-
- if (settings['flume_host']):
- FLUME_HOST = settings['flume_host']
-
- if (settings['flume_port']):
- FLUME_PORT = settings['flume_port']
-
- utils.drop_privileges()
- socket.setdefaulttimeout(DEFAULT_TIMEOUT)
- server = HTTPConnection(FLUME_HOST, FLUME_PORT)
- try:
- server.connect()
- except socket.error as exc:
- if exc.errno == errno.ECONNREFUSED:
- return 13 # No Flume server available, ask tcollector to not respawn us.
- raise
- if json is None:
- err("This collector requires the `json' Python module.")
- return 1
-
- def printmetric(metric, value, **tags):
- if tags:
- tags = " " + " ".join("%s=%s" % (name, value)
- for name, value in tags.items())
- else:
- tags = ""
- print(("flume.%s %d %s %s" % (metric, ts, value, tags)))
-
- while True:
- # Get the metrics
- ts = int(time.time()) # In case last call took a while.
- stats = flume_metrics(server)
-
- for metric in stats:
- (component, name) = metric.split(".")
- tags = {component.lower(): name}
- for key,value in stats[metric].items():
- if key not in EXCLUDE:
- printmetric(key.lower(), value, **tags)
-
- time.sleep(COLLECTION_INTERVAL)
+ if not (flume_conf and flume_conf.enabled() and flume_conf.get_settings()):
+ sys.exit(13)
+
+ settings = flume_conf.get_settings()
+
+ if settings['default_timeout']:
+ DEFAULT_TIMEOUT = settings['default_timeout']
+
+    if settings['collection_interval']:
+ COLLECTION_INTERVAL = settings['collection_interval']
+
+ if settings['flume_host']:
+ FLUME_HOST = settings['flume_host']
+
+ if settings['flume_port']:
+ FLUME_PORT = settings['flume_port']
+
+ utils.drop_privileges()
+ socket.setdefaulttimeout(DEFAULT_TIMEOUT)
+ server = HTTPConnection(FLUME_HOST, FLUME_PORT)
+ try:
+ server.connect()
+ except socket.error as exc:
+ if exc.errno == errno.ECONNREFUSED:
+ return 13 # No Flume server available, ask tcollector to not respawn us.
+ raise
+ if json is None:
+ utils.err("This collector requires the `json' Python module.")
+ return 13 # ask tcollector to not respawn us
+
+ def printmetric(metric, value, **tags):
+ if tags:
+ tags = " " + " ".join("%s=%s" % (name, value)
+ for name, value in tags.items())
+ else:
+ tags = ""
+ print(("flume.%s %d %s %s" % (metric, ts, value, tags)))
+
+ while True:
+ # Get the metrics
+ ts = int(time.time()) # In case last call took a while.
+ stats = flume_metrics(server)
+
+ for metric in stats:
+ (component, name) = metric.split(".")
+ tags = {component.lower(): name}
+ for key, value in stats[metric].items():
+ if key not in EXCLUDE:
+ printmetric(key.lower(), value, **tags)
+
+ time.sleep(COLLECTION_INTERVAL)
if __name__ == "__main__":
- sys.exit(main(sys.argv))
+ sys.exit(main(sys.argv))
diff --git a/collectors/0/g1gc.py b/collectors/0/g1gc.py
index 593db037..26f88307 100755
--- a/collectors/0/g1gc.py
+++ b/collectors/0/g1gc.py
@@ -70,7 +70,6 @@
import traceback
from datetime import datetime, timedelta
-from subprocess import Popen, PIPE
from collectors.lib import utils
from collectors.etc import g1gc_conf
@@ -128,11 +127,13 @@
}
+
# Utilities
def get_file_end(file_handler):
file_handler.seek(0, 2)
return file_handler.tell()
+
def get_latest_gc_log(log_dir, log_name_pattern):
sorted_gc_logs = sorted(glob.glob(os.path.join(log_dir, log_name_pattern)))
if len(sorted_gc_logs) == 0:
@@ -140,25 +141,30 @@ def get_latest_gc_log(log_dir, log_name_pattern):
log_dir + '" with pattern: "' + log_name_pattern + '"')
return sorted_gc_logs[-1]
+
def true_unix_timestamp(year, month, day, hour, minute, second, timezone):
d = datetime(year, month, day, hour, minute, second) - timedelta(seconds=36 * timezone)
return calendar.timegm(d.utctimetuple())
+
def to_size_in_mb(data_size, unit):
'''Convert size in given unit: GB or B to size in MB '''
if unit == 'G': return data_size * 1024
elif unit == 'B': return data_size / (1024 * 1024.0)
else: return data_size
+
def match_pattern(line):
for pattern_name, pattern in pattern_map.items():
m = pattern.match(line)
if m: return (pattern_name, m)
return (None, None)
+
def sec2milli(seconds):
return 1000 * seconds
+
def flush_collector(collector):
for metric_name, value in collector['data'].items():
print(metric_name % (collector['timestamp'], value))
@@ -166,6 +172,7 @@ def flush_collector(collector):
collector['timestamp'] = None
collector['data'] = {}
+
def collect_metric(metric_name, timestamp, value, collector):
if collector['timestamp'] != timestamp:
flush_collector(collector)
@@ -173,6 +180,7 @@ def collect_metric(metric_name, timestamp, value, collector):
collector['timestamp'] = timestamp
collector['data'][metric_name] = collector['data'].get(metric_name, 0) + value
+
def collect_metric_with_prefix(prefix, metric_name, timestamp, value, collector):
new_metric_name = metric_name
p = '' if prefix is None else prefix.strip()
@@ -180,38 +188,47 @@ def collect_metric_with_prefix(prefix, metric_name, timestamp, value, collector)
new_metric_name = '.'.join([p, metric_name])
collect_metric(new_metric_name, timestamp, value, collector)
+
def unmatched_gc_log(line): pass
+
# Simple gc events, don't have inner gc events
def concurrent_cleanup_handler(prefix, log_line, timestamp, collector, file_handler):
concurrent_clean_up_time = sec2milli(float(pattern_map[GC_PAUSE_PATTERN].match(log_line).group(1)))
collect_metric_with_prefix(prefix, "gc.g1.concurrent_cleanup %s %s", timestamp, concurrent_clean_up_time, collector)
+
def concurrent_mark_handler(prefix, log_line, timestamp, collector, file_handler):
concurrent_mark_time = sec2milli(float(pattern_map[GC_PAUSE_PATTERN].match(log_line).group(1)))
collect_metric_with_prefix(prefix, "gc.g1.concurrent_mark %s %s", timestamp, concurrent_mark_time, collector)
+
def concurrent_root_region_scan_handler(prefix, log_line, timestamp, collector, file_handler):
concurrent_root_region_scan_time = sec2milli(float(pattern_map[GC_PAUSE_PATTERN].match(log_line).group(1)))
collect_metric_with_prefix(prefix, "gc.g1.concurrent_root_region_scan %s %s", timestamp, concurrent_root_region_scan_time, collector)
+
def cleanup_handler(prefix, log_line, timestamp, collector, file_handler):
clean_up_time = sec2milli(float(pattern_map[GC_PAUSE_PATTERN].match(log_line).group(1)))
collect_metric_with_prefix(prefix, "gc.g1.duration %s %s phase=cleanup", timestamp, clean_up_time, collector)
+
def fullgc_handler(prefix, log_line, timestamp, collector, file_handler):
full_gc_time = sec2milli(float(pattern_map[GC_PAUSE_PATTERN].match(log_line).group(1)))
collect_metric_with_prefix(prefix, "gc.g1.fullgc.duration %s %s", timestamp, full_gc_time, collector)
+
# Inner gc events, which we should have a matcher object
def parallel_time_handler(prefix, matcher, timestamp, collector, file_handler):
parallel_time, num_of_gc_workers = float(matcher.group(1)), float(matcher.group(2))
collect_metric_with_prefix(prefix, "gc.g1.duration %s %s phase=parallel-time", timestamp, parallel_time, collector)
+
def object_copy_handler(prefix, matcher, timestamp, collector, file_handler):
min_time, avg_time, max_time = [float(matcher.group(i)) for i in range(1, 4)]
collect_metric_with_prefix(prefix, "gc.g1.duration %s %s phase=object-copy", timestamp, avg_time, collector)
+
def allocation_handler(prefix, matcher, timestamp, collector, file_handler):
eden_before_in_size, eden_after_in_size = matcher.group(2), matcher.group(4)
eden_before = to_size_in_mb(float(matcher.group(1)), eden_before_in_size)
@@ -236,30 +253,37 @@ def allocation_handler(prefix, matcher, timestamp, collector, file_handler):
collector['gensize']['survivor'] = survivor_after
collector['gensize']['heap'] = heap_after_in_mb
+
def free_cset_handler(prefix, matcher, timestamp, collector, file_handler):
free_cset_time = float(matcher.group(1))
collect_metric_with_prefix(prefix, "gc.g1.duration %s %s phase=free-cset", timestamp, free_cset_time, collector)
+
def ref_enq_handler(prefix, matcher, timestamp, collector, file_handler):
ref_enq_time = float(matcher.group(1))
collect_metric_with_prefix(prefix, "gc.g1.duration %s %s phase=ref-enq", timestamp, ref_enq_time, collector)
+
def ref_proc_handler(prefix, matcher, timestamp, collector, file_handler):
ref_proc_time = float(matcher.group(1))
collect_metric_with_prefix(prefix, "gc.g1.duration %s %s phase=ref-proc", timestamp, ref_proc_time, collector)
+
def choose_cset_handler(prefix, matcher, timestamp, collector, file_handler):
choose_cset_time = float(matcher.group(1))
collect_metric_with_prefix(prefix, "gc.g1.duration %s %s phase=choose-cset", timestamp, choose_cset_time, collector)
+
def clear_ct_handler(prefix, matcher, timestamp, collector, file_handler):
clear_ct_time = float(matcher.group(1))
collect_metric_with_prefix(prefix, "gc.g1.duration %s %s phase=clear-ct", timestamp, clear_ct_time, collector)
+
def scan_rs_handler(prefix, matcher, timestamp, collector, file_handler):
min_time, avg_time, max_time = [float(matcher.group(i)) for i in range(1, 4)]
collect_metric_with_prefix(prefix, "gc.g1.duration %s %s phase=scan-rs", timestamp, avg_time, collector)
+
# Complex GC events: initial-mark, young-pause, mixed-pause and remark
# These GC events contains several inner gc events and we must call match_remaining_log to parse remaining gc events
def initial_mark_handler(prefix, log_line, timestamp, collector, file_handler):
@@ -268,24 +292,28 @@ def initial_mark_handler(prefix, log_line, timestamp, collector, file_handler):
collect_metric_with_prefix(prefix, "gc.g1.duration %s %s phase=initial-mark", timestamp, initial_mark_pause_time, collector)
match_remaining_log(prefix, timestamp, collector, file_handler)
+
def young_pause_handler(prefix, log_line, timestamp, collector, file_handler):
m = pattern_map[GC_PAUSE_PATTERN].match(log_line)
young_pause_time = sec2milli(float(m.group(1)))
collect_metric_with_prefix(prefix, "gc.g1.duration %s %s phase=young-pause", timestamp, young_pause_time, collector)
match_remaining_log(prefix, timestamp, collector, file_handler)
+
def mixed_pause_handler(prefix, log_line, timestamp, collector, file_handler):
m = pattern_map[GC_PAUSE_PATTERN].match(log_line)
mixed_pause_time = sec2milli(float(m.group(1)))
collect_metric_with_prefix(prefix, "gc.g1.duration %s %s phase=mixed-pause", timestamp, mixed_pause_time, collector)
match_remaining_log(prefix, timestamp, collector, file_handler)
+
def remark_handler(prefix, log_line, timestamp, collector, file_handler):
m = pattern_map[REMARK_PATTERN].match(log_line)
ref_process_time, remark_time = [sec2milli(float(m.group(i))) for i in range(1, 3)]
collect_metric_with_prefix(prefix, "gc.g1.duration %s %s phase=remark", timestamp, remark_time, collector)
match_remaining_log(prefix, timestamp, collector, file_handler)
+
def match_remaining_log(prefix, timestamp, collector, file_handler):
while True:
line = file_handler.readline()
@@ -304,9 +332,11 @@ def match_remaining_log(prefix, timestamp, collector, file_handler):
elif pattern_name == CLEAR_CT_PATTERN: clear_ct_handler(prefix, matcher, timestamp, collector, file_handler)
else: unmatched_gc_log(line)
+
def isPause(type, cause):
return 'GC pause' in cause and type in cause
+
def process_gc_record(prefix, file_handler, timestamp, cause, collector):
# process simple gc events
if 'concurrent-cleanup-end' in cause: concurrent_cleanup_handler(prefix, cause, timestamp, collector, file_handler)
@@ -332,6 +362,7 @@ def process_gc_record(prefix, file_handler, timestamp, cause, collector):
remark_handler(prefix, cause, timestamp, collector, file_handler)
elif cause[-1] == ']': return
+
def process_gc_log(collector):
prefix = collector['prefix']
@@ -385,6 +416,7 @@ def process_gc_log(collector):
return 0
+
def main():
interval = g1gc_conf.get_interval()
@@ -407,5 +439,7 @@ def main():
sys.stdout.flush()
time.sleep(interval)
+
if __name__ == '__main__':
- exit(main())
+ sys.exit(main())
+
diff --git a/collectors/0/graphite_bridge.py b/collectors/0/graphite_bridge.py
index 17cfde35..01664e11 100755
--- a/collectors/0/graphite_bridge.py
+++ b/collectors/0/graphite_bridge.py
@@ -19,25 +19,24 @@
from collectors.lib import utils
import threading
-try:
- from socketserver import ThreadingTCPServer, BaseRequestHandler
-except ImportError:
- from SocketServer import ThreadingTCPServer, BaseRequestHandler
+from socketserver import ThreadingTCPServer, BaseRequestHandler
try:
- from collectors.etc import graphite_bridge_conf
+ from collectors.etc import graphite_bridge_conf
except ImportError:
- graphite_bridge_conf = None
+ graphite_bridge_conf = None
HOST = '127.0.0.1'
PORT = 2003
SIZE = 8192
+
class GraphiteServer(ThreadingTCPServer):
allow_reuse_address = True
print_lock = threading.Lock()
+
class GraphiteHandler(BaseRequestHandler):
def handle_line(self, line):
@@ -48,7 +47,6 @@ def handle_line(self, line):
else:
print(line_parts[0], line_parts[2], line_parts[1])
-
def handle(self):
data = ''
while True:
@@ -69,7 +67,7 @@ def handle(self):
def main():
if not (graphite_bridge_conf and graphite_bridge_conf.enabled()):
- sys.exit(13)
+ return 13 # ask tcollector to not respawn us
utils.drop_privileges()
server = GraphiteServer((HOST, PORT), GraphiteHandler)
@@ -80,7 +78,6 @@ def main():
server.shutdown()
server.server_close()
-if __name__ == "__main__":
- main()
-sys.exit(0)
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/collectors/0/gstat.py b/collectors/0/gstat.py
index 0904ec22..37728095 100755
--- a/collectors/0/gstat.py
+++ b/collectors/0/gstat.py
@@ -58,19 +58,21 @@
except ImportError:
gstat_conf = None
-DEFAULT_COLLECTION_INTERVAL=15
-
+DEFAULT_COLLECTION_INTERVAL = 15
signal_received = None
+
+
def handlesignal(signum, stack):
global signal_received
signal_received = signum
+
def main():
"""top main loop"""
- collection_interval=DEFAULT_COLLECTION_INTERVAL
- collection_filter=".*"
- if(gstat_conf):
+ collection_interval = DEFAULT_COLLECTION_INTERVAL
+ collection_filter = ".*"
+ if gstat_conf:
config = gstat_conf.get_config()
collection_interval=config['collection_interval']
collection_filter=config['collection_filter']
@@ -88,7 +90,7 @@ def main():
except OSError as e:
if e.errno == errno.ENOENT:
# it makes no sense to run this collector here
- sys.exit(13) # we signal tcollector to not run us
+ return 13 # ask tcollector to not respawn us
raise
timestamp = 0
@@ -138,5 +140,6 @@ def main():
pass
p_gstat.wait()
+
if __name__ == "__main__":
- main()
+ sys.exit(main())
diff --git a/collectors/0/hadoop_datanode.py b/collectors/0/hadoop_datanode.py
index 45ac7318..56bf87fa 100755
--- a/collectors/0/hadoop_datanode.py
+++ b/collectors/0/hadoop_datanode.py
@@ -23,6 +23,7 @@
from collectors.lib import utils
from collectors.lib.hadoop_http import HadoopHttp
+COLLECTION_INTERVAL = 15
REPLACEMENTS = {
"datanodeactivity-": ["activity"],
@@ -57,14 +58,13 @@ def main(args):
utils.drop_privileges()
if json is None:
utils.err("This collector requires the `json' Python module.")
- return 13 # Ask tcollector not to respawn us
+ return 13 # ask tcollector not to respawn us
datanode_service = HadoopDataNode()
while True:
datanode_service.emit()
- time.sleep(15)
+ time.sleep(COLLECTION_INTERVAL)
return 0
if __name__ == "__main__":
sys.exit(main(sys.argv))
-
diff --git a/collectors/0/hadoop_journalnode.py b/collectors/0/hadoop_journalnode.py
index 0771edaa..50e326cc 100755
--- a/collectors/0/hadoop_journalnode.py
+++ b/collectors/0/hadoop_journalnode.py
@@ -23,6 +23,7 @@
from collectors.lib import utils
from collectors.lib.hadoop_http import HadoopHttp
+COLLECTION_INTERVAL = 90
REPLACEMENTS = {
"rpcdetailedactivityforport": ["rpc_activity"],
@@ -59,10 +60,9 @@ def main(args):
journalnode_service = HadoopJournalNode()
while True:
journalnode_service.emit()
- time.sleep(90)
+ time.sleep(COLLECTION_INTERVAL)
return 0
if __name__ == "__main__":
sys.exit(main(sys.argv))
-
diff --git a/collectors/0/hadoop_namenode.py b/collectors/0/hadoop_namenode.py
index 67ec9fac..cade73f6 100755
--- a/collectors/0/hadoop_namenode.py
+++ b/collectors/0/hadoop_namenode.py
@@ -23,6 +23,7 @@
from collectors.lib import utils
from collectors.lib.hadoop_http import HadoopHttp
+COLLECTION_INTERVAL = 90
REPLACEMENTS = {
"rpcdetailedactivityforport": ["rpc_activity"],
@@ -55,14 +56,13 @@ def main(args):
utils.drop_privileges()
if json is None:
utils.err("This collector requires the `json' Python module.")
- return 13 # Ask tcollector not to respawn us
+ return 13 # ask tcollector not to respawn us
name_node_service = HadoopNameNode()
while True:
name_node_service.emit()
- time.sleep(90)
+ time.sleep(COLLECTION_INTERVAL)
return 0
if __name__ == "__main__":
sys.exit(main(sys.argv))
-
diff --git a/collectors/0/hadoop_yarn_node_manager.py b/collectors/0/hadoop_yarn_node_manager.py
index 1df8b138..e61b0681 100755
--- a/collectors/0/hadoop_yarn_node_manager.py
+++ b/collectors/0/hadoop_yarn_node_manager.py
@@ -23,9 +23,9 @@
import time
try:
- import json
+ import json
except ImportError:
- json = None
+ json = None
import argparse
SRCDIR = os.path.join(os.path.dirname(__file__))
@@ -40,53 +40,53 @@
class HadoopYarnNodeManager(HadoopHttp):
- """
- Class that will retrieve metrics from an Apache Hadoop Yarn Node Manager JMX API
+ """
+ Class that will retrieve metrics from an Apache Hadoop Yarn Node Manager JMX API
- Tested on Apache Hadoop 2.7
- """
+ Tested on Apache Hadoop 2.7
+ """
- def __init__(self, host='localhost', port=8042):
- super(HadoopYarnNodeManager, self).__init__('hadoop',
- 'yarn.node_manager',
- host,
- port)
+ def __init__(self, host='localhost', port=8042):
+ super(HadoopYarnNodeManager, self).__init__('hadoop',
+ 'yarn.node_manager',
+ host,
+ port)
- def emit(self):
- current_time = int(time.time())
- metrics = self.poll()
- for context, metric_name, value in metrics:
- for key, value in REPLACEMENTS.items():
- if any(_.startswith(key) for _ in context):
- context = value
- self.emit_metric(context, current_time, metric_name, value)
+ def emit(self):
+ current_time = int(time.time())
+ metrics = self.poll()
+ for context, metric_name, value in metrics:
+ for key, value in REPLACEMENTS.items():
+ if any(_.startswith(key) for _ in context):
+ context = value
+ self.emit_metric(context, current_time, metric_name, value)
# args are useful for testing but no given by TCollector so will inherit defaults normally
def main(args):
- """ Calls HadoopYarnNodeManager at interval secs
- and emits metrics to stdout for TCollector """
- if json is None:
- utils.err("This collector requires the `json' Python module.")
- return 13 # Ask tcollector not to respawn us
- utils.drop_privileges()
- parser = argparse.ArgumentParser()
- parser.add_argument('-H', '--host', default='localhost',
- help='Host to connect to (default: localhost)')
- parser.add_argument('-P', '--port', default=8042, type=int,
- help='Port to connect to (default: 8042)')
- parser.add_argument('-i', '--interval', default=90, type=int,
- help='Interval at which to emit metrics')
- args = parser.parse_args(args[1:])
- host = args.host
- port = args.port
- interval = args.interval
- yarn_service = HadoopYarnNodeManager(host=host, port=port)
- while True:
- yarn_service.emit()
- time.sleep(interval)
- return 0
+ """ Calls HadoopYarnNodeManager at interval secs
+ and emits metrics to stdout for TCollector """
+ if json is None:
+ utils.err("This collector requires the `json' Python module.")
+ return 13 # ask tcollector not to respawn us
+ utils.drop_privileges()
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-H', '--host', default='localhost',
+ help='Host to connect to (default: localhost)')
+ parser.add_argument('-P', '--port', default=8042, type=int,
+ help='Port to connect to (default: 8042)')
+ parser.add_argument('-i', '--interval', default=90, type=int,
+ help='Interval at which to emit metrics')
+ args = parser.parse_args(args[1:])
+ host = args.host
+ port = args.port
+ interval = args.interval
+ yarn_service = HadoopYarnNodeManager(host=host, port=port)
+ while True:
+ yarn_service.emit()
+ time.sleep(interval)
+ return 0
if __name__ == "__main__":
- sys.exit(main(sys.argv))
+ sys.exit(main(sys.argv))
diff --git a/collectors/0/hadoop_yarn_resource_manager.py b/collectors/0/hadoop_yarn_resource_manager.py
index cc5621f2..bb3912e1 100755
--- a/collectors/0/hadoop_yarn_resource_manager.py
+++ b/collectors/0/hadoop_yarn_resource_manager.py
@@ -21,72 +21,70 @@
import os
import sys
import time
-
-try:
- import json
-except ImportError:
- json = None
+import json
import argparse
+from collectors.lib import utils
+from collectors.lib.hadoop_http import HadoopHttp
+
+
SRCDIR = os.path.join(os.path.dirname(__file__))
LIBDIR = os.path.join(SRCDIR, '..', 'lib')
sys.path.append(LIBDIR)
-# pylint: disable=wrong-import-position
-from collectors.lib import utils
-from collectors.lib.hadoop_http import HadoopHttp
+
REPLACEMENTS = {
}
class HadoopYarnResourceManager(HadoopHttp):
- """
- Class that will retrieve metrics from an Apache Hadoop Yarn Resource Manager JMX API
+ """
+ Class that will retrieve metrics from an Apache Hadoop Yarn Resource Manager JMX API
- Tested on Apache Hadoop 2.7
- """
+ Tested on Apache Hadoop 2.7
+ """
- def __init__(self, host='localhost', port=8088):
- super(HadoopYarnResourceManager, self).__init__('hadoop',
- 'yarn.resource_manager',
- host,
- port)
+ def __init__(self, host='localhost', port=8088):
+ super(HadoopYarnResourceManager, self).__init__('hadoop',
+ 'yarn.resource_manager',
+ host,
+ port)
- def emit(self):
- current_time = int(time.time())
- metrics = self.poll()
- for context, metric_name, value in metrics:
- for key, value in REPLACEMENTS.items():
- if any(_.startswith(key) for _ in context):
- context = value
- self.emit_metric(context, current_time, metric_name, value)
+ def emit(self):
+ current_time = int(time.time())
+ metrics = self.poll()
+ for context, metric_name, value in metrics:
+ for key, value in REPLACEMENTS.items():
+ if any(_.startswith(key) for _ in context):
+ context = value
+ self.emit_metric(context, current_time, metric_name, value)
# args are useful for testing but no given by TCollector so will inherit defaults normally
def main(args):
- """ Calls HadoopYarnResourceManager at interval secs
- and emits metrics to stdout for TCollector """
- if json is None:
- utils.err("This collector requires the `json' Python module.")
- return 13 # Ask tcollector not to respawn us
- utils.drop_privileges()
- parser = argparse.ArgumentParser()
- parser.add_argument('-H', '--host', default='localhost',
- help='Host to connect to (default: localhost)')
- parser.add_argument('-P', '--port', default=8088, type=int,
- help='Port to connect to (default: 8088)')
- parser.add_argument('-i', '--interval', default=90, type=int,
- help='Interval at which to emit metrics')
- args = parser.parse_args(args[1:])
- host = args.host
- port = args.port
- interval = args.interval
- yarn_service = HadoopYarnResourceManager(host=host, port=port)
- while True:
- yarn_service.emit()
- time.sleep(interval)
- return 0
+ """ Calls HadoopYarnResourceManager at interval secs
+ and emits metrics to stdout for TCollector """
+ if json is None:
+ utils.err("This collector requires the `json' Python module.")
+ return 13 # ask tcollector not to respawn us
+ utils.drop_privileges()
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-H', '--host', default='localhost',
+ help='Host to connect to (default: localhost)')
+ parser.add_argument('-P', '--port', default=8088, type=int,
+ help='Port to connect to (default: 8088)')
+ parser.add_argument('-i', '--interval', default=90, type=int,
+ help='Interval at which to emit metrics')
+ args = parser.parse_args(args[1:])
+ host = args.host
+ port = args.port
+ interval = args.interval
+ yarn_service = HadoopYarnResourceManager(host=host, port=port)
+ while True:
+ yarn_service.emit()
+ time.sleep(interval)
+ return 0
if __name__ == "__main__":
- sys.exit(main(sys.argv))
+ sys.exit(main(sys.argv))
diff --git a/collectors/0/haproxy.py b/collectors/0/haproxy.py
index 0fd82331..e131fa88 100755
--- a/collectors/0/haproxy.py
+++ b/collectors/0/haproxy.py
@@ -90,50 +90,53 @@
"srv_abrt": "server_aborted_data_transfers"
}
+
def haproxy_pid():
- """Finds out the pid of haproxy process"""
- try:
- pid = subprocess.check_output(["pidof", "-s", "haproxy"])
- except subprocess.CalledProcessError:
- return None
- return pid.rstrip()
+ """Finds out the pid of haproxy process"""
+ try:
+ pid = subprocess.check_output(["pidof", "-s", "haproxy"])
+ except subprocess.CalledProcessError:
+ return None
+ return pid.rstrip()
+
def find_conf_file(pid):
- """Returns the conf file of haproxy."""
- try:
- output = subprocess.check_output(["ps", "--no-headers", "-o", "cmd", pid])
- except subprocess.CalledProcessError as e:
- utils.err("HAProxy (pid %s) went away? %s" % (pid, e))
- return None
- return output.split("-f")[1].split()[0]
+ """Returns the conf file of haproxy."""
+ try:
+ output = subprocess.check_output(["ps", "--no-headers", "-o", "cmd", pid])
+ except subprocess.CalledProcessError as e:
+ utils.err("HAProxy (pid %s) went away? %s" % (pid, e))
+ return None
+ return output.split("-f")[1].split()[0]
+
def find_sock_file(conf_file):
- """Returns the unix socket file of haproxy."""
- try:
- fd = open(conf_file)
- except IOError as e:
- utils.err("Error: %s. Config file path is relative: %s" % (e, conf_file))
- return None
- try:
- for line in fd:
- if line.lstrip(" \t").startswith("stats socket"):
- sock_file = line.split()[2]
- if utils.is_sockfile(sock_file):
- return sock_file
- finally:
- fd.close()
+ """Returns the unix socket file of haproxy."""
+ try:
+ fd = open(conf_file)
+ except IOError as e:
+ utils.err("Error: %s. Config file path is relative: %s" % (e, conf_file))
+ return None
+ try:
+ for line in fd:
+ if line.lstrip(" \t").startswith("stats socket"):
+ sock_file = line.split()[2]
+ if utils.is_sockfile(sock_file):
+ return sock_file
+ finally:
+ fd.close()
def collect_stats(sock_file):
"""Collects stats from haproxy unix domain socket"""
- sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
try:
- sock.settimeout(COLLECTION_INTERVAL)
- sock.connect(sock_file)
- sock.send("show stat\n")
- statlines = sock.recv(10240).split('\n')
+ sock.settimeout(COLLECTION_INTERVAL)
+ sock.connect(sock_file)
+ sock.send("show stat\n")
+ statlines = sock.recv(10240).split('\n')
finally:
- sock.close()
+ sock.close()
ts = time.time()
# eat up any empty lines that may be present
@@ -183,32 +186,32 @@ def print_metric(line, metric, timestamp):
if not value:
value = 0
print("haproxy.%s %i %s source=%s cluster=%s"
- % (METRIC_NAMES[metric],
- timestamp,
- value,
- line["svname"],
- line["pxname"]))
+ % (METRIC_NAMES[metric],
+ timestamp,
+ value,
+ line["svname"],
+ line["pxname"]))
def main():
- pid = haproxy_pid()
- if not pid:
- utils.err("Error: HAProxy is not running")
- return 13 # Ask tcollector to not respawn us.
+ pid = haproxy_pid()
+ if not pid:
+ utils.err("Error: HAProxy is not running")
+ return 13 # ask tcollector to not respawn us.
- conf_file = find_conf_file(pid)
- if not conf_file:
- return 13
+ conf_file = find_conf_file(pid)
+ if not conf_file:
+ return 13
- sock_file = find_sock_file(conf_file)
- if sock_file is None:
- utils.err("Error: HAProxy is not listening on any unix domain socket")
- return 13
+ sock_file = find_sock_file(conf_file)
+ if sock_file is None:
+ utils.err("Error: HAProxy is not listening on any unix domain socket")
+ return 13
+ while True:
+ collect_stats(sock_file)
+ time.sleep(COLLECTION_INTERVAL)
- while True:
- collect_stats(sock_file)
- time.sleep(COLLECTION_INTERVAL)
if __name__ == "__main__":
- sys.exit(main())
+ sys.exit(main())
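The collect_stats() function above still sends and receives native strings on the HAProxy admin socket; on Python 3 the socket API works in bytes, so a version meant to run there would need explicit encode/decode. A sketch under that assumption (not part of this diff):

    import socket

    def show_stat(sock_file, timeout=15):
        # Query HAProxy's stats socket for the CSV "show stat" table.
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            sock.settimeout(timeout)
            sock.connect(sock_file)
            sock.sendall(b"show stat\n")
            chunks = []
            while True:
                chunk = sock.recv(10240)
                if not chunk:  # HAProxy closes the connection when done
                    break
                chunks.append(chunk)
        finally:
            sock.close()
        return b"".join(chunks).decode("utf-8").splitlines()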
diff --git a/collectors/0/hbase_master.py b/collectors/0/hbase_master.py
index bf7f87d9..cb893bcb 100755
--- a/collectors/0/hbase_master.py
+++ b/collectors/0/hbase_master.py
@@ -15,11 +15,6 @@
import sys
import time
-try:
- import json
-except ImportError:
- json = None
-
from collectors.lib import utils
from collectors.lib.hadoop_http import HadoopHttp
@@ -48,9 +43,6 @@ def emit(self):
def main(args):
utils.drop_privileges()
- if json is None:
- utils.err("This collector requires the `json' Python module.")
- return 13 # Ask tcollector not to respawn us
hbase_service = HBaseMaster()
while True:
hbase_service.emit()
@@ -60,4 +52,3 @@ def main(args):
if __name__ == "__main__":
sys.exit(main(sys.argv))
-
diff --git a/collectors/0/hbase_regionserver.py b/collectors/0/hbase_regionserver.py
index e2b67b36..876783fd 100755
--- a/collectors/0/hbase_regionserver.py
+++ b/collectors/0/hbase_regionserver.py
@@ -14,20 +14,18 @@
import time
import re
-
-try:
- import json
-except ImportError:
- json = None
+import sys
from collectors.lib import utils
from collectors.lib.hadoop_http import HadoopHttp
+COLLECTION_INTERVAL = 15
EMIT_REGION = True
EXCLUDED_CONTEXTS = ("master")
REGION_METRIC_PATTERN = re.compile(r"[N|n]amespace_(.*)_table_(.*)_region_(.*)_metric_(.*)")
+
class HBaseRegionserver(HadoopHttp):
def __init__(self):
super(HBaseRegionserver, self).__init__("hbase", "regionserver", "localhost", 16030)
@@ -70,16 +68,12 @@ def emit(self):
def main(args):
utils.drop_privileges()
- if json is None:
- utils.err("This collector requires the `json' Python module.")
- return 13 # Ask tcollector not to respawn us
hbase_service = HBaseRegionserver()
while True:
hbase_service.emit()
- time.sleep(15)
+ time.sleep(COLLECTION_INTERVAL)
+
if __name__ == "__main__":
- import sys
sys.exit(main(sys.argv))
-
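For reference, REGION_METRIC_PATTERN splits per-region JMX metric names into namespace, table, region and metric components. A small illustration (the metric name below is made up):

    import re

    REGION_METRIC_PATTERN = re.compile(
        r"[N|n]amespace_(.*)_table_(.*)_region_(.*)_metric_(.*)")

    name = "Namespace_default_table_usertable_region_1588230740_metric_storeFileCount"
    m = REGION_METRIC_PATTERN.match(name)
    if m:
        namespace, table, region, metric = m.groups()
        print(namespace, table, region, metric)
        # -> default usertable 1588230740 storeFileCount

(The character class [N|n] also matches a literal "|"; [Nn] would be the tighter spelling, but the behaviour on real metric names is the same.)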
diff --git a/collectors/0/ifrate.py b/collectors/0/ifrate.py
index 2f6d7f82..f22ebf06 100755
--- a/collectors/0/ifrate.py
+++ b/collectors/0/ifrate.py
@@ -48,23 +48,25 @@
except ImportError:
ifrate_conf = None
-DEFAULT_COLLECTION_INTERVAL=15
-
+DEFAULT_COLLECTION_INTERVAL = 15
signal_received = None
+
+
def handlesignal(signum, stack):
global signal_received
signal_received = signum
+
def main():
"""top main loop"""
collection_interval=DEFAULT_COLLECTION_INTERVAL
- if(ifrate_conf):
+ if ifrate_conf:
config = ifrate_conf.get_config()
- collection_interval=config['collection_interval']
- interfaces=config['interfaces']
- report_packets=config['report_packets']
- merge_err_in_out=config['merge_err_in_out']
+ collection_interval = config['collection_interval']
+ interfaces = config['interfaces']
+ report_packets = config['report_packets']
+ merge_err_in_out = config['merge_err_in_out']
global signal_received
@@ -80,20 +82,20 @@ def main():
["netstat", "-I", intname, "-d", "-w", str(collection_interval)],
stdout=subprocess.PIPE,
))
- intnum+=1
+ intnum += 1
else:
- sys.exit(13) # we signal tcollector to not run us
+ return 13 # we signal tcollector to not run us
except OSError as e:
if e.errno == errno.ENOENT:
# it makes no sense to run this collector here
- sys.exit(13) # we signal tcollector to not run us
+ return 13 # we signal tcollector to not run us
raise
timestamp = 0
procnum = 0
while signal_received is None:
- if (procnum >= intnum):
+ if procnum >= intnum:
procnum=0
try:
line = p_net[procnum].stdout.readline()
@@ -109,14 +111,14 @@ def main():
if (re.match("^[0-9 ]+$",line)):
fields = line.split()
if len(fields) == 9:
- if(procnum == 0):
+ if procnum == 0:
timestamp = int(time.time())
print("ifrate.byt.in %s %s int=%s" % (timestamp, int(fields[3])/collection_interval, interfaces[procnum]))
print("ifrate.byt.out %s %s int=%s" % (timestamp, int(fields[6])/collection_interval, interfaces[procnum]))
- if(report_packets):
+ if report_packets:
print("ifrate.pkt.in %s %s int=%s" % (timestamp, int(fields[0])/collection_interval, interfaces[procnum]))
print("ifrate.pkt.out %s %s int=%s" % (timestamp, int(fields[4])/collection_interval, interfaces[procnum]))
- if(merge_err_in_out):
+ if merge_err_in_out:
print("ifrate.err %s %s int=%s" % (timestamp, (int(fields[1])+int(fields[5]))/collection_interval, interfaces[procnum]))
print("ifrate.drp %s %s int=%s" % (timestamp, (int(fields[2])+int(fields[8]))/collection_interval, interfaces[procnum]))
else:
@@ -127,7 +129,7 @@ def main():
print("ifrate.col %s %s int=%s" % (timestamp, int(fields[7])/collection_interval, interfaces[procnum]))
# analyze next process
- procnum+=1
+ procnum += 1
sys.stdout.flush()
@@ -142,8 +144,9 @@ def main():
p_net[procnum].wait()
     # If no line at all has been processed (wrong interface name?), we signal tcollector to not run us
- if(timestamp == 0):
- exit(13)
+ if timestamp == 0:
+ return 13
+
if __name__ == "__main__":
- main()
+ sys.exit(main())
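The ifrate values printed above are per-second rates: netstat -I <interface> -d -w <interval> emits per-window deltas, and the collector divides them by collection_interval. On Python 3 the "/" operator is true division, so the emitted rates may be floats, which OpenTSDB accepts. A trivial illustration:

    def per_second(delta, interval):
        # netstat -w prints deltas per sample window; dividing by the window
        # length yields a rate. Using // instead would floor small rates to 0.
        return delta / interval

    print(per_second(1500, 15))  # -> 100.0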
diff --git a/collectors/0/ifstat.py b/collectors/0/ifstat.py
index 8e0aaf93..6a19865c 100755
--- a/collectors/0/ifstat.py
+++ b/collectors/0/ifstat.py
@@ -20,7 +20,7 @@
from collectors.lib import utils
-interval = 15 # seconds
+COLLECTION_INTERVAL = 15 # seconds
# /proc/net/dev has 16 fields, 8 for receive and 8 for transmit,
# defined below.
@@ -89,7 +89,8 @@ def direction(i):
% (FIELDS[i], ts, stats[i], intf, direction(i)))
sys.stdout.flush()
- time.sleep(interval)
+ time.sleep(COLLECTION_INTERVAL)
+
if __name__ == "__main__":
sys.exit(main())
diff --git a/collectors/0/iostat.py b/collectors/0/iostat.py
index 64810676..be2d434d 100755
--- a/collectors/0/iostat.py
+++ b/collectors/0/iostat.py
@@ -261,7 +261,7 @@ def main():
% (metric, FIELDS_PART[i], ts, values[i + 3], device))
else:
print("Cannot parse /proc/diskstats line: ", line, file=sys.stderr)
- exit(13) # tcollector does not restart collectors with return code 13
+ return 13 # tcollector does not restart collectors with return code 13
sys.stdout.flush()
time.sleep(COLLECTION_INTERVAL)
diff --git a/collectors/0/jolokia.py b/collectors/0/jolokia.py
index 8d54ebac..d3fd96ac 100755
--- a/collectors/0/jolokia.py
+++ b/collectors/0/jolokia.py
@@ -176,7 +176,7 @@ def parse_attribute(self, attr, not_tags=[]):
def main():
if not (jolokia_conf and jolokia_conf.enabled()):
utils.err("Jolokia collector disable by config")
- sys.exit(13)
+ return 13 # ask tcollector to not respawn us
utils.drop_privileges()
CONFIG = jolokia_conf.get_config()
@@ -217,7 +217,6 @@ def main():
break
# End while True
-if __name__ == "__main__":
- main()
-sys.exit(0)
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/collectors/0/mapr_metrics.py b/collectors/0/mapr_metrics.py
index eae7fa8f..dc0745ba 100755
--- a/collectors/0/mapr_metrics.py
+++ b/collectors/0/mapr_metrics.py
@@ -11,188 +11,193 @@
from collectors.etc import mapr_metrics_conf
from collectors.lib import utils
-
try:
- import requests
+ import requests
except ImportError:
- print >>sys.stderr, "Please install the requests module."
- sys.exit(1)
+    print("Please install the requests module.", file=sys.stderr)
+ sys.exit(1)
try:
- from collectors.etc import mapr_metrics_conf
+ from collectors.etc import mapr_metrics_conf
except ImportError:
- utils.err("No mapr_metrics configuration found!")
- sys.exit(13)
+ utils.err("No mapr_metrics configuration found!")
+ sys.exit(13)
CONFIG = mapr_metrics_conf.get_config()
def get_metrics(webserver_url, username, password, params):
- try:
- r = requests.get(webserver_url, auth=(username,password), verify=False, params=params)
- except requests.exceptions.ConnectionError as error:
- print >>sys.stderr, "Error connecting: %s" % error
- utils.err("Connection error: %s" % error)
- raise
-
- try:
- r.raise_for_status()
- except requests.exceptions.HTTPError as error:
- print >>sys.stderr, "Request was not successful: %s" % error
- utils.err("HTTP error getting metrics from '%s' - %s" % (webserver_url, error))
- return 13 # tell tcollector to not respawn
-
- response = r.json()
- try:
- data = response['data']
- except KeyError as e:
- print >>sys.stderr, "Did not get a 'data' key in the response."
- print >>sys.stderr, response
- raise
- return data
+ try:
+ r = requests.get(webserver_url, auth=(username, password), verify=False, params=params)
+ except requests.exceptions.ConnectionError as error:
+        print("Error connecting: %s" % error, file=sys.stderr)
+ utils.err("Connection error: %s" % error)
+ raise
+
+ try:
+ r.raise_for_status()
+ except requests.exceptions.HTTPError as error:
+ print >> sys.stderr, "Request was not successful: %s" % error
+ utils.err("HTTP error getting metrics from '%s' - %s" % (webserver_url, error))
+ return 13 # tell tcollector to not respawn
+
+ response = r.json()
+ try:
+ data = response['data']
+ except KeyError as e:
+        print("Did not get a 'data' key in the response.", file=sys.stderr)
+        print(response, file=sys.stderr)
+ raise
+ return data
+
def main():
- schema = "https"
+ schema = "https"
- username = CONFIG['username']
- password = CONFIG['password']
- webserver = CONFIG['webserver']
- port = CONFIG['port']
- if CONFIG['no_ssl']:
- schema = "http"
- webserver_url = "%s://%s:%d/rest/node/metrics" % (schema, webserver, port)
+ username = CONFIG['username']
+ password = CONFIG['password']
+ webserver = CONFIG['webserver']
+ port = CONFIG['port']
+ if CONFIG['no_ssl']:
+ schema = "http"
+ webserver_url = "%s://%s:%d/rest/node/metrics" % (schema, webserver, port)
+
+ m = Metrics2TSD(webserver_url, username, password)
+ m.run()
- m = Metrics2TSD(webserver_url, username, password)
- m.run()
class Metrics2TSD:
- def __init__(self, webserver_url, username='mapr', password='mapr'):
- self.metric_template = Template('mapr.$grouping.$metric')
- self.webserver_url = webserver_url
- self.username = username
- self.password = password
- self.failed_attempts = 0
- self.last_values = { }
-
- self.cluster_name = self.get_cluster_name()
-
- def get_cluster_name(self):
- cluster_name = None
- with open('/opt/mapr/conf/mapr-clusters.conf', 'r') as clusters_conf:
- firstline = clusters_conf.readline()
- cluster_name = re.split('\s+', firstline)[0]
- return re.sub('\.', '_', cluster_name)
-
- def run(self):
- seconds_delay = CONFIG['interval']
-
- while True:
- end = datetime.datetime.now()
- start = end - timedelta(seconds=seconds_delay)
- ms_start = int(start.strftime('%s')) * 1000
- ms_end = int(end.strftime('%s')) * 1000
- nodename = platform.node().split('.')[0] # if node() returns the fqdn, the metrics can't be retrieved
- params = { 'nodes': nodename, 'start': ms_start, 'end': ms_end }
-
- try:
- all_metrics = get_metrics(self.webserver_url, self.username, self.password, params)
+ def __init__(self, webserver_url, username='mapr', password='mapr'):
+ self.metric_template = Template('mapr.$grouping.$metric')
+ self.webserver_url = webserver_url
+ self.username = username
+ self.password = password
self.failed_attempts = 0
- except requests.exceptions.ConnectionError as error:
- self.failed_attempts += 1
- utils.err("Error connecting to %s, have experienced %d errors so far." % (self.webserver_url, self.failed_attempts))
- if self.failed_attempts > 5:
- print >>sys.stderr, "Failed 5 times, exiting."
- return 13
- continue
-
- if len(all_metrics) > 0:
- for d in all_metrics[-1:]:
- node = d['NODE']
- timestamp = int(d['TIMESTAMP']) / 1000
- tags = {
- 'node': node,
- 'cluster': self.cluster_name
- }
-
- for group in ('DISKS','CPUS','NETWORK'):
- if group in d:
- self.group_metrics(group, self.last_values, d, tags=tags)
- try:
- self.send_gauge('mapr.memory.used', int(d['MEMORYUSED']) * (1024*1024), timestamp, tags=tags)
- except KeyError as e:
- utils.err('%s not in metrics data.' % e)
-
- try:
- self.send_gauge('mapr.mfs.available', int(d['SERVAVAILSIZEMB']) * (1024 * 1024), timestamp, tags=tags)
- except KeyError as e:
- utils.err('%s not in metrics data.' % e)
-
- try:
- self.send_gauge('mapr.mfs.used', int(d['SERVUSEDSIZEMB']) * (1024 * 1024), timestamp, tags=tags)
- except KeyError as e:
- utils.err('%s not in metrics data.' % e)
-
- try:
- rpccount_metric = self.metric_template.substitute(grouping='rpc', metric='count')
- if rpccount_metric in self.last_values:
- self.send_counter(rpccount_metric, self.last_values[rpccount_metric], d['RPCCOUNT'], timestamp, tags=tags)
- self.last_values[rpccount_metric] = d['RPCCOUNT']
- except KeyError as e:
- utils.err('%s is not in metrics data.' % e)
-
- try:
- rpcinbytes_metric = self.metric_template.substitute(grouping='rpc', metric='inbytes')
- if rpcinbytes_metric in self.last_values:
- self.send_counter(rpcinbytes_metric, self.last_values[rpcinbytes_metric], d['RPCINBYTES'], timestamp, tags=tags)
- self.last_values[rpcinbytes_metric] = d['RPCINBYTES']
- except KeyError as e:
- utils.err('%s is not in metrics data.' % e)
-
- try:
- rpcoutbytes_metric = self.metric_template.substitute(grouping='rpc', metric='outbytes')
- if rpcoutbytes_metric in self.last_values:
- self.send_counter(rpcoutbytes_metric, self.last_values[rpcoutbytes_metric], d['RPCOUTBYTES'], timestamp, tags=tags)
- self.last_values[rpcoutbytes_metric] = d['RPCOUTBYTES']
- except KeyError as e:
- utils.err('%s is not in metrics data.' % e)
- time.sleep(seconds_delay)
-
-
- def group_metrics(self, group, last_values, all_metrics, tags={}):
- node = all_metrics['NODE']
- timestamp = int(all_metrics['TIMESTAMP']) / 1000
-
- for (obj, obj_metrics) in all_metrics[group].items():
- for (metric_name, value) in obj_metrics.items():
- t = tags.copy()
- if group == 'DISKS':
- t['disk'] = obj
- if metric_name.endswith('KB'):
- metric_name = re.sub("KB", "BYTES", metric_name)
- value = value * 1024
- if group == 'CPUS':
- t['cpu'] = obj
- if group == 'NETWORK':
- t['interface'] = obj
- metric = self.metric_template.substitute(grouping=group.lower(), metric=metric_name)
- self.print_opentsdb_message(metric, timestamp, value, t)
-
- def print_opentsdb_message(self, metric, timestamp, value, tags):
- tag_string = " ".join(map(lambda x: "%s=%s" % x, tags.items()))
- print("%s %i %d %s" % (metric, timestamp, value, tag_string))
-
- def send_gauge(self, metric, value, timestamp, tags={}):
- self.print_opentsdb_message(metric, timestamp, value, tags)
-
- def send_counter(self, metric, last_value, value, timestamp, tags={}):
- delta = value - last_value
- self.print_opentsdb_message(metric, timestamp, delta, tags)
+ self.last_values = {}
+
+ self.cluster_name = self.get_cluster_name()
+
+ def get_cluster_name(self):
+ cluster_name = None
+ with open('/opt/mapr/conf/mapr-clusters.conf', 'r') as clusters_conf:
+ firstline = clusters_conf.readline()
+ cluster_name = re.split('\s+', firstline)[0]
+ return re.sub('\.', '_', cluster_name)
+
+ def run(self):
+ seconds_delay = CONFIG['interval']
+
+ while True:
+ end = datetime.datetime.now()
+ start = end - timedelta(seconds=seconds_delay)
+ ms_start = int(start.strftime('%s')) * 1000
+ ms_end = int(end.strftime('%s')) * 1000
+ nodename = platform.node().split('.')[0] # if node() returns the fqdn, the metrics can't be retrieved
+ params = {'nodes': nodename, 'start': ms_start, 'end': ms_end}
+
+ try:
+ all_metrics = get_metrics(self.webserver_url, self.username, self.password, params)
+ self.failed_attempts = 0
+ except requests.exceptions.ConnectionError as error:
+ self.failed_attempts += 1
+ utils.err("Error connecting to %s, have experienced %d errors so far." % (
+ self.webserver_url, self.failed_attempts))
+ if self.failed_attempts > 5:
+                    print("Failed 5 times, exiting.", file=sys.stderr)
+ return 13
+ continue
+
+ if len(all_metrics) > 0:
+ for d in all_metrics[-1:]:
+ node = d['NODE']
+ timestamp = int(d['TIMESTAMP']) / 1000
+ tags = {
+ 'node': node,
+ 'cluster': self.cluster_name
+ }
+
+ for group in ('DISKS', 'CPUS', 'NETWORK'):
+ if group in d:
+ self.group_metrics(group, self.last_values, d, tags=tags)
+ try:
+ self.send_gauge('mapr.memory.used', int(d['MEMORYUSED']) * (1024 * 1024), timestamp, tags=tags)
+ except KeyError as e:
+ utils.err('%s not in metrics data.' % e)
+
+ try:
+ self.send_gauge('mapr.mfs.available', int(d['SERVAVAILSIZEMB']) * (1024 * 1024), timestamp,
+ tags=tags)
+ except KeyError as e:
+ utils.err('%s not in metrics data.' % e)
+
+ try:
+ self.send_gauge('mapr.mfs.used', int(d['SERVUSEDSIZEMB']) * (1024 * 1024), timestamp, tags=tags)
+ except KeyError as e:
+ utils.err('%s not in metrics data.' % e)
+
+ try:
+ rpccount_metric = self.metric_template.substitute(grouping='rpc', metric='count')
+ if rpccount_metric in self.last_values:
+ self.send_counter(rpccount_metric, self.last_values[rpccount_metric], d['RPCCOUNT'],
+ timestamp, tags=tags)
+ self.last_values[rpccount_metric] = d['RPCCOUNT']
+ except KeyError as e:
+ utils.err('%s is not in metrics data.' % e)
+
+ try:
+ rpcinbytes_metric = self.metric_template.substitute(grouping='rpc', metric='inbytes')
+ if rpcinbytes_metric in self.last_values:
+ self.send_counter(rpcinbytes_metric, self.last_values[rpcinbytes_metric], d['RPCINBYTES'],
+ timestamp, tags=tags)
+ self.last_values[rpcinbytes_metric] = d['RPCINBYTES']
+ except KeyError as e:
+ utils.err('%s is not in metrics data.' % e)
+
+ try:
+ rpcoutbytes_metric = self.metric_template.substitute(grouping='rpc', metric='outbytes')
+ if rpcoutbytes_metric in self.last_values:
+ self.send_counter(rpcoutbytes_metric, self.last_values[rpcoutbytes_metric],
+ d['RPCOUTBYTES'], timestamp, tags=tags)
+ self.last_values[rpcoutbytes_metric] = d['RPCOUTBYTES']
+ except KeyError as e:
+ utils.err('%s is not in metrics data.' % e)
+ time.sleep(seconds_delay)
+
+ def group_metrics(self, group, last_values, all_metrics, tags={}):
+ node = all_metrics['NODE']
+ timestamp = int(all_metrics['TIMESTAMP']) / 1000
+
+ for (obj, obj_metrics) in all_metrics[group].items():
+ for (metric_name, value) in obj_metrics.items():
+ t = tags.copy()
+ if group == 'DISKS':
+ t['disk'] = obj
+ if metric_name.endswith('KB'):
+ metric_name = re.sub("KB", "BYTES", metric_name)
+ value = value * 1024
+ if group == 'CPUS':
+ t['cpu'] = obj
+ if group == 'NETWORK':
+ t['interface'] = obj
+ metric = self.metric_template.substitute(grouping=group.lower(), metric=metric_name)
+ self.print_opentsdb_message(metric, timestamp, value, t)
+
+ def print_opentsdb_message(self, metric, timestamp, value, tags):
+ tag_string = " ".join(map(lambda x: "%s=%s" % x, tags.items()))
+ print("%s %i %d %s" % (metric, timestamp, value, tag_string))
+
+ def send_gauge(self, metric, value, timestamp, tags={}):
+ self.print_opentsdb_message(metric, timestamp, value, tags)
+
+ def send_counter(self, metric, last_value, value, timestamp, tags={}):
+ delta = value - last_value
+ self.print_opentsdb_message(metric, timestamp, delta, tags)
if __name__ == "__main__":
- if mapr_metrics_conf.enabled():
- sys.stdin.close()
- sys.exit(main())
- else:
- utils.err("Enable the mapr_metrics collector if you want MapR stats.")
- sys.exit(13)
+ if mapr_metrics_conf.enabled():
+ sys.stdin.close()
+ sys.exit(main())
+ else:
+ utils.err("Enable the mapr_metrics collector if you want MapR stats.")
+ sys.exit(13)
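Metrics2TSD.send_counter() above turns raw, ever-increasing counters into per-interval deltas by remembering the previous sample in self.last_values. A stripped-down sketch of that pattern (names are illustrative, not taken from the diff):

    class DeltaEmitter(object):
        def __init__(self):
            self.last_values = {}

        def send_counter(self, metric, value, timestamp):
            # Only emit once a previous sample exists to diff against,
            # mirroring the `if ... in self.last_values` guards above.
            if metric in self.last_values:
                delta = value - self.last_values[metric]
                print("%s %d %d" % (metric, timestamp, delta))
            self.last_values[metric] = value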
diff --git a/collectors/0/mongo.py b/collectors/0/mongo.py
index 0f640ff5..1f58c571 100755
--- a/collectors/0/mongo.py
+++ b/collectors/0/mongo.py
@@ -62,11 +62,12 @@
('opcounters', ('command', 'delete', 'getmore', 'insert', 'query', 'update')),
)
+
def main():
utils.drop_privileges()
if pymongo is None:
- print("error: Python module `pymongo' is missing", file=sys.stderr)
- return 13
+ print("error: Python module `pymongo' is missing", file=sys.stderr)
+ return 13 # ask tcollector to not respawn us
c = pymongo.Connection(host=HOST, port=PORT)
@@ -90,5 +91,6 @@ def main():
sys.stdout.flush()
time.sleep(INTERVAL)
+
if __name__ == '__main__':
sys.exit(main())
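mongo.py still connects with pymongo.Connection, which was removed in pymongo 3.x; mongo3.py (next hunk) already uses MongoClient. If this collector needs to run against a modern pymongo, the connection call would have to become something like the following sketch (host and port are illustrative, not part of this diff):

    import pymongo

    # MongoClient replaced the removed Connection class in pymongo >= 3.0.
    c = pymongo.MongoClient(host="localhost", port=27017)
    server_status = c.admin.command("serverStatus")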
diff --git a/collectors/0/mongo3.py b/collectors/0/mongo3.py
index 2fb56564..a17d4034 100755
--- a/collectors/0/mongo3.py
+++ b/collectors/0/mongo3.py
@@ -18,7 +18,6 @@
import sys
import time
-import os
try:
import pymongo
except ImportError:
@@ -205,6 +204,7 @@ def runServerStatus(c):
for k, v in cur.items():
print('mongo.%s %d %s mode=%s' % (metric, ts, v, k))
+
def runDbStats(c):
for db_name in DB_NAMES:
res = c[db_name].command('dbStats')
@@ -233,6 +233,7 @@ def runDbStats(c):
continue
print('mongo.rs.%s %d %s replica=%s db=%s' % (metric, ts, cur, replica_name, db_name))
+
def runReplSetGetStatus(c):
res = c.admin.command('replSetGetStatus')
ts = int(time.time())
@@ -258,6 +259,7 @@ def runReplSetGetStatus(c):
continue
print('mongo.replica.%s %d %s replica_set=%s replica=%s replica_state=%s replica_health=%s' % (metric, ts, cur, replica_set_name, replica_name, replica_state, replica_health))
+
def loadEnv():
global USER, PASS, INTERVAL, DB_NAMES, CONFIG_CONN, MONGOS_CONN, REPLICA_CONN
for item in mongodb3_conf.get_settings()['db'].split(','):
@@ -281,13 +283,14 @@ def loadEnv():
PASS = mongodb3_conf.get_settings()['password']
INTERVAL = mongodb3_conf.get_settings()['interval']
+
def main():
loadEnv()
utils.drop_privileges()
if pymongo is None:
print("error: Python module `pymongo' is missing", file=sys.stderr)
- return 13
+ return 13 # ask tcollector to not respawn us
for index, item in enumerate(CONFIG_CONN, start=0):
conn = pymongo.MongoClient(host=item['host'], port=item['port'])
@@ -320,5 +323,6 @@ def main():
sys.stdout.flush()
time.sleep(INTERVAL)
+
if __name__ == '__main__':
sys.exit(main())
diff --git a/collectors/0/mountstats.py b/collectors/0/mountstats.py
index ad31bddf..01b1e1be 100755
--- a/collectors/0/mountstats.py
+++ b/collectors/0/mountstats.py
@@ -79,22 +79,11 @@
# proc.mountstats.bytes.writepages 1464196613 2477054 nfshost=fls1.sys.lab1.syseng.tmcs nfsvol=/vol/vol0
"""
-import os
-import socket
import sys
import time
-PY3 = sys.version_info[0] > 2
-if PY3:
- from hashlib import md5
+from hashlib import md5
- def md5_digest(line):
- return md5(line.encode("utf8")).digest()
-else:
- import md5 # pylint: disable=import-error
-
- def md5_digest(line):
- return md5.new(line).digest()
COLLECTION_INTERVAL = 10 # seconds
@@ -107,12 +96,17 @@ def md5_digest(line):
# RPC_FIELDS is the individual metric fields on the RPC metric lines
RPC_FIELDS = ['ops', 'txs', 'timeouts', 'txbytes', 'rxbytes', 'qtime', 'rttime', 'totaltime']
+
+def md5_digest(line):
+ return md5(line.encode("utf8")).digest()
+
+
def main():
"""nfsstats main loop."""
try:
f_nfsstats = open("/proc/self/mountstats", "r")
except:
- sys.exit(13)
+ return 13
while True:
device = None
@@ -174,7 +168,6 @@ def main():
for i in range(1, len(RPC_FIELDS) + 1):
rpc_metrics[device]['other'][RPC_FIELDS[i-1]] += int(values[i])
-
for device in rpc_metrics:
# Skip the duplicates
if 'dupe' in rpc_metrics[device]:
@@ -195,6 +188,5 @@ def main():
time.sleep(COLLECTION_INTERVAL)
-
if __name__ == "__main__":
- main()
+ sys.exit(main())
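md5_digest(), now defined once for Python 3 only, appears to be used to fingerprint mountstats device lines so duplicate mounts of the same export are reported only once (see the 'dupe' handling above). Roughly:

    from hashlib import md5

    def md5_digest(line):
        return md5(line.encode("utf8")).digest()

    seen = set()
    for line in ("fls1:/vol/vol0", "fls1:/vol/vol0", "fls2:/vol/vol1"):
        digest = md5_digest(line)
        print(line, "dupe" if digest in seen else "new")
        seen.add(digest)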
diff --git a/collectors/0/mysql.py b/collectors/0/mysql.py
index c0064c85..15fe5452 100755
--- a/collectors/0/mysql.py
+++ b/collectors/0/mysql.py
@@ -20,19 +20,15 @@
import sys
import time
-PY3 = sys.version_info[0] > 2
-if PY3:
- INTEGER_TYPES = (int, )
-else:
- INTEGER_TYPES = (int, long) # pylint: disable=undefined-variable
+from collectors.etc import mysqlconf
+from collectors.lib import utils
+
+INTEGER_TYPES = (int,)
try:
- import MySQLdb
+ import MySQLdb
except ImportError:
- MySQLdb = None # This is handled gracefully in main()
-
-from collectors.etc import mysqlconf
-from collectors.lib import utils
+ MySQLdb = None # This is handled gracefully in main()
COLLECTION_INTERVAL = 15 # seconds
CONNECT_TIMEOUT = 2 # seconds
@@ -40,368 +36,373 @@
DB_REFRESH_INTERVAL = 60 # seconds
# Usual locations where to find the default socket file.
DEFAULT_SOCKFILES = set([
- "/tmp/mysql.sock", # MySQL's own default.
- "/var/lib/mysql/mysql.sock", # RH-type / RPM systems.
- "/var/run/mysqld/mysqld.sock", # Debian-type systems.
+ "/tmp/mysql.sock", # MySQL's own default.
+ "/var/lib/mysql/mysql.sock", # RH-type / RPM systems.
+ "/var/run/mysqld/mysqld.sock", # Debian-type systems.
])
# Directories under which to search additional socket files.
SEARCH_DIRS = [
- "/var/lib/mysql",
+ "/var/lib/mysql",
]
-class DB(object):
- """Represents a MySQL server (as we can monitor more than 1 MySQL)."""
- def __init__(self, sockfile, dbname, db, cursor, version):
- """Constructor.
-
- Args:
- sockfile: Path to the socket file.
- dbname: Name of the database for that socket file.
- db: A MySQLdb connection opened to that socket file.
- cursor: A cursor acquired from that connection.
- version: What version is this MySQL running (from `SELECT VERSION()').
- """
- self.sockfile = sockfile
- self.dbname = dbname
- self.db = db
- self.cursor = cursor
- self.version = version
- self.master = None
- self.slave_bytes_executed = None
- self.relay_bytes_relayed = None
-
- version = version.split(".")
- try:
- self.major = int(version[0])
- self.medium = int(version[1])
- except (ValueError, IndexError) as e:
- self.major = self.medium = 0
-
- def __str__(self):
- return "DB(%r, %r, version=%r)" % (self.sockfile, self.dbname,
- self.version)
-
- def __repr__(self):
- return self.__str__()
-
- def isShowGlobalStatusSafe(self):
- """Returns whether or not SHOW GLOBAL STATUS is safe to run."""
- # We can't run SHOW GLOBAL STATUS on versions prior to 5.1 because it
- # locks the entire database for too long and severely impacts traffic.
- return self.major > 5 or (self.major == 5 and self.medium >= 1)
-
- def query(self, sql):
- """Executes the given SQL statement and returns a sequence of rows."""
- assert self.cursor, "%s already closed?" % (self,)
- try:
- self.cursor.execute(sql)
- except MySQLdb.OperationalError as exc:
- if exc.errno != 2006: # "MySQL server has gone away" # pylint:disable=no-member
- raise
- self._reconnect()
- return self.cursor.fetchall()
-
- def close(self):
- """Closes the connection to this MySQL server."""
- if self.cursor:
- self.cursor.close()
- self.cursor = None
- if self.db:
- self.db.close()
- self.db = None
-
- def _reconnect(self):
- """Reconnects to this MySQL server."""
- self.close()
- self.db = mysql_connect(self.sockfile)
- self.cursor = self.db.cursor()
+class DB(object):
+ """Represents a MySQL server (as we can monitor more than 1 MySQL)."""
+
+ def __init__(self, sockfile, dbname, db, cursor, version):
+ """Constructor.
+
+ Args:
+ sockfile: Path to the socket file.
+ dbname: Name of the database for that socket file.
+ db: A MySQLdb connection opened to that socket file.
+ cursor: A cursor acquired from that connection.
+ version: What version is this MySQL running (from `SELECT VERSION()').
+ """
+ self.sockfile = sockfile
+ self.dbname = dbname
+ self.db = db
+ self.cursor = cursor
+ self.version = version
+ self.master = None
+ self.slave_bytes_executed = None
+ self.relay_bytes_relayed = None
+
+ version = version.split(".")
+ try:
+ self.major = int(version[0])
+ self.medium = int(version[1])
+ except (ValueError, IndexError) as e:
+ self.major = self.medium = 0
+
+ def __str__(self):
+ return "DB(%r, %r, version=%r)" % (self.sockfile, self.dbname,
+ self.version)
+
+ def __repr__(self):
+ return self.__str__()
+
+ def isShowGlobalStatusSafe(self):
+ """Returns whether or not SHOW GLOBAL STATUS is safe to run."""
+ # We can't run SHOW GLOBAL STATUS on versions prior to 5.1 because it
+ # locks the entire database for too long and severely impacts traffic.
+ return self.major > 5 or (self.major == 5 and self.medium >= 1)
+
+ def query(self, sql):
+ """Executes the given SQL statement and returns a sequence of rows."""
+ assert self.cursor, "%s already closed?" % (self,)
+ try:
+ self.cursor.execute(sql)
+ except MySQLdb.OperationalError as exc:
+ if exc.errno != 2006: # "MySQL server has gone away" # pylint:disable=no-member
+ raise
+ self._reconnect()
+ return self.cursor.fetchall()
+
+ def close(self):
+ """Closes the connection to this MySQL server."""
+ if self.cursor:
+ self.cursor.close()
+ self.cursor = None
+ if self.db:
+ self.db.close()
+ self.db = None
+
+ def _reconnect(self):
+ """Reconnects to this MySQL server."""
+ self.close()
+ self.db = mysql_connect(self.sockfile)
+ self.cursor = self.db.cursor()
def mysql_connect(sockfile):
- """Connects to the MySQL server using the specified socket file."""
- user, passwd = mysqlconf.get_user_password(sockfile)
- return MySQLdb.connect(unix_socket=sockfile,
- connect_timeout=CONNECT_TIMEOUT,
- user=user, passwd=passwd)
+ """Connects to the MySQL server using the specified socket file."""
+ user, passwd = mysqlconf.get_user_password(sockfile)
+ return MySQLdb.connect(unix_socket=sockfile,
+ connect_timeout=CONNECT_TIMEOUT,
+ user=user, passwd=passwd)
def todict(db, row):
- """Transforms a row (returned by DB.query) into a dict keyed by column names.
-
- Args:
- db: The DB instance from which this row was obtained.
- row: A row as returned by DB.query
- """
- d = {}
- for i, field in enumerate(db.cursor.description):
- column = field[0].lower() # Lower-case to normalize field names.
- d[column] = row[i]
- return d
+ """Transforms a row (returned by DB.query) into a dict keyed by column names.
+
+ Args:
+ db: The DB instance from which this row was obtained.
+ row: A row as returned by DB.query
+ """
+ d = {}
+ for i, field in enumerate(db.cursor.description):
+ column = field[0].lower() # Lower-case to normalize field names.
+ d[column] = row[i]
+ return d
+
def get_dbname(sockfile):
- """Returns the name of the DB based on the path to the socket file."""
- if sockfile in DEFAULT_SOCKFILES:
- return "default"
- m = re.search("/mysql-(.+)/[^.]+\.sock$", sockfile)
- if not m:
- utils.err("error: couldn't guess the name of the DB for " + sockfile)
- return None
- return m.group(1)
+ """Returns the name of the DB based on the path to the socket file."""
+ if sockfile in DEFAULT_SOCKFILES:
+ return "default"
+ m = re.search("/mysql-(.+)/[^.]+\.sock$", sockfile)
+ if not m:
+ utils.err("error: couldn't guess the name of the DB for " + sockfile)
+ return None
+ return m.group(1)
def find_sockfiles():
- """Returns a list of paths to socket files to monitor."""
- paths = []
- # Look for socket files.
- for dir in SEARCH_DIRS:
- if not os.path.isdir(dir) or not os.access(dir, os.R_OK):
- continue
- for name in os.listdir(dir):
- subdir = os.path.join(dir, name)
- if not os.path.isdir(subdir) or not os.access(subdir, os.R_OK):
- continue
- for subname in os.listdir(subdir):
- path = os.path.join(subdir, subname)
- if utils.is_sockfile(path):
- paths.append(path)
- break # We only expect 1 socket file per DB, so get out.
- # Try the default locations.
- for sockfile in DEFAULT_SOCKFILES:
- if not utils.is_sockfile(sockfile):
- continue
- paths.append(sockfile)
- return paths
+ """Returns a list of paths to socket files to monitor."""
+ paths = []
+ # Look for socket files.
+ for dir in SEARCH_DIRS:
+ if not os.path.isdir(dir) or not os.access(dir, os.R_OK):
+ continue
+ for name in os.listdir(dir):
+ subdir = os.path.join(dir, name)
+ if not os.path.isdir(subdir) or not os.access(subdir, os.R_OK):
+ continue
+ for subname in os.listdir(subdir):
+ path = os.path.join(subdir, subname)
+ if utils.is_sockfile(path):
+ paths.append(path)
+ break # We only expect 1 socket file per DB, so get out.
+ # Try the default locations.
+ for sockfile in DEFAULT_SOCKFILES:
+ if not utils.is_sockfile(sockfile):
+ continue
+ paths.append(sockfile)
+ return paths
def find_databases(dbs=None):
- """Returns a map of dbname (string) to DB instances to monitor.
-
- Args:
- dbs: A map of dbname (string) to DB instances already monitored.
- This map will be modified in place if it's not None.
- """
- sockfiles = find_sockfiles()
- if dbs is None:
- dbs = {}
- for sockfile in sockfiles:
- dbname = get_dbname(sockfile)
- if dbname in dbs:
- continue
- if not dbname:
- continue
- try:
- db = mysql_connect(sockfile)
- cursor = db.cursor()
- cursor.execute("SELECT VERSION()")
- except (EnvironmentError, EOFError, RuntimeError, socket.error,
- MySQLdb.MySQLError) as e:
- utils.err("Couldn't connect to %s: %s" % (sockfile, e))
- continue
- version = cursor.fetchone()[0]
- dbs[dbname] = DB(sockfile, dbname, db, cursor, version)
- return dbs
+ """Returns a map of dbname (string) to DB instances to monitor.
+
+ Args:
+ dbs: A map of dbname (string) to DB instances already monitored.
+ This map will be modified in place if it's not None.
+ """
+ sockfiles = find_sockfiles()
+ if dbs is None:
+ dbs = {}
+ for sockfile in sockfiles:
+ dbname = get_dbname(sockfile)
+ if dbname in dbs:
+ continue
+ if not dbname:
+ continue
+ try:
+ db = mysql_connect(sockfile)
+ cursor = db.cursor()
+ cursor.execute("SELECT VERSION()")
+ except (EnvironmentError, EOFError, RuntimeError, socket.error,
+ MySQLdb.MySQLError) as e:
+ utils.err("Couldn't connect to %s: %s" % (sockfile, e))
+ continue
+ version = cursor.fetchone()[0]
+ dbs[dbname] = DB(sockfile, dbname, db, cursor, version)
+ return dbs
def now():
- return int(time.time())
+ return int(time.time())
def isyes(s):
- if s.lower() == "yes":
- return 1
- return 0
+ if s.lower() == "yes":
+ return 1
+ return 0
def collectInnodbStatus(db):
- """Collects and prints InnoDB stats about the given DB instance."""
- ts = now()
- def printmetric(metric, value, tags=""):
- print("mysql.%s %d %s schema=%s%s" % (metric, ts, value, db.dbname, tags))
-
- innodb_status = db.query("SHOW ENGINE INNODB STATUS")[0][2]
- m = re.search("^(\d{6}\s+\d{1,2}:\d\d:\d\d) INNODB MONITOR OUTPUT$",
- innodb_status, re.M)
- if m: # If we have it, try to use InnoDB's own timestamp.
- ts = int(time.mktime(time.strptime(m.group(1), "%y%m%d %H:%M:%S")))
-
- line = None
- def match(regexp):
- return re.match(regexp, line)
-
- for line in innodb_status.split("\n"):
- # SEMAPHORES
- m = match("OS WAIT ARRAY INFO: reservation count (\d+), signal count (\d+)")
- if m:
- printmetric("innodb.oswait_array.reservation_count", m.group(1))
- printmetric("innodb.oswait_array.signal_count", m.group(2))
- continue
- m = match("Mutex spin waits (\d+), rounds (\d+), OS waits (\d+)")
- if m:
- printmetric("innodb.locks.spin_waits", m.group(1), " type=mutex")
- printmetric("innodb.locks.rounds", m.group(2), " type=mutex")
- printmetric("innodb.locks.os_waits", m.group(3), " type=mutex")
- continue
- m = match("RW-shared spins (\d+), OS waits (\d+);"
- " RW-excl spins (\d+), OS waits (\d+)")
- if m:
- printmetric("innodb.locks.spin_waits", m.group(1), " type=rw-shared")
- printmetric("innodb.locks.os_waits", m.group(2), " type=rw-shared")
- printmetric("innodb.locks.spin_waits", m.group(3), " type=rw-exclusive")
- printmetric("innodb.locks.os_waits", m.group(4), " type=rw-exclusive")
- continue
- # GG 20141015 - RW-shared and RW-excl got separate lines and rounds in 5.5+
- m = match("RW-shared spins (\d+), rounds (\d+), OS waits (\d+)")
- if m:
- printmetric("locks.spin_waits", m.group(1), " type=rw-shared")
- printmetric("locks.rounds", m.group(2), " type=rw-shared")
- printmetric("locks.os_waits", m.group(3), " type=rw-shared")
- continue
- m = match("RW-excl spins (\d+), rounds (\d+), OS waits (\d+)")
- if m:
- printmetric("locks.spin_waits", m.group(1), " type=rw-exclusive")
- printmetric("locks.rounds", m.group(2), " type=rw-exclusive")
- printmetric("locks.os_waits", m.group(3), " type=rw-exclusive")
- continue
- # INSERT BUFFER AND ADAPTIVE HASH INDEX
- # TODO(tsuna): According to the code in ibuf0ibuf.c, this line and
- # the following one can appear multiple times. I've never seen this.
- # If that happens, we need to aggregate the values here instead of
- # printing them directly.
- m = match("Ibuf: size (\d+), free list len (\d+), seg size (\d+),")
- if m:
- printmetric("innodb.ibuf.size", m.group(1))
- printmetric("innodb.ibuf.free_list_len", m.group(2))
- printmetric("innodb.ibuf.seg_size", m.group(3))
- continue
- m = match("(\d+) inserts, (\d+) merged recs, (\d+) merges")
- if m:
- printmetric("innodb.ibuf.inserts", m.group(1))
- printmetric("innodb.ibuf.merged_recs", m.group(2))
- printmetric("innodb.ibuf.merges", m.group(3))
- continue
- # ROW OPERATIONS
- m = match("\d+ queries inside InnoDB, (\d+) queries in queue")
- if m:
- printmetric("innodb.queries_queued", m.group(1))
- continue
- m = match("(\d+) read views open inside InnoDB")
- if m:
- printmetric("innodb.opened_read_views", m.group(1))
- continue
- # TRANSACTION
- m = match("History list length (\d+)")
- if m:
- printmetric("innodb.history_list_length", m.group(1))
- continue
+ """Collects and prints InnoDB stats about the given DB instance."""
+ ts = now()
+
+ def printmetric(metric, value, tags=""):
+ print("mysql.%s %d %s schema=%s%s" % (metric, ts, value, db.dbname, tags))
+
+ innodb_status = db.query("SHOW ENGINE INNODB STATUS")[0][2]
+ m = re.search("^(\d{6}\s+\d{1,2}:\d\d:\d\d) INNODB MONITOR OUTPUT$",
+ innodb_status, re.M)
+ if m: # If we have it, try to use InnoDB's own timestamp.
+ ts = int(time.mktime(time.strptime(m.group(1), "%y%m%d %H:%M:%S")))
+
+ line = None
+
+ def match(regexp):
+ return re.match(regexp, line)
+
+ for line in innodb_status.split("\n"):
+ # SEMAPHORES
+ m = match("OS WAIT ARRAY INFO: reservation count (\d+), signal count (\d+)")
+ if m:
+ printmetric("innodb.oswait_array.reservation_count", m.group(1))
+ printmetric("innodb.oswait_array.signal_count", m.group(2))
+ continue
+ m = match("Mutex spin waits (\d+), rounds (\d+), OS waits (\d+)")
+ if m:
+ printmetric("innodb.locks.spin_waits", m.group(1), " type=mutex")
+ printmetric("innodb.locks.rounds", m.group(2), " type=mutex")
+ printmetric("innodb.locks.os_waits", m.group(3), " type=mutex")
+ continue
+ m = match("RW-shared spins (\d+), OS waits (\d+);"
+ " RW-excl spins (\d+), OS waits (\d+)")
+ if m:
+ printmetric("innodb.locks.spin_waits", m.group(1), " type=rw-shared")
+ printmetric("innodb.locks.os_waits", m.group(2), " type=rw-shared")
+ printmetric("innodb.locks.spin_waits", m.group(3), " type=rw-exclusive")
+ printmetric("innodb.locks.os_waits", m.group(4), " type=rw-exclusive")
+ continue
+ # GG 20141015 - RW-shared and RW-excl got separate lines and rounds in 5.5+
+ m = match("RW-shared spins (\d+), rounds (\d+), OS waits (\d+)")
+ if m:
+ printmetric("locks.spin_waits", m.group(1), " type=rw-shared")
+ printmetric("locks.rounds", m.group(2), " type=rw-shared")
+ printmetric("locks.os_waits", m.group(3), " type=rw-shared")
+ continue
+ m = match("RW-excl spins (\d+), rounds (\d+), OS waits (\d+)")
+ if m:
+ printmetric("locks.spin_waits", m.group(1), " type=rw-exclusive")
+ printmetric("locks.rounds", m.group(2), " type=rw-exclusive")
+ printmetric("locks.os_waits", m.group(3), " type=rw-exclusive")
+ continue
+ # INSERT BUFFER AND ADAPTIVE HASH INDEX
+ # TODO(tsuna): According to the code in ibuf0ibuf.c, this line and
+ # the following one can appear multiple times. I've never seen this.
+ # If that happens, we need to aggregate the values here instead of
+ # printing them directly.
+ m = match("Ibuf: size (\d+), free list len (\d+), seg size (\d+),")
+ if m:
+ printmetric("innodb.ibuf.size", m.group(1))
+ printmetric("innodb.ibuf.free_list_len", m.group(2))
+ printmetric("innodb.ibuf.seg_size", m.group(3))
+ continue
+ m = match("(\d+) inserts, (\d+) merged recs, (\d+) merges")
+ if m:
+ printmetric("innodb.ibuf.inserts", m.group(1))
+ printmetric("innodb.ibuf.merged_recs", m.group(2))
+ printmetric("innodb.ibuf.merges", m.group(3))
+ continue
+ # ROW OPERATIONS
+ m = match("\d+ queries inside InnoDB, (\d+) queries in queue")
+ if m:
+ printmetric("innodb.queries_queued", m.group(1))
+ continue
+ m = match("(\d+) read views open inside InnoDB")
+ if m:
+ printmetric("innodb.opened_read_views", m.group(1))
+ continue
+ # TRANSACTION
+ m = match("History list length (\d+)")
+ if m:
+ printmetric("innodb.history_list_length", m.group(1))
+ continue
def collect(db):
- """Collects and prints stats about the given DB instance."""
-
- ts = now()
- def printmetric(metric, value, tags=""):
- print("mysql.%s %d %s schema=%s%s" % (metric, ts, value, db.dbname, tags))
-
- has_innodb = False
- if db.isShowGlobalStatusSafe():
- for metric, value in db.query("SHOW GLOBAL STATUS"):
- try:
- if "." in value:
- value = float(value)
- else:
- value = int(value)
- except ValueError:
- continue
- metric = metric.lower()
- has_innodb = has_innodb or metric.startswith("innodb")
- printmetric(metric, value)
-
- if has_innodb:
- collectInnodbStatus(db)
-
- if has_innodb and False: # Disabled because it's too expensive for InnoDB.
- waits = {} # maps a mutex name to the number of waits
+ """Collects and prints stats about the given DB instance."""
+
ts = now()
- for engine, mutex, status in db.query("SHOW ENGINE INNODB MUTEX"):
- if not status.startswith("os_waits"):
- continue
- m = re.search("&(\w+)(?:->(\w+))?$", mutex)
- if not m:
- continue
- mutex, kind = m.groups()
- if kind:
- mutex += "." + kind
- wait_count = int(status.split("=", 1)[1])
- waits[mutex] = waits.get(mutex, 0) + wait_count
- for mutex, wait_count in waits.items():
- printmetric("innodb.locks", wait_count, " mutex=" + mutex)
-
- ts = now()
-
- mysql_slave_status = db.query("SHOW SLAVE STATUS")
- if mysql_slave_status:
- slave_status = todict(db, mysql_slave_status[0])
- master_host = slave_status["master_host"]
- else:
- master_host = None
-
- if master_host and master_host != "None":
- sbm = slave_status.get("seconds_behind_master")
- if isinstance(sbm, INTEGER_TYPES):
- printmetric("slave.seconds_behind_master", sbm)
- printmetric("slave.bytes_executed", slave_status["exec_master_log_pos"])
- printmetric("slave.bytes_relayed", slave_status["read_master_log_pos"])
- printmetric("slave.thread_io_running",
- isyes(slave_status["slave_io_running"]))
- printmetric("slave.thread_sql_running",
- isyes(slave_status["slave_sql_running"]))
-
- states = {} # maps a connection state to number of connections in that state
- for row in db.query("SHOW PROCESSLIST"):
- id, user, host, db_, cmd, time, state = row[:7]
- states[cmd] = states.get(cmd, 0) + 1
- for state, count in states.items():
- state = state.lower().replace(" ", "_")
- printmetric("connection_states", count, " state=%s" % state)
+ def printmetric(metric, value, tags=""):
+ print("mysql.%s %d %s schema=%s%s" % (metric, ts, value, db.dbname, tags))
+
+ has_innodb = False
+ if db.isShowGlobalStatusSafe():
+ for metric, value in db.query("SHOW GLOBAL STATUS"):
+ try:
+ if "." in value:
+ value = float(value)
+ else:
+ value = int(value)
+ except ValueError:
+ continue
+ metric = metric.lower()
+ has_innodb = has_innodb or metric.startswith("innodb")
+ printmetric(metric, value)
+
+ if has_innodb:
+ collectInnodbStatus(db)
+
+ if has_innodb and False: # Disabled because it's too expensive for InnoDB.
+ waits = {} # maps a mutex name to the number of waits
+ ts = now()
+ for engine, mutex, status in db.query("SHOW ENGINE INNODB MUTEX"):
+ if not status.startswith("os_waits"):
+ continue
+ m = re.search("&(\w+)(?:->(\w+))?$", mutex)
+ if not m:
+ continue
+ mutex, kind = m.groups()
+ if kind:
+ mutex += "." + kind
+ wait_count = int(status.split("=", 1)[1])
+ waits[mutex] = waits.get(mutex, 0) + wait_count
+ for mutex, wait_count in waits.items():
+ printmetric("innodb.locks", wait_count, " mutex=" + mutex)
-def main(args):
- """Collects and dumps stats from a MySQL server."""
- if not find_sockfiles(): # Nothing to monitor.
- return 13 # Ask tcollector to not respawn us.
- if MySQLdb is None:
- utils.err("error: Python module `MySQLdb' is missing")
- return 1
-
- last_db_refresh = now()
- dbs = find_databases()
- while True:
ts = now()
- if ts - last_db_refresh >= DB_REFRESH_INTERVAL:
- find_databases(dbs)
- last_db_refresh = ts
-
- errs = []
- for dbname, db in dbs.items():
- try:
- collect(db)
- except (EnvironmentError, EOFError, RuntimeError, socket.error,
- MySQLdb.MySQLError) as e:
- if isinstance(e, IOError) and e[0] == errno.EPIPE:
- # Exit on a broken pipe. There's no point in continuing
- # because no one will read our stdout anyway.
- return 2
- utils.err("error: failed to collect data from %s: %s" % (db, e))
- errs.append(dbname)
-
- for dbname in errs:
- del dbs[dbname]
-
- sys.stdout.flush()
- time.sleep(COLLECTION_INTERVAL)
+
+ mysql_slave_status = db.query("SHOW SLAVE STATUS")
+ if mysql_slave_status:
+ slave_status = todict(db, mysql_slave_status[0])
+ master_host = slave_status["master_host"]
+ else:
+ master_host = None
+
+ if master_host and master_host != "None":
+ sbm = slave_status.get("seconds_behind_master")
+ if isinstance(sbm, INTEGER_TYPES):
+ printmetric("slave.seconds_behind_master", sbm)
+ printmetric("slave.bytes_executed", slave_status["exec_master_log_pos"])
+ printmetric("slave.bytes_relayed", slave_status["read_master_log_pos"])
+ printmetric("slave.thread_io_running",
+ isyes(slave_status["slave_io_running"]))
+ printmetric("slave.thread_sql_running",
+ isyes(slave_status["slave_sql_running"]))
+
+ states = {} # maps a connection state to number of connections in that state
+ for row in db.query("SHOW PROCESSLIST"):
+ id, user, host, db_, cmd, time, state = row[:7]
+ states[cmd] = states.get(cmd, 0) + 1
+ for state, count in states.items():
+ state = state.lower().replace(" ", "_")
+ printmetric("connection_states", count, " state=%s" % state)
+
+
+def main(args):
+ """Collects and dumps stats from a MySQL server."""
+ if not find_sockfiles(): # Nothing to monitor.
+ return 13 # ask tcollector to not respawn us.
+ if MySQLdb is None:
+ utils.err("error: Python module `MySQLdb' is missing")
+ return 1
+
+ last_db_refresh = now()
+ dbs = find_databases()
+ while True:
+ ts = now()
+ if ts - last_db_refresh >= DB_REFRESH_INTERVAL:
+ find_databases(dbs)
+ last_db_refresh = ts
+
+ errs = []
+ for dbname, db in dbs.items():
+ try:
+ collect(db)
+ except (EnvironmentError, EOFError, RuntimeError, socket.error,
+ MySQLdb.MySQLError) as e:
+                if isinstance(e, IOError) and e.errno == errno.EPIPE:
+ # Exit on a broken pipe. There's no point in continuing
+ # because no one will read our stdout anyway.
+ return 2
+ utils.err("error: failed to collect data from %s: %s" % (db, e))
+ errs.append(dbname)
+
+ for dbname in errs:
+ del dbs[dbname]
+
+ sys.stdout.flush()
+ time.sleep(COLLECTION_INTERVAL)
if __name__ == "__main__":
- sys.stdin.close()
- sys.exit(main(sys.argv))
+ sys.stdin.close()
+ sys.exit(main(sys.argv))
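todict() above is the usual DB-API trick for turning a positional row into a dict keyed by lower-cased column names, which is what lets the slave-status code look up fields such as seconds_behind_master by name. An equivalent compact sketch:

    def todict(cursor, row):
        # cursor.description holds one 7-tuple per column; field[0] is its name.
        return dict((field[0].lower(), value)
                    for field, value in zip(cursor.description, row))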
diff --git a/collectors/0/netfilter.py b/collectors/0/netfilter.py
index 1dced88c..ab5f860b 100755
--- a/collectors/0/netfilter.py
+++ b/collectors/0/netfilter.py
@@ -38,33 +38,34 @@
basedir = "/proc/sys/net/netfilter"
+
def main():
"""netfilter main loop"""
utils.drop_privileges()
- if (os.path.isdir(basedir)):
+ if os.path.isdir(basedir):
while True:
ts = int(time.time())
-
- for s in STATS:
- try:
- f = open(basedir + "/" + s, 'r')
- value = f.readline().rstrip()
- print("proc.sys.net.netfilter.%s %d %s" % (s, ts, value))
- f.close()
+
+ for s in STATS:
+ try:
+ f = open(basedir + "/" + s, 'r')
+ value = f.readline().rstrip()
+ print("proc.sys.net.netfilter.%s %d %s" % (s, ts, value))
+ f.close()
except:
- # brute'ish, but should keep the collector reasonably future
- # proof if some of the stats disappear between kernel module
- # versions
- continue
+ # brute'ish, but should keep the collector reasonably future
+ # proof if some of the stats disappear between kernel module
+ # versions
+ continue
sys.stdout.flush()
time.sleep(interval)
- else:
- print ("%s does not exist - ip_conntrack probably missing")
- sys.exit(13) # we signal tcollector to not run us
-
+ else:
+        print("%s does not exist - ip_conntrack probably missing" % basedir)
+ return 13 # we signal tcollector to not run us
+
if __name__ == "__main__":
sys.exit(main())
diff --git a/collectors/0/netstat.py b/collectors/0/netstat.py
index 2f3487e7..8a91217e 100755
--- a/collectors/0/netstat.py
+++ b/collectors/0/netstat.py
@@ -390,5 +390,6 @@ def parse_stats(stats, filename):
sys.stdout.flush()
time.sleep(interval)
+
if __name__ == "__main__":
sys.exit(main())
diff --git a/collectors/0/nfsstat.py b/collectors/0/nfsstat.py
index 36a9c5d8..91c8ba8e 100755
--- a/collectors/0/nfsstat.py
+++ b/collectors/0/nfsstat.py
@@ -90,5 +90,6 @@ def main():
sys.stdout.flush()
time.sleep(COLLECTION_INTERVAL)
+
if __name__ == "__main__":
sys.exit(main())
diff --git a/collectors/0/ntpstat.py b/collectors/0/ntpstat.py
index d07ed5e0..98111430 100755
--- a/collectors/0/ntpstat.py
+++ b/collectors/0/ntpstat.py
@@ -19,8 +19,6 @@
from __future__ import print_function
-import os
-import socket
import subprocess
import sys
import time
@@ -29,19 +27,20 @@
from collectors.lib import utils
try:
- from collectors.etc import ntpstat_conf
+ from collectors.etc import ntpstat_conf
except ImportError:
- ntpstat_conf = None
+ ntpstat_conf = None
+
+DEFAULT_COLLECTION_INTERVAL = 60
-DEFAULT_COLLECTION_INTERVAL=60
def main():
"""ntpstats main loop"""
- collection_interval=DEFAULT_COLLECTION_INTERVAL
- if(ntpstat_conf):
+ collection_interval = DEFAULT_COLLECTION_INTERVAL
+    if ntpstat_conf:
config = ntpstat_conf.get_config()
- collection_interval=config['collection_interval']
+ collection_interval = config['collection_interval']
utils.drop_privileges()
@@ -52,7 +51,7 @@ def main():
except OSError as e:
if e.errno == errno.ENOENT:
# looks like ntpdc is not available, stop using this collector
- sys.exit(13) # we signal tcollector to stop using this
+ return 13 # we signal tcollector to stop using this
raise
stdout, _ = ntp_proc.communicate()
@@ -64,7 +63,7 @@ def main():
if len(fields) <= 0:
continue
if fields[0].startswith("*"):
- offset=fields[8]
+ offset = fields[8]
continue
print("ntp.offset %d %s" % (ts, offset))
else:
@@ -73,5 +72,6 @@ def main():
sys.stdout.flush()
time.sleep(collection_interval)
+
if __name__ == "__main__":
- main()
+ sys.exit(main())
diff --git a/collectors/0/postgresql.py b/collectors/0/postgresql.py
index ab003acc..c4824fc3 100755
--- a/collectors/0/postgresql.py
+++ b/collectors/0/postgresql.py
@@ -22,87 +22,89 @@
is set in postgresql.conf.
"""
+from collectors.lib import utils
+from collectors.lib import postgresqlutils
+
import sys
-import os
import time
import socket
import errno
-COLLECTION_INTERVAL = 15 # seconds
+COLLECTION_INTERVAL = 15 # seconds
-from collectors.lib import utils
-from collectors.lib import postgresqlutils
def collect(db):
- """
- Collects and prints stats.
-
- Here we collect only general info, for full list of data for collection
- see http://www.postgresql.org/docs/9.2/static/monitoring-stats.html
- """
-
- try:
- cursor = db.cursor()
-
- # general statics
- cursor.execute("SELECT pg_stat_database.*, pg_database_size"
- " (pg_database.datname) AS size FROM pg_database JOIN"
- " pg_stat_database ON pg_database.datname ="
- " pg_stat_database.datname WHERE pg_stat_database.datname"
- " NOT IN ('template0', 'template1', 'postgres')")
- ts = time.time()
- stats = cursor.fetchall()
-
-# datid | datname | numbackends | xact_commit | xact_rollback | blks_read | blks_hit | tup_returned | tup_fetched | tup_inserted | tup_updated | tup_deleted | conflicts | temp_files | temp_bytes | deadlocks | blk_read_time | blk_write_time | stats_reset | size
- result = {}
- for stat in stats:
- database = stat[1]
- result[database] = stat
-
- for database in result:
- for i in range(2,len(cursor.description)):
- metric = cursor.description[i].name
- value = result[database][i]
- try:
- if metric in ("stats_reset"):
- continue
- print("postgresql.%s %i %s database=%s"
- % (metric, ts, value, database))
- except:
- utils.err("got here")
- continue
-
- # connections
- cursor.execute("SELECT datname, count(datname) FROM pg_stat_activity"
- " GROUP BY pg_stat_activity.datname")
- ts = time.time()
- connections = cursor.fetchall()
-
- for database, connection in connections:
- print("postgresql.connections %i %s database=%s"
- % (ts, connection, database))
-
- except (EnvironmentError, EOFError, RuntimeError, socket.error) as e:
- if isinstance(e, IOError) and e[0] == errno.EPIPE:
- # exit on a broken pipe. There is no point in continuing
- # because no one will read our stdout anyway.
- return 2
- utils.err("error: failed to collect data: %s" % e)
+ """
+ Collects and prints stats.
+
+ Here we collect only general info, for full list of data for collection
+ see http://www.postgresql.org/docs/9.2/static/monitoring-stats.html
+ """
+
+ try:
+ cursor = db.cursor()
+
+ # general statics
+ cursor.execute("SELECT pg_stat_database.*, pg_database_size"
+ " (pg_database.datname) AS size FROM pg_database JOIN"
+ " pg_stat_database ON pg_database.datname ="
+ " pg_stat_database.datname WHERE pg_stat_database.datname"
+ " NOT IN ('template0', 'template1', 'postgres')")
+ ts = time.time()
+ stats = cursor.fetchall()
+
+ # datid | datname | numbackends | xact_commit | xact_rollback | blks_read | blks_hit | tup_returned | tup_fetched | tup_inserted | tup_updated | tup_deleted | conflicts | temp_files | temp_bytes | deadlocks | blk_read_time | blk_write_time | stats_reset | size
+ result = {}
+ for stat in stats:
+ database = stat[1]
+ result[database] = stat
+
+ for database in result:
+ for i in range(2, len(cursor.description)):
+ metric = cursor.description[i].name
+ value = result[database][i]
+ try:
+                    if metric in ("stats_reset",):
+ continue
+ print("postgresql.%s %i %s database=%s"
+ % (metric, ts, value, database))
+ except:
+                    utils.err("error: failed to print metric %s for database %s" % (metric, database))
+ continue
+
+ # connections
+ cursor.execute("SELECT datname, count(datname) FROM pg_stat_activity"
+ " GROUP BY pg_stat_activity.datname")
+ ts = time.time()
+ connections = cursor.fetchall()
+
+ for database, connection in connections:
+ print("postgresql.connections %i %s database=%s"
+ % (ts, connection, database))
+
+ except (EnvironmentError, EOFError, RuntimeError, socket.error) as e:
+        if isinstance(e, IOError) and e.errno == errno.EPIPE:
+ # exit on a broken pipe. There is no point in continuing
+ # because no one will read our stdout anyway.
+ return 2
+ utils.err("error: failed to collect data: %s" % e)
+
def main(args):
- """Collects and dumps stats from a PostgreSQL server."""
+ """Collects and dumps stats from a PostgreSQL server."""
+
+ try:
+ db = postgresqlutils.connect()
+ except (Exception) as e:
+ utils.err("error: Could not initialize collector : %s" % (e))
+ return 13 # Ask tcollector to not respawn us
- try:
- db = postgresqlutils.connect()
- except (Exception) as e:
- utils.err("error: Could not initialize collector : %s" % (e))
- return 13 # Ask tcollector to not respawn us
+ while True:
+ collect(db)
+ sys.stdout.flush()
+ time.sleep(COLLECTION_INTERVAL)
- while True:
- collect(db)
- sys.stdout.flush()
- time.sleep(COLLECTION_INTERVAL)
if __name__ == "__main__":
- sys.stdin.close()
- sys.exit(main(sys.argv))
\ No newline at end of file
+ sys.stdin.close()
+ sys.exit(main(sys.argv))
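collect() above walks cursor.description to emit one time series per pg_stat_database column (skipping stats_reset), tagged with the database name. The core of that emission loop, distilled:

    def emit_rows(cursor, ts):
        # One metric per column from index 2 onwards, tagged with the database.
        for row in cursor.fetchall():
            database = row[1]
            for i in range(2, len(cursor.description)):
                metric = cursor.description[i].name
                if metric == "stats_reset":
                    continue
                print("postgresql.%s %i %s database=%s" % (metric, ts, row[i], database))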
diff --git a/collectors/0/postgresql_replication.py b/collectors/0/postgresql_replication.py
index d5e99221..274771ff 100755
--- a/collectors/0/postgresql_replication.py
+++ b/collectors/0/postgresql_replication.py
@@ -22,96 +22,98 @@
is set in postgresql.conf.
"""
+from collectors.lib import utils
+from collectors.lib import postgresqlutils
+
import sys
-import os
import time
import socket
import errno
import re
import subprocess
-COLLECTION_INTERVAL = 5 # seconds
+COLLECTION_INTERVAL = 5 # seconds
-from collectors.lib import utils
-from collectors.lib import postgresqlutils
def collect(db):
- """
- Collects and prints replication statistics.
- """
-
- try:
- cursor = db.cursor()
-
- # Replication lag time (could be slave only or a master / slave combo)
- cursor.execute("SELECT "
- "CASE WHEN pg_is_in_recovery() THEN (EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()) * 1000)::BIGINT ELSE NULL END AS replication_lag_time, "
- "pg_xlog_location_diff(pg_last_xlog_receive_location(), pg_last_xlog_replay_location()) AS replication_lag_bytes, "
- "pg_is_in_recovery() AS in_recovery;")
- ts = time.time()
- stats = cursor.fetchall()
-
- if (stats[0][0] is not None):
- print("postgresql.replication.upstream.lag.time %i %s"
- % (ts, stats[0][0]))
-
- if (stats[0][1] is not None):
- print("postgresql.replication.upstream.lag.bytes %i %s"
- % (ts, stats[0][1]))
-
- print("postgresql.replication.recovering %i %i"
- % (ts, stats[0][2]))
-
- # WAL receiver process running (could be slave only or master / slave combo)
- ps_out = subprocess.check_output(["/bin/ps", "aux"] , stderr=subprocess.STDOUT)
- ps_out = ps_out.split("\n")
- ts = time.time()
-
- wal_receiver_running = 0
- for l in ps_out:
- l = l.strip()
- if (re.match (".*wal\\sreceiver.*", l)):
- wal_receiver_running = 1;
- break
-
- print("postgresql.replication.walreceiver.running %i %s"
- % (ts, wal_receiver_running))
-
- # WAL sender process info (could be master only or master / slave combo)
- cursor.execute("SELECT client_addr, client_port, "
- "pg_xlog_location_diff(sent_location, replay_location) AS lag_bytes "
- "FROM pg_stat_replication;")
- ts = time.time()
- stats = cursor.fetchall()
-
- print("postgresql.replication.downstream.count %i %i"
- % (ts, len(stats)))
-
- for stat in stats:
- print("postgresql.replication.downstream.lag.bytes %i %i client_ip=%s client_port=%s"
- % (ts, stat[2], stat[0], stat[1]))
-
- except (EnvironmentError, EOFError, RuntimeError, socket.error) as e:
- if isinstance(e, IOError) and e[0] == errno.EPIPE:
- # exit on a broken pipe. There is no point in continuing
- # because no one will read our stdout anyway.
- return 2
- utils.err("error: failed to collect data: %s" % e)
+ """
+ Collects and prints replication statistics.
+ """
+
+ try:
+ cursor = db.cursor()
+
+ # Replication lag time (could be slave only or a master / slave combo)
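+        # lag time below is in milliseconds (epoch-seconds difference * 1000); lag bytes come from pg_xlog_location_diff()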
+ cursor.execute("SELECT "
+ "CASE WHEN pg_is_in_recovery() THEN (EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()) * 1000)::BIGINT ELSE NULL END AS replication_lag_time, "
+ "pg_xlog_location_diff(pg_last_xlog_receive_location(), pg_last_xlog_replay_location()) AS replication_lag_bytes, "
+ "pg_is_in_recovery() AS in_recovery;")
+ ts = time.time()
+ stats = cursor.fetchall()
+
+ if (stats[0][0] is not None):
+ print("postgresql.replication.upstream.lag.time %i %s"
+ % (ts, stats[0][0]))
+
+ if (stats[0][1] is not None):
+ print("postgresql.replication.upstream.lag.bytes %i %s"
+ % (ts, stats[0][1]))
+
+ print("postgresql.replication.recovering %i %i"
+ % (ts, stats[0][2]))
+
+ # WAL receiver process running (could be slave only or master / slave combo)
+ ps_out = subprocess.check_output(["/bin/ps", "aux"], stderr=subprocess.STDOUT)
+        ps_out = ps_out.decode("utf-8").split("\n")  # check_output returns bytes under Python 3
+ ts = time.time()
+
+ wal_receiver_running = 0
+ for l in ps_out:
+ l = l.strip()
+ if (re.match(".*wal\\sreceiver.*", l)):
+                wal_receiver_running = 1
+ break
+
+ print("postgresql.replication.walreceiver.running %i %s"
+ % (ts, wal_receiver_running))
+
+ # WAL sender process info (could be master only or master / slave combo)
+ cursor.execute("SELECT client_addr, client_port, "
+ "pg_xlog_location_diff(sent_location, replay_location) AS lag_bytes "
+ "FROM pg_stat_replication;")
+ ts = time.time()
+ stats = cursor.fetchall()
+
+ print("postgresql.replication.downstream.count %i %i"
+ % (ts, len(stats)))
+
+ for stat in stats:
+ print("postgresql.replication.downstream.lag.bytes %i %i client_ip=%s client_port=%s"
+ % (ts, stat[2], stat[0], stat[1]))
+
+ except (EnvironmentError, EOFError, RuntimeError, socket.error) as e:
+        if isinstance(e, IOError) and e.errno == errno.EPIPE:
+ # exit on a broken pipe. There is no point in continuing
+ # because no one will read our stdout anyway.
+ return 2
+ utils.err("error: failed to collect data: %s" % e)
+
def main(args):
- """Collects and dumps stats from a PostgreSQL server."""
+ """Collects and dumps stats from a PostgreSQL server."""
+
+ try:
+ db = postgresqlutils.connect()
+ except (Exception) as e:
+ utils.err("error: Could not initialize collector : %s" % (e))
+ return 13 # ask tcollector to not respawn us
- try:
- db = postgresqlutils.connect()
- except (Exception) as e:
- utils.err("error: Could not initialize collector : %s" % (e))
- return 13 # Ask tcollector to not respawn us
+ while True:
+ collect(db)
+ sys.stdout.flush()
+ time.sleep(COLLECTION_INTERVAL)
- while True:
- collect(db)
- sys.stdout.flush()
- time.sleep(COLLECTION_INTERVAL)
if __name__ == "__main__":
- sys.stdin.close()
- sys.exit(main(sys.argv))
+ sys.stdin.close()
+ sys.exit(main(sys.argv))
diff --git a/collectors/0/procnettcp.py b/collectors/0/procnettcp.py
index 6b7dac9b..33044bfb 100755
--- a/collectors/0/procnettcp.py
+++ b/collectors/0/procnettcp.py
@@ -174,7 +174,7 @@ def main(unused_args):
raise
except IOError as e:
print("Failed to open input file: %s" % (e,), file=sys.stderr)
- return 13 # Ask tcollector to not re-start us immediately.
+ return 13 # ask tcollector to not re-start us immediately.
utils.drop_privileges()
while True:
@@ -233,5 +233,6 @@ def main(unused_args):
sys.stdout.flush()
time.sleep(interval)
+
if __name__ == "__main__":
sys.exit(main(sys.argv))
diff --git a/collectors/0/procstats.py b/collectors/0/procstats.py
index 0dd8e042..39991999 100755
--- a/collectors/0/procstats.py
+++ b/collectors/0/procstats.py
@@ -23,8 +23,8 @@
from collectors.lib import utils
-INTERRUPTS_INTVL_MULT = 4 # query softirqs every SOFTIRQS_INT_MULT * COLLECTION_INTERVAL seconds
-SOFTIRQS_INTVL_MULT = 4 # query softirqs every SOFTIRQS_INT_MULT * COLLECTION_INTERVAL seconds
+INTERRUPTS_INTVL_MULT = 4 # query softirqs every SOFTIRQS_INT_MULT * COLLECTION_INTERVAL seconds
+SOFTIRQS_INTVL_MULT = 4 # query softirqs every SOFTIRQS_INT_MULT * COLLECTION_INTERVAL seconds
# Modern Linux:
CPUSET_PATH = "/sys/fs/cgroup/cpuset"
if os.path.isdir("/dev/cpuset"):
@@ -40,7 +40,7 @@ def find_sysfs_numa_stats():
nodes = os.listdir(NUMADIR)
except OSError as exc:
if exc.errno == 2: # No such file or directory
- return [] # We don't have NUMA stats.
+ return [] # We don't have NUMA stats.
raise
nodes = [node for node in nodes if node.startswith("node")]
@@ -59,34 +59,35 @@ def print_numa_stats(numafiles):
"""From a list of files names, opens file, extracts and prints NUMA stats."""
for numafilename in numafiles:
numafile = open(numafilename)
- node_id = int(numafile.name[numafile.name.find("/node/node")+10:-9])
+ node_id = int(numafile.name[numafile.name.find("/node/node") + 10:-9])
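+        # e.g. a path like ".../node/node3/numastat" yields node_id 3 (10 == len("/node/node"), 9 == len("/numastat"))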
ts = int(time.time())
stats = dict(line.split() for line in numafile.read().splitlines())
- for stat, tag in (# hit: process wanted memory from this node and got it
- ("numa_hit", "hit"),
- # miss: process wanted another node and got it from
- # this one instead.
- ("numa_miss", "miss")):
+ for stat, tag in ( # hit: process wanted memory from this node and got it
+ ("numa_hit", "hit"),
+ # miss: process wanted another node and got it from
+ # this one instead.
+ ("numa_miss", "miss")):
print("sys.numa.zoneallocs %d %s node=%d type=%s"
- % (ts, stats[stat], node_id, tag))
+ % (ts, stats[stat], node_id, tag))
# Count this one as a separate metric because we can't sum up hit +
# miss + foreign, this would result in double-counting of all misses.
# See `zone_statistics' in the code of the kernel.
# foreign: process wanted memory from this node but got it from
# another node. So maybe this node is out of free pages.
print("sys.numa.foreign_allocs %d %s node=%d"
- % (ts, stats["numa_foreign"], node_id))
+ % (ts, stats["numa_foreign"], node_id))
# When is memory allocated to a node that's local or remote to where
# the process is running.
for stat, tag in (("local_node", "local"),
("other_node", "remote")):
print("sys.numa.allocation %d %s node=%d type=%s"
- % (ts, stats[stat], node_id, tag))
+ % (ts, stats[stat], node_id, tag))
# Pages successfully allocated with the interleave policy.
print("sys.numa.interleave %d %s node=%d type=hit"
- % (ts, stats["interleave_hit"], node_id))
+ % (ts, stats["interleave_hit"], node_id))
numafile.close()
+
def expand_numlist(s):
"""return a list of numbers from a list with ranges,
e.g. 4,5-10,14-16"""
@@ -95,51 +96,53 @@ def expand_numlist(s):
if '-' not in i:
r.append(int(i))
else:
- l,h = map(int, i.split('-'))
- r+= range(l,h+1)
+ l, h = map(int, i.split('-'))
+ r += range(l, h + 1)
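+    # e.g. expand_numlist("4,5-10") -> [4, 5, 6, 7, 8, 9, 10]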
return r
+
def cpus_csets(cpuset_path):
"""Return a hash of cpu_id_as_string->cset_name"""
try:
csets = os.listdir(cpuset_path)
except OSError as e:
- if e.errno == errno.ENOENT: # No such file or directory
- return {} # We don't have csets
+ if e.errno == errno.ENOENT: # No such file or directory
+ return {} # We don't have csets
raise
csets = [cset for cset in csets if os.path.isdir(os.path.join(cpuset_path, cset))]
cpu2cset = {}
for cset in csets:
- cpuspath = os.path.join(cpuset_path, cset, 'cpuset.cpus')
- if not os.path.isfile(cpuspath):
- cpuspath = os.path.join(cpuset_path, cset, 'cpus')
- if not os.path.isfile(cpuspath):
- # No such file?? Ignore csets
- sys.stderr.write("No 'cpuset.cpus' or 'cpus' file in %s!" % os.path.join(cpuset_path, cset))
- continue
-
- try:
- f_cpus = open(cpuspath)
- except:
- # Ignore that one and continue
- sys.stderr.write("Could not open %s!" % cpuspath)
- continue
-
- format_errors = 0
- for line in f_cpus:
- m = re.match('^[-0-9,]+$', line)
- if m:
- for c in expand_numlist(line):
- cpu2cset[str(c)] = cset
- else:
- format_errors += 1
- if format_errors > 0:
- sys.stderr.write("%d line(s) of %s were not in the expected format" % (format_errors, cpuspath))
+ cpuspath = os.path.join(cpuset_path, cset, 'cpuset.cpus')
+ if not os.path.isfile(cpuspath):
+ cpuspath = os.path.join(cpuset_path, cset, 'cpus')
+ if not os.path.isfile(cpuspath):
+ # No such file?? Ignore csets
+ sys.stderr.write("No 'cpuset.cpus' or 'cpus' file in %s!" % os.path.join(cpuset_path, cset))
+ continue
+
+ try:
+ f_cpus = open(cpuspath)
+ except:
+ # Ignore that one and continue
+ sys.stderr.write("Could not open %s!" % cpuspath)
+ continue
+
+ format_errors = 0
+ for line in f_cpus:
+ m = re.match('^[-0-9,]+$', line)
+ if m:
+ for c in expand_numlist(line):
+ cpu2cset[str(c)] = cset
+ else:
+ format_errors += 1
+ if format_errors > 0:
+ sys.stderr.write("%d line(s) of %s were not in the expected format" % (format_errors, cpuspath))
return cpu2cset
+
def main():
"""procstats main loop"""
@@ -152,19 +155,19 @@ def main():
f_interrupts = open("/proc/interrupts", "r")
f_scaling = "/sys/devices/system/cpu/cpu%s/cpufreq/%s_freq"
- f_scaling_min = dict([])
- f_scaling_max = dict([])
- f_scaling_cur = dict([])
+ f_scaling_min = dict([])
+ f_scaling_max = dict([])
+ f_scaling_cur = dict([])
f_softirqs = open("/proc/softirqs", "r")
for cpu in glob.glob("/sys/devices/system/cpu/cpu[0-9]*/cpufreq/scaling_cur_freq"):
m = re.match("/sys/devices/system/cpu/cpu([0-9]*)/cpufreq/scaling_cur_freq", cpu)
if not m:
continue
cpu_no = m.group(1)
- sys.stderr.write(f_scaling % (cpu_no,"min"))
- f_scaling_min[cpu_no] = open(f_scaling % (cpu_no,"cpuinfo_min"), "r")
- f_scaling_max[cpu_no] = open(f_scaling % (cpu_no,"cpuinfo_max"), "r")
- f_scaling_cur[cpu_no] = open(f_scaling % (cpu_no,"scaling_cur"), "r")
+ sys.stderr.write(f_scaling % (cpu_no, "min"))
+ f_scaling_min[cpu_no] = open(f_scaling % (cpu_no, "cpuinfo_min"), "r")
+ f_scaling_max[cpu_no] = open(f_scaling % (cpu_no, "cpuinfo_max"), "r")
+ f_scaling_cur[cpu_no] = open(f_scaling % (cpu_no, "scaling_cur"), "r")
numastats = find_sysfs_numa_stats()
utils.drop_privileges()
@@ -194,7 +197,7 @@ def main():
value = m.group(2)
name = re.sub("\W", "_", m.group(1)).lower().strip("_")
print("proc.meminfo.%s %d %s"
- % (name, ts, value))
+ % (name, ts, value))
# proc.vmstat
f_vmstat.seek(0)
@@ -229,15 +232,15 @@ def main():
tags = ''
fields = m.group(2).split()
cpu_types = ['user', 'nice', 'system', 'idle', 'iowait',
- 'irq', 'softirq', 'guest', 'guest_nice']
+ 'irq', 'softirq', 'guest', 'guest_nice']
# We use zip to ignore fields that don't exist.
for value, field_name in zip(fields, cpu_types):
print("proc.stat.cpu%s %d %s type=%s%s" % (metric_percpu,
- ts, value, field_name, tags))
+ ts, value, field_name, tags))
elif m.group(1) == "intr":
print(("proc.stat.intr %d %s"
- % (ts, m.group(2).split()[0])))
+ % (ts, m.group(2).split()[0])))
elif m.group(1) == "ctxt":
print("proc.stat.ctxt %d %s" % (ts, m.group(2)))
elif m.group(1) == "processes":
@@ -335,7 +338,7 @@ def print_interrupts(f_interrupts):
interrupt_dict[k] = int(val)
for k in interrupt_dict:
- print ("proc.interrupts %s %d %s" % (ts, interrupt_dict[k], k))
+ print("proc.interrupts %s %d %s" % (ts, interrupt_dict[k], k))
def print_irqs(f_softirqs):
@@ -356,12 +359,11 @@ def print_irqs(f_softirqs):
if not val.isdigit():
# something is weird, there should only be digit values
sys.stderr.write("Unexpected softirq value %r in"
- " %r: " % (val, cols))
+ " %r: " % (val, cols))
break
- print ("proc.softirqs %s %s type=%s cpu=%s"
- % (ts, val, irq_type, i))
+ print("proc.softirqs %s %s type=%s cpu=%s"
+ % (ts, val, irq_type, i))
if __name__ == "__main__":
- main()
-
+ sys.exit(main())
diff --git a/collectors/0/prometheus.py b/collectors/0/prometheus.py
index 3b6bb81f..078247c0 100755
--- a/collectors/0/prometheus.py
+++ b/collectors/0/prometheus.py
@@ -15,19 +15,22 @@
import sys
import time
+from http.client import HTTPConnection
+import json
-import schedule
-from prometheus_client.parser import text_string_to_metric_families
+from collectors.lib import utils
try:
- import json
+ import schedule
except ImportError:
- json = None
+ utils.err("schedule library is not installed")
+ sys.exit(13) # ask tcollector to not re-start us
try:
- from http.client import HTTPConnection
+ from prometheus_client.parser import text_string_to_metric_families
except ImportError:
- from httplib import HTTPConnection
+ utils.err("prometheus_client.parser is not installed")
+ sys.exit(13) # ask tcollector to not re-start us
try:
from collectors.etc import prometheus_conf
@@ -41,6 +44,7 @@
BASE_LABELS = ""
SETTINGS = {}
+
class PrometheusCollector(object):
def __init__(self, service, daemon, host, port, uri="/metrics"):
self.service = service
diff --git a/collectors/0/pxc-collector.py b/collectors/0/pxc-collector.py
index 4a77c0fc..451a899b 100755
--- a/collectors/0/pxc-collector.py
+++ b/collectors/0/pxc-collector.py
@@ -22,66 +22,73 @@
ATTENTION: Only tested on Debian/Ubuntu systems.
"""
-import MySQLdb as mysql # pylint: disable=import-error
+import MySQLdb as mysql # pylint: disable=import-error
import time
import sys
import os.path
-from collectors.etc import pxcconf
+
from collectors.lib import utils
+from collectors.etc import pxcconf
-__author__ = "Kai Laufer"
-__version__ = "1.0.1"
-__email__ = "mail@kai-laufer.de"
+__author__ = "Kai Laufer"
+__version__ = "1.0.1"
+__email__ = "mail@kai-laufer.de"
""" You can find these functions and additional information in etc/pxcconf.py """
-prefix = pxcconf.getPrefix() or "pxc" # Prefix for the collector, e.g. pxc -> pxc.wsrep_replicated_bytes
-interval = pxcconf.getInterval() or 1 # Interval for checking MySQL statistics
-galeraFile = pxcconf.getGaleraFile() or "/usr/lib/libgalera_smm.so" # Path to a galera specific file for ensuring that check won't run with a usual MySQL server. Default: "/usr/lib/libgalera_smm.so"
-login = pxcconf.getUserPassword() # MySQL-User, MySQL-Password and MySQL-Host (localhost)
-myMap = pxcconf.getKeyMap() or ( "wsrep_last_committed", "wsrep_replicated", "wsrep_repl_keys", "wsrep_local_commits" ) # Status variables which should be read
-mysqlUser = login[0] or "root"
+prefix = pxcconf.getPrefix() or "pxc" # Prefix for the collector, e.g. pxc -> pxc.wsrep_replicated_bytes
+interval = pxcconf.getInterval() or 1 # Interval for checking MySQL statistics
+galeraFile = pxcconf.getGaleraFile() or "/usr/lib/libgalera_smm.so"  # Path to a Galera-specific library, used to ensure the check does not run against a plain MySQL server. Default: "/usr/lib/libgalera_smm.so"
+login = pxcconf.getUserPassword() # MySQL-User, MySQL-Password and MySQL-Host (localhost)
+myMap = pxcconf.getKeyMap() or ("wsrep_last_committed", "wsrep_replicated", "wsrep_repl_keys",
+ "wsrep_local_commits") # Status variables which should be read
+mysqlUser = login[0] or "root"
mysqlPasswd = login[1] or ""
-mysqlHost = login[2] or "localhost"
+mysqlHost = login[2] or "localhost"
+
def getRow():
- """ Test connection """
- try:
- db = mysql.connect(host=mysqlHost, user=mysqlUser, passwd=mysqlPasswd)
- cursor = db.cursor()
- cursor.execute("SHOW STATUS LIKE '%wsrep%'")
- result = cursor.fetchall()
+ """ Test connection """
+ try:
+ db = mysql.connect(host=mysqlHost, user=mysqlUser, passwd=mysqlPasswd)
+ cursor = db.cursor()
+ cursor.execute("SHOW STATUS LIKE '%wsrep%'")
+ result = cursor.fetchall()
- except:
- print("Error: unable to fetch data - Check your configuration!")
- sys.exit(13) # Don't respawn collector
+ except:
+ utils.err("Error: unable to fetch data - Check your configuration!")
+        sys.exit(13)  # ask tcollector to not respawn us
+
+ db.close()
+ return result
- db.close()
- return result
class TSDResult(object):
- """ Create TSD output """
- def __init__(self, key, value, prefix, timestamp):
- self.key = key
- self.value = value
- self.prefix = prefix
- self.timestamp = timestamp
+ """ Create TSD output """
+
+ def __init__(self, key, value, prefix, timestamp):
+ self.key = key
+ self.value = value
+ self.prefix = prefix
+ self.timestamp = timestamp
+
+ def TSDRow(self):
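+        # returns one OpenTSDB "put"-style line, e.g. "pxc.wsrep_replicated_bytes 1609459200 12345" (illustrative values)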
+ return "%s.%s %s %s" % (self.prefix, self.key, self.timestamp, self.value)
- def TSDRow(self):
- return "%s.%s %s %s" % (self.prefix, self.key, self.timestamp, self.value)
def main():
- if os.path.isfile(galeraFile) is True:
- while True:
- rows = getRow()
- for row in rows:
- timestamp = int(time.time())
- if row[0] in myMap:
- result = TSDResult(row[0], row[1], prefix, timestamp)
- print(result.TSDRow())
- time.sleep(interval)
- return 0
- else:
- return 2
+ if os.path.isfile(galeraFile) is True:
+ while True:
+ rows = getRow()
+ for row in rows:
+ timestamp = int(time.time())
+ if row[0] in myMap:
+ result = TSDResult(row[0], row[1], prefix, timestamp)
+ print(result.TSDRow())
+ time.sleep(interval)
+ return 0
+ else:
+ return 2
+
if __name__ == "__main__":
- sys.exit(main())
+ sys.exit(main())
diff --git a/collectors/0/riak.py b/collectors/0/riak.py
index 006914c7..ae43b8a1 100755
--- a/collectors/0/riak.py
+++ b/collectors/0/riak.py
@@ -44,14 +44,11 @@
import os
import sys
import time
+from urllib.request import urlopen
from collectors.etc import riak_conf
from collectors.lib import utils
-try:
- from urllib.request import urlopen
-except ImportError:
- from urllib2 import urlopen
CONFIG = riak_conf.get_default_config()
@@ -113,7 +110,7 @@ def main():
# don't run if we're not a riak node
if not os.path.exists("/usr/lib/riak"):
- sys.exit(13)
+ return 13
utils.drop_privileges()
sys.stdin.close()
diff --git a/collectors/0/smart_stats.py b/collectors/0/smart_stats.py
index c268f5c7..3785367a 100755
--- a/collectors/0/smart_stats.py
+++ b/collectors/0/smart_stats.py
@@ -31,7 +31,7 @@
except ImportError:
smart_stats_conf = None
-DEFAULT_COLLECTION_INTERVAL=120
+DEFAULT_COLLECTION_INTERVAL = 120
TWCLI = "/usr/sbin/tw_cli"
ARCCONF = "/usr/local/bin/arcconf"
@@ -45,199 +45,201 @@
# Common smart attributes, add more to this list if you start seeing
# numbers instead of attribute names in TSD results.
ATTRIBUTE_MAP = {
- "1": "raw_read_error_rate",
- "2": "throughput_performance",
- "3": "spin_up_time",
- "4": "start_stop_count",
- "5": "reallocated_sector_ct",
- "7": "seek_error_rate",
- "8": "seek_time_performance",
- "9": "power_on_hours",
- "10": "spin_retry_count",
- "11": "recalibration_retries",
- "12": "power_cycle_count",
- "13": "soft_read_error_rate",
- "175": "program_fail_count_chip",
- "176": "erase_fail_count_chip",
- "177": "wear_leveling_count",
- "178": "used_rsvd_blk_cnt_chip",
- "179": "used_rsvd_blk_cnt_tot",
- "180": "unused_rsvd_blk_cnt_tot",
- "181": "program_fail_cnt_total",
- "182": "erase_fail_count_total",
- "183": "runtime_bad_block",
- "184": "end_to_end_error",
- "187": "reported_uncorrect",
- "188": "command_timeout",
- "189": "high_fly_writes",
- "190": "airflow_temperature_celsius",
- "191": "g_sense_error_rate",
- "192": "power-off_retract_count",
- "193": "load_cycle_count",
- "194": "temperature_celsius",
- "195": "hardware_ecc_recovered",
- "196": "reallocated_event_count",
- "197": "current_pending_sector",
- "198": "offline_uncorrectable",
- "199": "udma_crc_error_count",
- "200": "write_error_rate",
- "233": "media_wearout_indicator",
- "240": "transfer_error_rate",
- "241": "total_lba_writes",
- "242": "total_lba_read",
- }
+ "1": "raw_read_error_rate",
+ "2": "throughput_performance",
+ "3": "spin_up_time",
+ "4": "start_stop_count",
+ "5": "reallocated_sector_ct",
+ "7": "seek_error_rate",
+ "8": "seek_time_performance",
+ "9": "power_on_hours",
+ "10": "spin_retry_count",
+ "11": "recalibration_retries",
+ "12": "power_cycle_count",
+ "13": "soft_read_error_rate",
+ "175": "program_fail_count_chip",
+ "176": "erase_fail_count_chip",
+ "177": "wear_leveling_count",
+ "178": "used_rsvd_blk_cnt_chip",
+ "179": "used_rsvd_blk_cnt_tot",
+ "180": "unused_rsvd_blk_cnt_tot",
+ "181": "program_fail_cnt_total",
+ "182": "erase_fail_count_total",
+ "183": "runtime_bad_block",
+ "184": "end_to_end_error",
+ "187": "reported_uncorrect",
+ "188": "command_timeout",
+ "189": "high_fly_writes",
+ "190": "airflow_temperature_celsius",
+ "191": "g_sense_error_rate",
+ "192": "power-off_retract_count",
+ "193": "load_cycle_count",
+ "194": "temperature_celsius",
+ "195": "hardware_ecc_recovered",
+ "196": "reallocated_event_count",
+ "197": "current_pending_sector",
+ "198": "offline_uncorrectable",
+ "199": "udma_crc_error_count",
+ "200": "write_error_rate",
+ "233": "media_wearout_indicator",
+ "240": "transfer_error_rate",
+ "241": "total_lba_writes",
+ "242": "total_lba_read",
+}
class Alarm(RuntimeError):
- pass
+ pass
def alarm_handler(signum, frame):
- print("Program took too long to run, "
- "consider increasing its timeout.", file=sys.stderr)
- raise Alarm()
+ print("Program took too long to run, "
+ "consider increasing its timeout.", file=sys.stderr)
+ raise Alarm()
def smart_is_broken(drives):
- """Determines whether SMART can be used.
+ """Determines whether SMART can be used.
- Args:
- drives: A list of device names on which we intend to use SMART.
+ Args:
+ drives: A list of device names on which we intend to use SMART.
- Returns:
- True if SMART is available, False otherwise.
- """
- if os.path.exists(ARCCONF):
- return is_adaptec_driver_broken()
- if os.path.exists(TWCLI):
- return is_3ware_driver_broken(drives)
- return False
+ Returns:
+        True if SMART is broken and should not be used, False otherwise.
+ """
+ if os.path.exists(ARCCONF):
+ return is_adaptec_driver_broken()
+ if os.path.exists(TWCLI):
+ return is_3ware_driver_broken(drives)
+ return False
def is_adaptec_driver_broken():
- signal.alarm(COMMAND_TIMEOUT)
- arcconf = subprocess.Popen("%s %s" % (ARCCONF, ARCCONF_ARGS),
- shell=True,
- stdout=subprocess.PIPE)
- arcconf_output = arcconf.communicate()[0]
- signal.alarm(0)
- if arcconf.returncode != 0:
- if arcconf_output and arcconf_output.startswith(NO_CONTROLLER):
- # No controller => no problem.
- return False
- if arcconf.returncode == 127:
- # arcconf doesn't even work on this system, so assume we're safe
- return False
- print("arcconf unexpected error %s" % arcconf.returncode, file=sys.stderr)
- return True
- for line in arcconf_output.split("\n"):
- fields = [x for x in line.split(" ") if x]
- if fields[0] == "Driver" and fields[2] in BROKEN_DRIVER_VERSIONS:
- print("arcconf indicates broken driver version %s"
- % fields[2], file=sys.stderr)
- return True
- return False
-
-def is_3ware_driver_broken(drives):
- # Apparently 3ware controllers can't report SMART stats from SAS drives. WTF.
- # See also http://sourceforge.net/apps/trac/smartmontools/ticket/161
- for i in reversed(range(len(drives))):
- drive = drives[i]
signal.alarm(COMMAND_TIMEOUT)
- smart_ctl = subprocess.Popen(SMART_CTL + " -i /dev/" + drive,
- shell=True, stdout=subprocess.PIPE)
- smart_output = smart_ctl.communicate()[0]
- if "supports SMART and is Disabled" in smart_output:
- print("SMART is disabled for %s" % drive, file=sys.stderr)
- del drives[i] # We're iterating from the end of the list so this is OK.
+ arcconf = subprocess.Popen("%s %s" % (ARCCONF, ARCCONF_ARGS),
+ shell=True,
+ stdout=subprocess.PIPE)
+    arcconf_output = arcconf.communicate()[0].decode("utf-8", "ignore")  # communicate() returns bytes under Python 3
signal.alarm(0)
- if not drives:
- print("None of the drives support SMART. Are they SAS drives?", file=sys.stderr)
- return True
- return False
+ if arcconf.returncode != 0:
+ if arcconf_output and arcconf_output.startswith(NO_CONTROLLER):
+ # No controller => no problem.
+ return False
+ if arcconf.returncode == 127:
+ # arcconf doesn't even work on this system, so assume we're safe
+ return False
+ print("arcconf unexpected error %s" % arcconf.returncode, file=sys.stderr)
+ return True
+ for line in arcconf_output.split("\n"):
+ fields = [x for x in line.split(" ") if x]
+ if fields[0] == "Driver" and fields[2] in BROKEN_DRIVER_VERSIONS:
+ print("arcconf indicates broken driver version %s"
+ % fields[2], file=sys.stderr)
+ return True
+ return False
+
+
+def is_3ware_driver_broken(drives):
+ # Apparently 3ware controllers can't report SMART stats from SAS drives. WTF.
+ # See also http://sourceforge.net/apps/trac/smartmontools/ticket/161
+ for i in reversed(range(len(drives))):
+ drive = drives[i]
+ signal.alarm(COMMAND_TIMEOUT)
+ smart_ctl = subprocess.Popen(SMART_CTL + " -i /dev/" + drive,
+ shell=True, stdout=subprocess.PIPE)
+        smart_output = smart_ctl.communicate()[0].decode("utf-8", "ignore")  # communicate() returns bytes under Python 3
+ if "supports SMART and is Disabled" in smart_output:
+ print("SMART is disabled for %s" % drive, file=sys.stderr)
+ del drives[i] # We're iterating from the end of the list so this is OK.
+ signal.alarm(0)
+ if not drives:
+ print("None of the drives support SMART. Are they SAS drives?", file=sys.stderr)
+ return True
+ return False
def process_output(drive, smart_output):
- """Print formatted SMART output for the drive"""
- ts = int(time.time())
- smart_output = smart_output.split("\n")
- # Set data_marker to 0, so we skip stuff until we see a line
- # beginning with ID# in the output. Start processing rows after
- # that point.
- data_marker = False
- is_seagate = False
-
- for line in smart_output:
- if data_marker:
- fields = line.split()
- if len(fields) < 2:
- continue
- field = fields[0]
- if len(fields) > 2 and field in ATTRIBUTE_MAP:
- metric = ATTRIBUTE_MAP[field]
- value = fields[9].split()[0]
- print("smart.%s %d %s disk=%s" % (metric, ts, value, drive))
- if is_seagate and metric in ("seek_error_rate", "raw_read_error_rate"):
- # It appears that some Seagate drives (and possibly some Western
- # Digital ones too) use the first 16 bits to store error counts,
- # and the low 32 bits to store operation counts, out of these 48
- # bit values. So try to be helpful and extract these here.
- value = int(value)
- print("smart.%s %d %d disk=%s"
- % (metric.replace("error_rate", "count"), ts,
- value & 0xFFFFFFFF, drive))
- print("smart.%s %d %d disk=%s"
- % (metric.replace("error_rate", "errors"), ts,
- (value & 0xFFFF00000000) >> 32, drive))
- elif line.startswith("ID#"):
- data_marker = True
- elif line.startswith("Device Model:"):
- model = line.split(None, 2)[2]
- # Rough approximation to detect Seagate drives.
- is_seagate = model.startswith("ST")
+ """Print formatted SMART output for the drive"""
+ ts = int(time.time())
+ smart_output = smart_output.split("\n")
+    # Set data_marker to False, so we skip stuff until we see a line
+ # beginning with ID# in the output. Start processing rows after
+ # that point.
+ data_marker = False
+ is_seagate = False
+
+ for line in smart_output:
+ if data_marker:
+ fields = line.split()
+ if len(fields) < 2:
+ continue
+ field = fields[0]
+ if len(fields) > 2 and field in ATTRIBUTE_MAP:
+ metric = ATTRIBUTE_MAP[field]
+ value = fields[9].split()[0]
+ print("smart.%s %d %s disk=%s" % (metric, ts, value, drive))
+ if is_seagate and metric in ("seek_error_rate", "raw_read_error_rate"):
+ # It appears that some Seagate drives (and possibly some Western
+ # Digital ones too) use the first 16 bits to store error counts,
+ # and the low 32 bits to store operation counts, out of these 48
+ # bit values. So try to be helpful and extract these here.
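+                    # e.g. an illustrative raw value 0x0002000000F0 yields 240 operations (low 32 bits) and 2 errors (bits 32-47)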
+ value = int(value)
+ print("smart.%s %d %d disk=%s"
+ % (metric.replace("error_rate", "count"), ts,
+ value & 0xFFFFFFFF, drive))
+ print("smart.%s %d %d disk=%s"
+ % (metric.replace("error_rate", "errors"), ts,
+ (value & 0xFFFF00000000) >> 32, drive))
+ elif line.startswith("ID#"):
+ data_marker = True
+ elif line.startswith("Device Model:"):
+ model = line.split(None, 2)[2]
+ # Rough approximation to detect Seagate drives.
+ is_seagate = model.startswith("ST")
def main():
- """main loop for SMART collector"""
-
- collection_interval=DEFAULT_COLLECTION_INTERVAL
- if(smart_stats_conf):
- config = smart_stats_conf.get_config()
- collection_interval=config['collection_interval']
-
- # Get the list of block devices.
- drives = [dev[5:] for dev in glob.glob("/dev/[hs]d[a-z]")]
- # Try FreeBSD drives if no block devices found
- if not drives:
- drives = [dev[5:] for dev in glob.glob("/dev/da[0-9]")+glob.glob("/dev/da[0-9][0-9]")+glob.glob("/dev/ada[0-9]")+glob.glob("/dev/ada[0-9][0-9]")]
- # Exit gracefully if no block devices found
- if not drives:
- sys.exit(13)
-
-
- # to make sure we are done with smartctl in COMMAND_TIMEOUT seconds
- signal.signal(signal.SIGALRM, alarm_handler)
-
- if smart_is_broken(drives):
- sys.exit(13)
-
- while True:
- for drive in drives:
- signal.alarm(COMMAND_TIMEOUT)
- smart_ctl = subprocess.Popen(SMART_CTL + " -i -A /dev/" + drive,
- shell=True, stdout=subprocess.PIPE)
- smart_output = smart_ctl.communicate()[0]
- signal.alarm(0)
- if smart_ctl.returncode != 0:
- if smart_ctl.returncode == 127:
- sys.exit(13)
- else:
- print("Command exited with: %d" % smart_ctl.returncode, file=sys.stderr)
- process_output(drive, smart_output)
-
- sys.stdout.flush()
- time.sleep(collection_interval)
+ """main loop for SMART collector"""
+
+ collection_interval = DEFAULT_COLLECTION_INTERVAL
+ if smart_stats_conf:
+ config = smart_stats_conf.get_config()
+ collection_interval = config['collection_interval']
+
+ # Get the list of block devices.
+ drives = [dev[5:] for dev in glob.glob("/dev/[hs]d[a-z]")]
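+    # dev[5:] strips the leading "/dev/" so only the bare device name (e.g. "sda") remains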
+ # Try FreeBSD drives if no block devices found
+ if not drives:
+ drives = [dev[5:] for dev in
+ glob.glob("/dev/da[0-9]") + glob.glob("/dev/da[0-9][0-9]") + glob.glob("/dev/ada[0-9]") + glob.glob(
+ "/dev/ada[0-9][0-9]")]
+ # Exit gracefully if no block devices found
+ if not drives:
+ return 13
+
+ # to make sure we are done with smartctl in COMMAND_TIMEOUT seconds
+ signal.signal(signal.SIGALRM, alarm_handler)
+
+ if smart_is_broken(drives):
+ return 13
+
+ while True:
+ for drive in drives:
+ signal.alarm(COMMAND_TIMEOUT)
+ smart_ctl = subprocess.Popen(SMART_CTL + " -i -A /dev/" + drive,
+ shell=True, stdout=subprocess.PIPE)
+            smart_output = smart_ctl.communicate()[0].decode("utf-8", "ignore")  # communicate() returns bytes under Python 3
+ signal.alarm(0)
+ if smart_ctl.returncode != 0:
+ if smart_ctl.returncode == 127:
+ return 13
+ else:
+ print("Command exited with: %d" % smart_ctl.returncode, file=sys.stderr)
+ process_output(drive, smart_output)
+
+ sys.stdout.flush()
+ time.sleep(collection_interval)
if __name__ == "__main__":
- main()
+ sys.exit(main())
diff --git a/collectors/0/sysload.py b/collectors/0/sysload.py
index 8e24959c..4c9328d2 100755
--- a/collectors/0/sysload.py
+++ b/collectors/0/sysload.py
@@ -13,7 +13,7 @@
# see .
#
-'''
+"""
CPU detailed statistics for TSDB
This plugin tracks, for all CPUs:
@@ -34,7 +34,7 @@
- memory statistics (bytes) (active, inact, wired, cache, buf, free)
- arc statistics (bytes) (total, mru, mfu, anon, header, other)
- swap statistics (bytes) (total, free, inuse, in/s, out/s)
-'''
+"""
import errno
import sys
@@ -47,16 +47,15 @@
from collectors.lib import utils
-PY3 = sys.version_info[0] > 2
-if PY3:
- long = int
+long = int
try:
from collectors.etc import sysload_conf
except ImportError:
sysload_conf = None
-DEFAULT_COLLECTION_INTERVAL=15
+DEFAULT_COLLECTION_INTERVAL = 15
+
def convert_to_bytes(string):
"""Take a string in the form 1234K, and convert to bytes"""
@@ -75,17 +74,19 @@ def convert_to_bytes(string):
return long(number)
return long(string)
+
signal_received = None
def handlesignal(signum, stack):
global signal_received
signal_received = signum
+
def main():
"""top main loop"""
- collection_interval=DEFAULT_COLLECTION_INTERVAL
- collect_every_cpu=True
- if(sysload_conf):
+ collection_interval = DEFAULT_COLLECTION_INTERVAL
+ collect_every_cpu = True
+ if sysload_conf:
config = sysload_conf.get_config()
collection_interval=config['collection_interval']
collect_every_cpu=config['collect_every_cpu']
@@ -97,7 +98,7 @@ def main():
try:
if platform.system() == "FreeBSD":
- if(collect_every_cpu):
+ if collect_every_cpu:
p_top = subprocess.Popen(
["top", "-S", "-P", "-n", "-s"+str(collection_interval), "-dinfinity", "0"],
stdout=subprocess.PIPE,
@@ -108,7 +109,7 @@ def main():
stdout=subprocess.PIPE,
)
else:
- if(collect_every_cpu):
+ if collect_every_cpu:
p_top = subprocess.Popen(
["mpstat", "-P", "ALL", str(collection_interval)],
stdout=subprocess.PIPE,
@@ -121,7 +122,7 @@ def main():
except OSError as e:
if e.errno == errno.ENOENT:
# it makes no sense to run this collector here
- sys.exit(13) # we signal tcollector to not run us
+ sys.exit(13) # we signal tcollector to not run us
raise
timestamp = 0
@@ -292,5 +293,6 @@ def main():
pass
p_top.wait()
+
if __name__ == "__main__":
- main()
+ sys.exit(main())
diff --git a/collectors/0/tcp_bridge.py b/collectors/0/tcp_bridge.py
index bd3de3b7..63b6f12e 100755
--- a/collectors/0/tcp_bridge.py
+++ b/collectors/0/tcp_bridge.py
@@ -21,15 +21,12 @@
import time
from collectors.lib import utils
-try:
- from _thread import *
-except ImportError:
- from thread import *
+from _thread import *
try:
from collectors.etc import tcp_bridge_conf
except ImportError:
- print('unable to import tcp_bridge_conf', file=sys.stderr)
+    utils.err('unable to import tcp_bridge_conf')
tcp_bridge_conf = None
HOST = '127.0.0.1'
@@ -46,6 +43,7 @@
# buffered stdout seems to break metrics
out = os.fdopen(sys.stdout.fileno(), 'w', 0)
+
def main():
if not (tcp_bridge_conf and tcp_bridge_conf.enabled()):
print('not enabled, or tcp_bridge_conf unavilable', file=sys.stderr)
@@ -130,7 +128,6 @@ def removePut(line):
finally:
sock.close()
-if __name__ == "__main__":
- main()
-sys.exit(0)
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/collectors/0/udp_bridge.py b/collectors/0/udp_bridge.py
index 2daf5b31..7a337f99 100755
--- a/collectors/0/udp_bridge.py
+++ b/collectors/0/udp_bridge.py
@@ -19,17 +19,18 @@
from collectors.lib import utils
try:
- from collectors.etc import udp_bridge_conf
+ from collectors.etc import udp_bridge_conf
except ImportError:
- udp_bridge_conf = None
+ udp_bridge_conf = None
HOST = '127.0.0.1'
PORT = 8953
SIZE = 8192
+
def main():
if not (udp_bridge_conf and udp_bridge_conf.enabled()):
- sys.exit(13)
+ sys.exit(13)
utils.drop_privileges()
def removePut(line):
@@ -40,9 +41,9 @@ def removePut(line):
try:
if (udp_bridge_conf and udp_bridge_conf.usetcp()):
- sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
else:
- sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+ sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind((HOST, PORT))
except socket.error as msg:
utils.err('could not open socket: %s' % msg)
@@ -75,7 +76,6 @@ def removePut(line):
finally:
sock.close()
-if __name__ == "__main__":
- main()
-sys.exit(0)
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/collectors/0/varnishstat.py b/collectors/0/varnishstat.py
index 33541dde..2ca6fdbe 100755
--- a/collectors/0/varnishstat.py
+++ b/collectors/0/varnishstat.py
@@ -24,7 +24,7 @@
from collectors.lib import utils
-interval = 10 # seconds
+interval = 10 # seconds
# If you would rather use the timestamp returned by varnishstat instead of a
# local timestamp, then change this value to "True"
@@ -41,6 +41,7 @@
# Collect all metrics
vstats = "all"
+
# Collect metrics a la carte
# vstats = frozenset([
# "client_conn",
@@ -52,47 +53,48 @@
# ])
def main():
- utils.drop_privileges()
- bad_regex = re.compile("[,()]+") # avoid forbidden by TSD symbols
-
- while True:
- try:
- if vstats == "all":
- stats = subprocess.Popen(
- ["varnishstat", "-1", "-j"],
- stdout=subprocess.PIPE,
- )
- else:
- fields = ",".join(vstats)
- stats = subprocess.Popen(
- ["varnishstat", "-1", "-f" + fields, "-j"],
- stdout=subprocess.PIPE,
- )
- except OSError as e:
- # Die and signal to tcollector not to run this script.
- sys.stderr.write("Error: %s\n" % e)
- sys.exit(13)
-
- metrics = ""
- for line in stats.stdout.readlines():
- metrics += line
- metrics = json.loads(metrics)
-
- timestamp = ""
- if use_varnishstat_timestamp:
- pattern = "%Y-%m-%dT%H:%M:%S"
- timestamp = int(time.mktime(time.strptime(metrics['timestamp'], pattern)))
- else:
- timestamp = time.time()
-
- for k, v in metrics.items():
- if k != "timestamp" and None == bad_regex.search(k):
- metric_name = metric_prefix + "." + k
- print("%s %d %s %s" % \
- (metric_name, timestamp, v['value'], ",".join(tags)))
-
- sys.stdout.flush()
- time.sleep(interval)
+ utils.drop_privileges()
+    bad_regex = re.compile("[,()]+")  # skip metric names containing characters TSD forbids
+
+ while True:
+ try:
+ if vstats == "all":
+ stats = subprocess.Popen(
+ ["varnishstat", "-1", "-j"],
+ stdout=subprocess.PIPE,
+ )
+ else:
+ fields = ",".join(vstats)
+ stats = subprocess.Popen(
+ ["varnishstat", "-1", "-f" + fields, "-j"],
+ stdout=subprocess.PIPE,
+ )
+ except OSError as e:
+ # Die and signal to tcollector not to run this script.
+ sys.stderr.write("Error: %s\n" % e)
+ sys.exit(13)
+
+ metrics = ""
+ for line in stats.stdout.readlines():
+            metrics += line.decode("utf-8")  # Popen stdout is bytes under Python 3
+ metrics = json.loads(metrics)
+
+ timestamp = ""
+ if use_varnishstat_timestamp:
+ pattern = "%Y-%m-%dT%H:%M:%S"
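+            # varnishstat -j reports its timestamp in this layout, e.g. "2021-01-01T00:00:00" (illustrative value)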
+ timestamp = int(time.mktime(time.strptime(metrics['timestamp'], pattern)))
+ else:
+ timestamp = time.time()
+
+ for k, v in metrics.items():
+            if k != "timestamp" and bad_regex.search(k) is None:
+ metric_name = metric_prefix + "." + k
+                print("%s %d %s %s" %
+                      (metric_name, timestamp, v['value'], ",".join(tags)))
+
+ sys.stdout.flush()
+ time.sleep(interval)
+
if __name__ == "__main__":
- sys.exit(main())
+ sys.exit(main())
diff --git a/collectors/0/zfsiostats.py b/collectors/0/zfsiostats.py
index a3c0e243..e37805d7 100755
--- a/collectors/0/zfsiostats.py
+++ b/collectors/0/zfsiostats.py
@@ -13,7 +13,7 @@
# see .
#
-'''
+"""
ZFS I/O and disk space statistics for TSDB
This plugin tracks, for all pools:
@@ -29,7 +29,7 @@
Disk space usage is given in kbytes.
Throughput is given in operations/s and bytes/s.
-'''
+"""
import errno
import sys
@@ -39,9 +39,7 @@
import signal
import os
-PY3 = sys.version_info[0] > 2
-if PY3:
- long = int
+long = int
from collectors.lib import utils
@@ -50,19 +48,20 @@
except ImportError:
zfsiostats_conf = None
-DEFAULT_COLLECTION_INTERVAL=15
-DEFAULT_REPORT_CAPACITY_EVERY_X_TIMES=20
-DEFAULT_REPORT_DISKS_IN_VDEVS=False
+DEFAULT_COLLECTION_INTERVAL = 15
+DEFAULT_REPORT_CAPACITY_EVERY_X_TIMES = 20
+DEFAULT_REPORT_DISKS_IN_VDEVS = False
+
def convert_to_bytes(string):
"""Take a string in the form 1234K, and convert to bytes"""
factors = {
- "K": 1024,
- "M": 1024 * 1024,
- "G": 1024 * 1024 * 1024,
- "T": 1024 * 1024 * 1024 * 1024,
- "P": 1024 * 1024 * 1024 * 1024 * 1024,
- "E": 1024 * 1024 * 1024 * 1024 * 1024 * 1024,
+ "K": 1024,
+ "M": 1024 * 1024,
+ "G": 1024 * 1024 * 1024,
+ "T": 1024 * 1024 * 1024 * 1024,
+ "P": 1024 * 1024 * 1024 * 1024 * 1024,
+ "E": 1024 * 1024 * 1024 * 1024 * 1024 * 1024,
}
if string == "-": return -1
for f, fm in factors.items():
@@ -72,15 +71,16 @@ def convert_to_bytes(string):
return long(number)
return long(string)
+
def convert_wo_prefix(string):
"""Take a string in the form 1234K, and convert without metric prefix"""
factors = {
- "K": 1000,
- "M": 1000 * 1000,
- "G": 1000 * 1000 * 1000,
- "T": 1000 * 1000 * 1000 * 1000,
- "P": 1000 * 1000 * 1000 * 1000 * 1000,
- "E": 1000 * 1000 * 1000 * 1000 * 1000 * 1000,
+ "K": 1000,
+ "M": 1000 * 1000,
+ "G": 1000 * 1000 * 1000,
+ "T": 1000 * 1000 * 1000 * 1000,
+ "P": 1000 * 1000 * 1000 * 1000 * 1000,
+ "E": 1000 * 1000 * 1000 * 1000 * 1000 * 1000,
}
if string == "-": return -1
for f, fm in factors.items():
@@ -90,11 +90,12 @@ def convert_wo_prefix(string):
return long(number)
return long(string)
-def extract_info(line,report_disks_in_vdevs):
+
+def extract_info(line, report_disks_in_vdevs):
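+    # parses one pool/device row of zpool iostat output (assumed column order): name, alloc, free, read/write ops per sec, read/write bandwidth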
(poolname,
- alloc, free,
- read_issued, write_issued,
- read_throughput, write_throughput) = line.split()
+ alloc, free,
+ read_issued, write_issued,
+ read_throughput, write_throughput) = line.split()
s_io = {}
# magnitudeless variable
@@ -112,11 +113,12 @@ def extract_info(line,report_disks_in_vdevs):
s_df["free"] = convert_to_bytes(free) / 1024
if ((s_df["used"] < 0) or (s_df["free"] < 0)):
s_df = {}
- if(not report_disks_in_vdevs):
+    if not report_disks_in_vdevs:
s_io = {}
return poolname, s_df, s_io
+
T_START = 1
T_HEADERS = 2
T_SEPARATOR = 3
@@ -126,22 +128,25 @@ def extract_info(line,report_disks_in_vdevs):
T_LEG = 7
signal_received = None
+
+
def handlesignal(signum, stack):
global signal_received
signal_received = signum
+
def main():
"""zfsiostats main loop"""
global signal_received
- collection_interval=DEFAULT_COLLECTION_INTERVAL
- report_capacity_every_x_times=DEFAULT_REPORT_CAPACITY_EVERY_X_TIMES
- report_disks_in_vdevs=DEFAULT_REPORT_DISKS_IN_VDEVS
- if(zfsiostats_conf):
+ collection_interval = DEFAULT_COLLECTION_INTERVAL
+ report_capacity_every_x_times = DEFAULT_REPORT_CAPACITY_EVERY_X_TIMES
+ report_disks_in_vdevs = DEFAULT_REPORT_DISKS_IN_VDEVS
+    if zfsiostats_conf:
config = zfsiostats_conf.get_config()
- collection_interval=config['collection_interval']
- report_capacity_every_x_times=config['report_capacity_every_x_times']
- report_disks_in_vdevs=config['report_disks_in_vdevs']
+ collection_interval = config['collection_interval']
+ report_capacity_every_x_times = config['report_capacity_every_x_times']
+ report_disks_in_vdevs = config['report_disks_in_vdevs']
signal.signal(signal.SIGTERM, handlesignal)
signal.signal(signal.SIGINT, handlesignal)
@@ -154,11 +159,11 @@ def main():
except OSError as e:
if e.errno == errno.ENOENT:
# it makes no sense to run this collector here
- sys.exit(13) # we signal tcollector to not run us
+ sys.exit(13) # we signal tcollector to not run us
raise
firstloop = True
- report_capacity = (report_capacity_every_x_times-1)
+ report_capacity = (report_capacity_every_x_times - 1)
lastleg = 0
ltype = None
timestamp = int(time.time())
@@ -207,7 +212,7 @@ def main():
ltype = T_DEVICE
else:
# must be a pool name
- #assert ltype == T_SEPARATOR, \
+ # assert ltype == T_SEPARATOR, \
# "expecting last state T_SEPARATOR, now got %s" % ltype
if ltype == T_SEPARATOR:
parentpoolname = ""
@@ -215,19 +220,19 @@ def main():
if ltype == T_START:
for x in (
- capacity_stats_pool, capacity_stats_device,
- io_stats_pool, io_stats_device,
- ):
+ capacity_stats_pool, capacity_stats_device,
+ io_stats_pool, io_stats_device,
+ ):
x.clear()
timestamp = int(time.time())
elif ltype == T_POOL:
line = line.strip()
- poolname, s_df, s_io = extract_info(line,report_disks_in_vdevs)
+ poolname, s_df, s_io = extract_info(line, report_disks_in_vdevs)
if parentpoolname == "":
parentpoolname = poolname
else:
- poolname=parentpoolname+"."+poolname
+ poolname = parentpoolname + "." + poolname
capacity_stats_pool[poolname] = s_df
io_stats_pool[poolname] = s_io
# marker for leg
@@ -236,13 +241,13 @@ def main():
elif ltype == T_LEG:
last_leg = last_leg + 1
line = line.strip()
- devicename, s_df, s_io = extract_info(line,report_disks_in_vdevs)
+ devicename, s_df, s_io = extract_info(line, report_disks_in_vdevs)
capacity_stats_device["%s %s%s" % (poolname, devicename, last_leg)] = s_df
io_stats_device["%s %s%s" % (poolname, devicename, last_leg)] = s_io
elif ltype == T_DEVICE:
line = line.strip()
- devicename, s_df, s_io = extract_info(line,report_disks_in_vdevs)
+ devicename, s_df, s_io = extract_info(line, report_disks_in_vdevs)
capacity_stats_device["%s %s" % (poolname, devicename)] = s_df
io_stats_device["%s %s" % (poolname, devicename)] = s_io
@@ -250,7 +255,7 @@ def main():
if report_capacity_every_x_times > 0:
report_capacity += 1
if report_capacity == report_capacity_every_x_times:
- report_capacity=0
+ report_capacity = 0
for poolname, stats in capacity_stats_pool.items():
fm = "zfs.df.pool.kb.%s %d %s pool=%s"
for statname, statnumber in stats.items():
@@ -287,6 +292,6 @@ def main():
pass
p_zpool.wait()
-if __name__ == "__main__":
- main()
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/collectors/0/zfsolkernstats.py b/collectors/0/zfsolkernstats.py
index 6d580d99..aa655c99 100755
--- a/collectors/0/zfsolkernstats.py
+++ b/collectors/0/zfsolkernstats.py
@@ -33,6 +33,7 @@
# and the allocation sizes for the slabs
# /proc/spl/kstat/zfs/arcstats is a table. we only care about the data column
+
def main():
"""zfsstat main loop"""
interval = 15
@@ -83,6 +84,6 @@ def main():
sys.stdout.flush()
time.sleep(interval)
-if __name__ == "__main__":
- main()
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/collectors/0/zookeeper.py b/collectors/0/zookeeper.py
index cd5fd4cc..5386426d 100755
--- a/collectors/0/zookeeper.py
+++ b/collectors/0/zookeeper.py
@@ -50,6 +50,7 @@
"zk_open_file_descriptor_count",
])
+
def scan_zk_instances():
"""
Finding out all the running instances of zookeeper
@@ -106,10 +107,12 @@ def scan_zk_instances():
fd.close()
return instances
+
def print_stat(metric, ts, value, tags=""):
if value is not None:
print("zookeeper.%s %i %s %s" % (metric, ts, value, tags))
+
def connect_socket(tcp_version, port):
sock = None
if tcp_version == "tcp6":
@@ -124,6 +127,7 @@ def connect_socket(tcp_version, port):
utils.err(err)
return sock
+
def main():
if USER != "root":
utils.drop_privileges(user=USER)
@@ -139,7 +143,7 @@ def main():
last_scan = ts
if not instances:
- return 13 # Ask tcollector not to respawn us
+ return 13 # ask tcollector not to respawn us
# Iterate over every zookeeper instance and get statistics
for ip, port, tcp_version in instances:
@@ -161,5 +165,6 @@ def main():
sys.stdout.flush()
time.sleep(COLLECTION_INTERVAL)
+
if __name__ == "__main__":
sys.exit(main())
diff --git a/collectors/300/aws_cloudwatch_stats.py b/collectors/300/aws_cloudwatch_stats.py
index 6db53024..b26fc462 100755
--- a/collectors/300/aws_cloudwatch_stats.py
+++ b/collectors/300/aws_cloudwatch_stats.py
@@ -5,7 +5,6 @@
import datetime
import re
import json
-from collections import OrderedDict
import threading
from time import mktime
from collectors.lib import utils