forked from matt448/nagios-checks
-
Notifications
You must be signed in to change notification settings - Fork 0
/
check_sqs_depth.py
executable file
·132 lines (108 loc) · 4.14 KB
/
check_sqs_depth.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/python
##########################################################
#
# Written by Matthew McMillan
# @matthewmcmillan
# https://matthewcmcmillan.blogspot.com
# https://github.com/matt448/nagios-checks
#
# Requires the boto library and a .boto file with read
# permissions to the queues.
#
import sys
import argparse
import boto
import boto.sqs
def printUsage():
    """Print a one-line example invocation, framed by blank lines, to stdout.

    Written with single-argument ``print(...)`` calls so the function is
    valid under both Python 2 (print statement with a parenthesized
    expression) and Python 3 (print function).
    """
    print("")
    # The two spaces after "Example:" reproduce the historical output of
    # `print "Example: ", sys.argv[0], ...` (trailing space + separator).
    print("Example:  %s --name myqueue --region us-east-1 --warn 10 --crit 20"
          % sys.argv[0])
    print("")
# Nagios plugin exit codes
NAGIOS_OK = 0
NAGIOS_WARNING = 1
NAGIOS_CRITICAL = 2
NAGIOS_UNKNOWN = 3


def parseArgs(argv=None):
    """Parse the command line arguments of the check.

    argv -- list of argument strings; None means use sys.argv[1:].

    Returns the argparse namespace with the attributes name, region,
    warn, crit and debug. argparse itself exits on invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description='This script is a Nagios check that monitors the number '
                    'of messages in Amazon SQS queues. It requires a .boto '
                    'file in the user\'s home directory and AWS credentials '
                    'that allow read access to the queues that are to be '
                    'monitored.')
    parser.add_argument('--name', dest='name', type=str, required=True,
                        help='Name of SQS queue. This can be a wildcard '
                             'match. For example a name of blah_ would match '
                             'blah_1, blah_2, blah_foobar. To monitor a '
                             'single queue, enter the exact name of the '
                             'queue.')
    parser.add_argument('--region', dest='region', type=str,
                        default='us-east-1',
                        help='AWS Region hosting the SQS queue. '
                             'Default is us-east-1.')
    parser.add_argument('--warn', dest='warn', type=int, required=True,
                        help='Warning level for queue depth.')
    parser.add_argument('--crit', dest='crit', type=int, required=True,
                        help='Critical level for queue depth.')
    parser.add_argument('--debug', action='store_true',
                        help='Enable debug output.')
    return parser.parse_args(argv)


def fetchQueueDepths(sqsRegion, queueName):
    """Look up every queue whose name starts with queueName.

    sqsRegion -- AWS region name passed to boto.sqs.connect_to_region.
    queueName -- queue name prefix passed to get_all_queues.

    Returns (qList, depthList): two parallel lists holding the queue names
    and their message counts as ints.

    Raises ValueError when boto returns no connection for the region;
    errors raised by boto itself are passed through to the caller.
    """
    conn = boto.sqs.connect_to_region(sqsRegion)
    if conn is None:
        # Without this guard the next line dies with an AttributeError.
        raise ValueError("unknown AWS region '%s'" % sqsRegion)

    qList = []
    depthList = []
    for queue in conn.get_all_queues(prefix=queueName):
        # Split out queue name: the third "/"-separated field of the id.
        qList.append(str(queue.id).split("/")[2])
        depthList.append(int(queue.count()))
    return qList, depthList


def buildNagiosOutput(qList, depthList, warnDepth, critDepth):
    """Turn queue depths into a Nagios exit code and output line.

    qList     -- queue names.
    depthList -- message counts, parallel to qList.
    warnDepth -- depths >= this (and < critDepth) count as warnings.
    critDepth -- depths >= this count as criticals.

    Returns (exitCode, outputLine) where outputLine is
    "<status text>|<perfdata>". An empty qList yields UNKNOWN, because
    "no queue matched" must not be reported as a healthy queue.
    """
    if not qList:
        return (NAGIOS_UNKNOWN,
                "UNKNOWN - No matching queues found [W:%s C:%s]|"
                % (warnDepth, critDepth))

    queues = list(zip(qList, depthList))
    critCount = sum(1 for _, depth in queues if depth >= critDepth)
    warnCount = sum(1 for _, depth in queues
                    if warnDepth <= depth < critDepth)

    # Any critical queue wins over warnings.
    if critCount > 0:
        label, exitCode = "CRITICAL", NAGIOS_CRITICAL
    elif warnCount > 0:
        label, exitCode = "WARNING", NAGIOS_WARNING
    else:
        label, exitCode = "OK", NAGIOS_OK

    statusParts = ["%s - Queue depth (" % label]
    statusParts.extend("%s:%s" % (name, depth) for name, depth in queues)
    statusMsg = " ".join(statusParts) + " ) [W:%s C:%s]" % (warnDepth,
                                                           critDepth)

    # One "name=value;warn;crit; " entry per queue.
    perfdataMsg = "".join("%s=%s;%s;%s; " % (name, depth, warnDepth, critDepth)
                          for name, depth in queues)
    return exitCode, statusMsg + "|" + perfdataMsg


def main():
    """Run the check: parse arguments, query SQS, print result and exit."""
    args = parseArgs()

    if args.crit <= args.warn:
        print("")
        print("ERROR: Critical value must be larger than warning value.")
        printUsage()
        sys.exit(NAGIOS_CRITICAL)  # exit status 2, as before

    try:
        qList, depthList = fetchQueueDepths(args.region, args.name)
    except Exception as err:
        # Top-level boundary of a Nagios plugin: an uncaught traceback would
        # exit with status 1, which Nagios reads as WARNING. Report UNKNOWN.
        print("UNKNOWN - Unable to query SQS queue depth: %s" % err)
        sys.exit(NAGIOS_UNKNOWN)

    if args.debug:
        print("")
        print('========== Queue List =============')
        print(qList)
        print('==================================')
        print("")

    exitCode, outputLine = buildNagiosOutput(qList, depthList,
                                             args.warn, args.crit)

    # Print final output for Nagios and exit with the matching code
    print(outputLine)
    sys.exit(exitCode)


if __name__ == "__main__":
    main()