Skip to content

Commit d3306ee

Browse files
author
tonytan4ever
committed
Adjust neutron/nova agent related alarms and check scripts
1 parent 3eac154 commit d3306ee

14 files changed

+70
-42
lines changed

playbooks/files/rax-maas/plugins/README.md

+7-7
Original file line number | Diff line number | Diff line change
@@ -221,9 +221,9 @@ polls the nova api and gets a list of all nova services running in the environme
221221
Hostname or IP address of service to test
222222
##### Example Output:
223223

224-
metric nova-scheduler_on_host_aio1_nova_scheduler_container-e7b92e0f uint32 1
225-
metric nova-conductor_on_host_aio1_nova_conductor_container-dcddd54a uint32 1
226-
metric nova-compute_on_host_aio1_nova_compute_container-19824c74 uint32 1
224+
metric nova-scheduler_on_host_aio1_nova_scheduler_container-e7b92e0f_status string Yes
225+
metric nova-conductor_on_host_aio1_nova_conductor_container-dcddd54a_status string No
226+
metric nova-compute_on_host_aio1_nova_compute_container-19824c74_status string Yes
227227
...
228228

229229
***
@@ -265,10 +265,10 @@ polls the neutron api and gets a list of all neutron agents running in the envir
265265
Hostname or IP address of service to test
266266
##### Example Output:
267267

268-
metric neutron-metadata-agent_status uint32 1
269-
metric neutron-linuxbridge-agent_status uint32 1
270-
metric neutron-dhcp-agent_status uint32 1
271-
metric neutron-linuxbridge-agent_status uint32 1
268+
metric neutron-metadata-agent_status string Yes
269+
metric neutron-linuxbridge-agent_status string No
270+
metric neutron-dhcp-agent_status string neutron-dhcp-agent cannot reach API
271+
metric neutron-linuxbridge-agent_status string Yes
272272
...
273273

274274
***

playbooks/files/rax-maas/plugins/maas_common.py

+16
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,22 @@
5050
TOKEN_FILE = '/root/.auth_ref.json'
5151

5252

53+
NEUTRON_AGENT_TYPE_LIST = [
54+
'neutron-linuxbridge-agent',
55+
'neutron-dhcp-agent',
56+
'neutron-l3-agent',
57+
'neutron-metadata-agent',
58+
'neutron-metering-agent'
59+
]
60+
NOVA_SERVICE_TYPE_LIST = [
61+
'nova-cert',
62+
'nova-compute',
63+
'nova-conductor',
64+
'nova-consoleauth',
65+
'nova-scheduler'
66+
]
67+
68+
5369
try:
5470
from cinderclient import client as c_client
5571
from cinderclient import exceptions as c_exc

playbooks/files/rax-maas/plugins/neutron_service_check.py

+11-5
Original file line number | Diff line number | Diff line change
@@ -15,14 +15,15 @@
1515
# limitations under the License.
1616

1717
import argparse
18-
import sys
1918

2019
from maas_common import get_neutron_client
20+
from maas_common import metric
2121
from maas_common import metric_bool
2222
from maas_common import print_output
2323
from maas_common import status_err
2424
from maas_common import status_err_no_exit
2525
from maas_common import status_ok
26+
from maas_common import NEUTRON_AGENT_TYPE_LIST
2627

2728

2829
def check(args):
@@ -35,8 +36,13 @@ def check(args):
3536
# not gathering api status metric here so catch any exception
3637
except Exception as e:
3738
metric_bool('client_success', False, m_name='maas_neutron')
39+
for neutron_agent_type in NEUTRON_AGENT_TYPE_LIST:
40+
metric('%s_status' % neutron_agent_type,
41+
'string',
42+
'%s cannot reach API' % neutron_agent_type,
43+
m_name='maas_neutron')
3844
status_err_no_exit(str(e), m_name='maas_neutron')
39-
sys.exit(0)
45+
return
4046
else:
4147
metric_bool('client_success', True, m_name='maas_neutron')
4248

@@ -58,9 +64,9 @@ def check(args):
5864
# return all the things
5965
status_ok(m_name='maas_neutron')
6066
for agent in agents:
61-
agent_is_up = True
67+
agent_is_up = "Yes"
6268
if agent['admin_state_up'] and not agent['alive']:
63-
agent_is_up = False
69+
agent_is_up = "No"
6470

6571
if args.host:
6672
name = '%s_status' % agent['binary']
@@ -71,7 +77,7 @@ def check(args):
7177
agent['id'],
7278
agent['host'])
7379

74-
metric_bool(name, agent_is_up, m_name='maas_neutron')
80+
metric(name, 'string', agent_is_up, m_name='maas_neutron')
7581

7682

7783
def main(args):

playbooks/files/rax-maas/plugins/nova_service_check.py

+11-5
Original file line number | Diff line number | Diff line change
@@ -15,16 +15,17 @@
1515
# limitations under the License.
1616

1717
import argparse
18-
import sys
1918

2019
from maas_common import get_auth_ref
2120
from maas_common import get_keystone_client
2221
from maas_common import get_nova_client
22+
from maas_common import metric
2323
from maas_common import metric_bool
2424
from maas_common import print_output
2525
from maas_common import status_err
2626
from maas_common import status_err_no_exit
2727
from maas_common import status_ok
28+
from maas_common import NOVA_SERVICE_TYPE_LIST
2829

2930

3031
def check(auth_ref, args):
@@ -44,8 +45,13 @@ def check(auth_ref, args):
4445
# not gathering api status metric here so catch any exception
4546
except Exception as e:
4647
metric_bool('client_success', False, m_name='maas_nova')
48+
for nova_service_type in NOVA_SERVICE_TYPE_LIST:
49+
metric('%s_status' % nova_service_type,
50+
'string',
51+
'%s cannot reach API' % nova_service_type,
52+
m_name='maas_nova')
4753
status_err_no_exit(str(e), m_name='maas_nova')
48-
sys.exit(0)
54+
return
4955
else:
5056
metric_bool('client_success', True, m_name='maas_nova')
5157

@@ -61,17 +67,17 @@ def check(auth_ref, args):
6167
# return all the things
6268
status_ok(m_name='maas_nova')
6369
for service in services:
64-
service_is_up = True
70+
service_is_up = "Yes"
6571

6672
if service.status == 'enabled' and service.state == 'down':
67-
service_is_up = False
73+
service_is_up = "No"
6874

6975
if args.host:
7076
name = '%s_status' % service.binary
7177
else:
7278
name = '%s_on_host_%s_status' % (service.binary, service.host)
7379

74-
metric_bool(name, service_is_up, m_name='maas_nova')
80+
metric(name, 'string', service_is_up, m_name='maas_nova')
7581

7682

7783
def main(args):

playbooks/templates/rax-maas/neutron_dhcp_agent_check.yaml.j2

+2-2
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ alarms :
1919
disabled : {{ (('neutron_dhcp_agent_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }}
2020
criteria : |
2121
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
22-
if (metric["client_success"] != 1) {
22+
if (metric["neutron-dhcp-agent_status"] regex ".*cannot reach API.*") {
2323
return new AlarmStatus(WARNING, "neutron dhcp agent can't reach API");
2424
}
25-
if (metric["neutron-dhcp-agent_status"] != 1) {
25+
if (metric["neutron-dhcp-agent_status"] == 'No') {
2626
return new AlarmStatus(CRITICAL, "neutron-dhcp-agent down");
2727
}

playbooks/templates/rax-maas/neutron_l3_agent_check.yaml.j2

+2-2
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ alarms :
1919
disabled : {{ (('neutron_l3_agent_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }}
2020
criteria : |
2121
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
22-
if (metric["client_success"] != 1) {
22+
if (metric["neutron-l3-agent_status"] regex ".*cannot reach API.*") {
2323
return new AlarmStatus(WARNING, "neutron l3 agent can't reach API");
2424
}
25-
if (metric["neutron-l3-agent_status"] != 1) {
25+
if (metric["neutron-l3-agent_status"] == 'No') {
2626
return new AlarmStatus(CRITICAL, "neutron-l3-agent down");
2727
}

playbooks/templates/rax-maas/neutron_linuxbridge_agent_check.yaml.j2

+2-2
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ alarms :
1919
disabled : {{ (('neutron_linuxbridge_agent_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }}
2020
criteria : |
2121
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
22-
if (metric["client_success"] != 1) {
22+
if (metric["neutron-linuxbridge-agent_status"] regex ".*cannot reach API.*") {
2323
return new AlarmStatus(WARNING, "neutron linux-agent can't reach API");
2424
}
25-
if (metric["neutron-linuxbridge-agent_status"] != 1) {
25+
if (metric["neutron-linuxbridge-agent_status"] == "No") {
2626
return new AlarmStatus(CRITICAL, "neutron-linuxbridge-agent down");
2727
}

playbooks/templates/rax-maas/neutron_metadata_agent_check.yaml.j2

+2-2
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ alarms :
1919
disabled : {{ (('neutron_metadata_agent_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }}
2020
criteria : |
2121
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
22-
if (metric["client_success"] != 1) {
22+
if (metric["neutron-metadata-agent_status"] regex ".*cannot reach API.*") {
2323
return new AlarmStatus(WARNING, "neutron metadata agent can't reach API");
2424
}
25-
if (metric["neutron-metadata-agent_status"] != 1) {
25+
if (metric["neutron-metadata-agent_status"] == 'No') {
2626
return new AlarmStatus(CRITICAL, "neutron-metadata-agent down");
2727
}

playbooks/templates/rax-maas/neutron_metering_agent_check.yaml.j2

+2-2
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ alarms :
1919
disabled : {{ (('neutron_metering_agent_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }}
2020
criteria : |
2121
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
22-
if (metric["client_success"] != 1) {
22+
if (metric["neutron-metering-agent_status"] regex ".*cannot reach API.*") {
2323
return new AlarmStatus(WARNING, "neutron metering agent can't reach API");
2424
}
25-
if (metric["neutron-metering-agent_status"] != 1) {
25+
if (metric["neutron-metering-agent_status"] == 'No') {
2626
return new AlarmStatus(CRITICAL, "neutron-metering-agent down");
2727
}

playbooks/templates/rax-maas/nova_cert_check.yaml.j2

+3-3
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ alarms :
1919
disabled : {{ (('nova_cert_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }}
2020
criteria : |
2121
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
22-
if (metric["client_success"] != 1) {
23-
return new AlarmStatus(WARNING, "Nova cert service can't reach API");
22+
if (metric["nova-cert_status"] regex ".*cannot reach API.*") {
23+
return new AlarmStatus(WARNING, "nova cert service can't reach API");
2424
}
25-
if (metric["nova-cert_status"] != 1) {
25+
if (metric["nova-cert_status"] == "No") {
2626
return new AlarmStatus(CRITICAL, "nova-cert down");
2727
}

playbooks/templates/rax-maas/nova_compute_check.yaml.j2

+3-3
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ alarms :
1919
disabled : {{ (('nova_compute_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }}
2020
criteria : |
2121
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
22-
if (metric["client_success"] != 1) {
23-
return new AlarmStatus(WARNING, "Nova compute service can't reach API");
22+
if (metric["nova-compute_status"] regex ".*cannot reach API.*") {
23+
return new AlarmStatus(WARNING, "nova compute service can't reach API");
2424
}
25-
if (metric["nova-compute_status"] != 1) {
25+
if (metric["nova-compute_status"] == 'No') {
2626
return new AlarmStatus(CRITICAL, "nova-compute down");
2727
}

playbooks/templates/rax-maas/nova_conductor_check.yaml.j2

+3-3
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ alarms :
1919
disabled : {{ (('nova_conductor_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }}
2020
criteria : |
2121
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
22-
if (metric["client_success"] != 1) {
23-
return new AlarmStatus(WARNING, "Nova conductor can't reach API");
22+
if (metric["nova-conductor_status"] regex ".*cannot reach API.*") {
23+
return new AlarmStatus(WARNING, "nova conductor can't reach API");
2424
}
25-
if (metric["nova-conductor_status"] != 1) {
25+
if (metric["nova-conductor_status"] == 'No') {
2626
return new AlarmStatus(CRITICAL, "nova-conductor down");
2727
}

playbooks/templates/rax-maas/nova_consoleauth_check.yaml.j2

+3-3
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ alarms :
1919
disabled : {{ (('nova_consoleauth_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }}
2020
criteria : |
2121
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
22-
if (metric["client_success"] != 1) {
23-
return new AlarmStatus(WARNING, "Nova consoleauth service can't reach API");
22+
if (metric["nova-consoleauth_status"] regex ".*cannot reach API.*") {
23+
return new AlarmStatus(WARNING, "nova consoleauth service can't reach API");
2424
}
25-
if (metric["nova-consoleauth_status"] != 1) {
25+
if (metric["nova-consoleauth_status"] == "No") {
2626
return new AlarmStatus(CRITICAL, "nova-consoleauth down");
2727
}

playbooks/templates/rax-maas/nova_scheduler_check.yaml.j2

+3-3
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ alarms :
1919
disabled : {{ (('nova_scheduler_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }}
2020
criteria : |
2121
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
22-
if (metric["client_success"] != 1) {
23-
return new AlarmStatus(WARNING, "Nova scheduler service can't reach API");
22+
if (metric["nova-scheduler_status"] regex ".*cannot reach API.*") {
23+
return new AlarmStatus(WARNING, "nova scheduler service can't reach API");
2424
}
25-
if (metric["nova-scheduler_status"] != 1) {
25+
if (metric["nova-scheduler_status"] == 'No') {
2626
return new AlarmStatus(CRITICAL, "nova-scheduler down");
2727
}

0 commit comments

Comments
 (0)