diff --git a/playbooks/files/rax-maas/plugins/rally_performance.py b/playbooks/files/rax-maas/plugins/rally_performance.py index ba2189519..003e75ea7 100644 --- a/playbooks/files/rax-maas/plugins/rally_performance.py +++ b/playbooks/files/rax-maas/plugins/rally_performance.py @@ -435,7 +435,12 @@ def main(): metric('maas_check_duration', 'double', "{:.2f}".format((end - start) * 1)) if args.influxdb: - send_metrics_to_influxdb(plugin_config, logger) + try: + send_metrics_to_influxdb(plugin_config, logger) + except: + metric('influxdb_success', 'uint32', 0) + else: + metric('influxdb_success', 'uint32', 1) return diff --git a/playbooks/templates/rax-maas/rally_check.yaml.j2 b/playbooks/templates/rax-maas/rally_check.yaml.j2 index e35772965..222b3e6b3 100644 --- a/playbooks/templates/rax-maas/rally_check.yaml.j2 +++ b/playbooks/templates/rax-maas/rally_check.yaml.j2 @@ -68,3 +68,14 @@ alarms : if (metric["resource_cleanup"] > 0) { return new AlarmStatus(CRITICAL, "{{ name }} performance test has required stale resource cleanup for {{ resource_cleanup_threshold }} or more intervals."); } +{% if influxdb %} + rally_{{ name }}_influxdb : + label : rally_{{ name }}_influxdb--{{ inventory_hostname }} + notification_plan_id : "{{ maas_notification_plan_override[label] | default(maas_notification_plan) }}" + disabled : {{ (("rally_{{ name }}_influxdb--{{ inventory_hostname }}") | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }} + criteria : | + :set consecutiveCount={{ alarm_consecutive_count }} + if (metric["influxdb_success"] != 1) { + return new AlarmStatus(CRITICAL, "{{ name }} performance test was unable to write to the configured influxdb endpoint for {{ alarm_consecutive_count }} or more intervals."); + } +{% endif %} diff --git a/releasenotes/notes/influxdb-error-check-3b4b7734f2f876d7.yaml b/releasenotes/notes/influxdb-error-check-3b4b7734f2f876d7.yaml new file mode 100644 index 000000000..7e196ab52 --- /dev/null +++ b/releasenotes/notes/influxdb-error-check-3b4b7734f2f876d7.yaml @@ -0,0 +1,7 @@ +--- +features: + - If maas_rally is configured to write to an influxdb endpoint, a new metric + (influxdb_success) and alarm will be created to generate alerts if writing + to influxdb fails. A failure to write to influxdb is no longer fatal, + which allows performance metrics to still be reported via the MaaS API even + if the influxdb endpoint is unavailable.