From 6eaf5fd32813c9c3311d67d766d76c004bc9c4f9 Mon Sep 17 00:00:00 2001 From: nhat do Date: Thu, 12 Dec 2024 13:34:56 +0100 Subject: [PATCH 01/13] create detectors for firebase database --- docs/severity.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/severity.md b/docs/severity.md index 149003026..0754ad78f 100644 --- a/docs/severity.md +++ b/docs/severity.md @@ -72,6 +72,7 @@ - [integration_gcp-cloud-sql-mysql](#integration_gcp-cloud-sql-mysql) - [integration_gcp-cloud-sql-postgresql](#integration_gcp-cloud-sql-postgresql) - [integration_gcp-compute-engine](#integration_gcp-compute-engine) +- [integration_gcp-firebase](#integration_gcp-firebase) - [integration_gcp-load-balancing](#integration_gcp-load-balancing) - [integration_gcp-memorystore-redis](#integration_gcp-memorystore-redis) - [integration_gcp-pubsub-subscription](#integration_gcp-pubsub-subscription) @@ -793,6 +794,15 @@ |GCP GCE Instance disk throttled ops|X|X|-|-|-| +## integration_gcp-firebase + +|Detector|Critical|Major|Minor|Warning|Info| +|---|---|---|---|---|---| +|Webcheck heartbeat|X|-|-|-|-| +|GCP Firebase database firebase database load|X|X|-|-|-| +|GCP Firebase database firebase database io utilization|X|X|-|-|-| + + ## integration_gcp-load-balancing |Detector|Critical|Major|Minor|Warning|Info| From 10d832c90c3c66d40cc7e822a38e599beee2012f Mon Sep 17 00:00:00 2001 From: nhat do Date: Thu, 12 Dec 2024 13:35:30 +0100 Subject: [PATCH 02/13] create detectors for firebase database --- modules/integration_gcp-firebase/README.md | 111 +++++++++ .../common-filters.tf | 1 + .../integration_gcp-firebase/common-locals.tf | 1 + .../common-modules.tf | 1 + .../common-variables.tf | 1 + .../common-versions.tf | 1 + .../conf/00-heatbeat.yaml | 12 + .../conf/01-database_load.yaml | 18 ++ .../conf/02-io_utilization.yaml | 18 ++ .../integration_gcp-firebase/conf/readme.yaml | 5 + .../integration_gcp-firebase/detectors-gen.tf | 108 +++++++++ modules/integration_gcp-firebase/outputs.tf | 
15 ++ modules/integration_gcp-firebase/tags.tf | 4 + .../integration_gcp-firebase/variables-gen.tf | 224 ++++++++++++++++++ 14 files changed, 520 insertions(+) create mode 100644 modules/integration_gcp-firebase/README.md create mode 120000 modules/integration_gcp-firebase/common-filters.tf create mode 120000 modules/integration_gcp-firebase/common-locals.tf create mode 120000 modules/integration_gcp-firebase/common-modules.tf create mode 120000 modules/integration_gcp-firebase/common-variables.tf create mode 120000 modules/integration_gcp-firebase/common-versions.tf create mode 100644 modules/integration_gcp-firebase/conf/00-heatbeat.yaml create mode 100644 modules/integration_gcp-firebase/conf/01-database_load.yaml create mode 100644 modules/integration_gcp-firebase/conf/02-io_utilization.yaml create mode 100644 modules/integration_gcp-firebase/conf/readme.yaml create mode 100644 modules/integration_gcp-firebase/detectors-gen.tf create mode 100644 modules/integration_gcp-firebase/outputs.tf create mode 100644 modules/integration_gcp-firebase/tags.tf create mode 100644 modules/integration_gcp-firebase/variables-gen.tf diff --git a/modules/integration_gcp-firebase/README.md b/modules/integration_gcp-firebase/README.md new file mode 100644 index 000000000..feb51e0c9 --- /dev/null +++ b/modules/integration_gcp-firebase/README.md @@ -0,0 +1,111 @@ +# GCP-FIREBASE SignalFx detectors + + + +:link: **Contents** + +- [How to use this module?](#how-to-use-this-module) +- [What are the available detectors in this module?](#what-are-the-available-detectors-in-this-module) +- [How to collect required metrics?](#how-to-collect-required-metrics) + - [Metrics](#metrics) +- [Related documentation](#related-documentation) + + + +## How to use this module? 
+ +This directory defines a [Terraform](https://www.terraform.io/) +[module](https://www.terraform.io/language/modules/syntax) you can use in your +existing [stack](https://github.com/claranet/terraform-signalfx-detectors/wiki/Getting-started#stack) by adding a +`module` configuration and setting its `source` parameter to URL of this folder: + +```hcl +module "signalfx-detectors-integration-gcp-firebase" { + source = "github.com/claranet/terraform-signalfx-detectors.git//modules/integration_gcp-firebase?ref={revision}" + + environment = var.environment + notifications = local.notifications +} +``` + +Note the following parameters: + +* `source`: Use this parameter to specify the URL of the module. The double slash (`//`) is intentional and required. + Terraform uses it to specify subfolders within a Git repo (see [module + sources](https://www.terraform.io/language/modules/sources)). The `ref` parameter specifies a specific Git tag in + this repository. It is recommended to use the latest "pinned" version in place of `{revision}`. Avoid using a branch + like `master` except for testing purpose. Note that every modules in this repository are available on the Terraform + [registry](https://registry.terraform.io/modules/claranet/detectors/signalfx) and we recommend using it as source + instead of `git` which is more flexible but less future-proof. + +* `environment`: Use this parameter to specify the + [environment](https://github.com/claranet/terraform-signalfx-detectors/wiki/Getting-started#environment) used by this + instance of the module. + Its value will be added to the `prefixes` list at the start of the [detector + name](https://github.com/claranet/terraform-signalfx-detectors/wiki/Templating#example). 
+ In general, it will also be used in the `filtering` internal sub-module to [apply + filters](https://github.com/claranet/terraform-signalfx-detectors/wiki/Guidance#filtering) based on our default + [tagging convention](https://github.com/claranet/terraform-signalfx-detectors/wiki/Tagging-convention) by default. + +* `notifications`: Use this parameter to define where alerts should be sent depending on their severity. It consists + of a Terraform [object](https://www.terraform.io/language/expressions/type-constraints#object) where each key represents an available + [detector rule severity](https://docs.splunk.com/observability/alerts-detectors-notifications/create-detectors-for-alerts.html#severity) + and its value is a list of recipients. Every recipients must respect the [detector notification + format](https://registry.terraform.io/providers/splunk-terraform/signalfx/latest/docs/resources/detector#notification-format). + Check the [notification binding](https://github.com/claranet/terraform-signalfx-detectors/wiki/Notifications-binding) + documentation to understand the recommended role of each severity. + +These 3 parameters along with all variables defined in [common-variables.tf](common-variables.tf) are common to all +[modules](../) in this repository. Other variables, specific to this module, are available in +[variables-gen.tf](variables-gen.tf). +In general, the default configuration "works" but all of these Terraform +[variables](https://www.terraform.io/language/values/variables) make it possible to +customize the detectors behavior to better fit your needs. + +Most of them represent usual tips and rules detailed in the +[guidance](https://github.com/claranet/terraform-signalfx-detectors/wiki/Guidance) documentation and listed in the +common [variables](https://github.com/claranet/terraform-signalfx-detectors/wiki/Variables) dedicated documentation. 
+ +Feel free to explore the [wiki](https://github.com/claranet/terraform-signalfx-detectors/wiki) for more information about +general usage of this repository. + +## What are the available detectors in this module? + +This module creates the following SignalFx detectors which could contain one or multiple alerting rules: + +|Detector|Critical|Major|Minor|Warning|Info| +|---|---|---|---|---|---| +|Webcheck heartbeat|X|-|-|-|-| +|GCP Firebase database firebase database load|X|X|-|-|-| +|GCP Firebase database firebase database io utilization|X|X|-|-|-| + +## How to collect required metrics? + +This module deploys detectors using metrics reported by the +[GCP integration](https://docs.splunk.com/observability/en/gdi/get-data-in/connect/gcp/gcp-metrics.html) configurable +with [this Terraform module](https://github.com/claranet/terraform-signalfx-integrations/tree/master/cloud/gcp). + + +Check the [Related documentation](#related-documentation) section for more detailed and specific information about this module dependencies. + + + +### Metrics + + +Here is the list of required metrics for detectors in this module. 
+ +* `io/database_load` +* `io/utilization` +* `network/active_connections` + + + + +## Related documentation + +* [Terraform SignalFx provider](https://registry.terraform.io/providers/splunk-terraform/signalfx/latest/docs) +* [Terraform SignalFx detector](https://registry.terraform.io/providers/splunk-terraform/signalfx/latest/docs/resources/detector) +* [Splunk Observability integrations](https://docs.splunk.com/Observability/gdi/get-data-in/integrations.html) +* [Stackdriver metrics for Firebase](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-firebasedatabase) +* [Splunk Observability metrics](https://docs.splunk.com/observability/en/gdi/get-data-in/connect/gcp/gcp.html) diff --git a/modules/integration_gcp-firebase/common-filters.tf b/modules/integration_gcp-firebase/common-filters.tf new file mode 120000 index 000000000..778f2e38e --- /dev/null +++ b/modules/integration_gcp-firebase/common-filters.tf @@ -0,0 +1 @@ +../../common/module/filters-integration-gcp.tf \ No newline at end of file diff --git a/modules/integration_gcp-firebase/common-locals.tf b/modules/integration_gcp-firebase/common-locals.tf new file mode 120000 index 000000000..5672d21ab --- /dev/null +++ b/modules/integration_gcp-firebase/common-locals.tf @@ -0,0 +1 @@ +../../common/module/locals.tf \ No newline at end of file diff --git a/modules/integration_gcp-firebase/common-modules.tf b/modules/integration_gcp-firebase/common-modules.tf new file mode 120000 index 000000000..8c81ef377 --- /dev/null +++ b/modules/integration_gcp-firebase/common-modules.tf @@ -0,0 +1 @@ +../../common/module/modules.tf \ No newline at end of file diff --git a/modules/integration_gcp-firebase/common-variables.tf b/modules/integration_gcp-firebase/common-variables.tf new file mode 120000 index 000000000..f3037a584 --- /dev/null +++ b/modules/integration_gcp-firebase/common-variables.tf @@ -0,0 +1 @@ +../../common/module/variables.tf \ No newline at end of file diff --git 
a/modules/integration_gcp-firebase/common-versions.tf b/modules/integration_gcp-firebase/common-versions.tf new file mode 120000 index 000000000..fa7f5509f --- /dev/null +++ b/modules/integration_gcp-firebase/common-versions.tf @@ -0,0 +1 @@ +../../common/module/versions.tf \ No newline at end of file diff --git a/modules/integration_gcp-firebase/conf/00-heatbeat.yaml b/modules/integration_gcp-firebase/conf/00-heatbeat.yaml new file mode 100644 index 000000000..8ed1c73e9 --- /dev/null +++ b/modules/integration_gcp-firebase/conf/00-heatbeat.yaml @@ -0,0 +1,12 @@ +## Example +module: webcheck +name: heartbeat + +transformation: false +aggregation: true + +signals: + signal: + metric: "network/active_connections" +rules: + critical: diff --git a/modules/integration_gcp-firebase/conf/01-database_load.yaml b/modules/integration_gcp-firebase/conf/01-database_load.yaml new file mode 100644 index 000000000..d495de2a9 --- /dev/null +++ b/modules/integration_gcp-firebase/conf/01-database_load.yaml @@ -0,0 +1,18 @@ +module: "GCP Firebase database" +name: "Firebase database load" + +transformation: ".min(over='30m')" + +signals: + signal: + metric: "io/database_load" + +rules: + critical: + threshold: 10 + comparator: ">" + + major: + threshold: 5 + comparator: ">" + dependency: "critical" diff --git a/modules/integration_gcp-firebase/conf/02-io_utilization.yaml b/modules/integration_gcp-firebase/conf/02-io_utilization.yaml new file mode 100644 index 000000000..c28b84fad --- /dev/null +++ b/modules/integration_gcp-firebase/conf/02-io_utilization.yaml @@ -0,0 +1,18 @@ +module: "GCP Firebase database" +name: "Firebase database IO utilization" + +transformation: ".min(over='30m')" + +signals: + signal: + metric: "io/utilization" + +rules: + critical: + threshold: 10 + comparator: ">" + + major: + threshold: 5 + comparator: ">" + dependency: "critical" diff --git a/modules/integration_gcp-firebase/conf/readme.yaml b/modules/integration_gcp-firebase/conf/readme.yaml new file mode 
100644 index 000000000..120419ded --- /dev/null +++ b/modules/integration_gcp-firebase/conf/readme.yaml @@ -0,0 +1,5 @@ +documentations: + - name: Stackdriver metrics for Firebase + url: https://cloud.google.com/monitoring/api/metrics_gcp#gcp-firebasedatabase + - name: Splunk Observability metrics + url: https://docs.splunk.com/observability/en/gdi/get-data-in/connect/gcp/gcp.html diff --git a/modules/integration_gcp-firebase/detectors-gen.tf b/modules/integration_gcp-firebase/detectors-gen.tf new file mode 100644 index 000000000..7514035d6 --- /dev/null +++ b/modules/integration_gcp-firebase/detectors-gen.tf @@ -0,0 +1,108 @@ +resource "signalfx_detector" "heartbeat" { + name = format("%s %s", local.detector_name_prefix, "Webcheck heartbeat") + + authorized_writer_teams = var.authorized_writer_teams + teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) + tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) + + program_text = <<-EOF + from signalfx.detectors.not_reporting import not_reporting + signal = data('network/active_connections', filter=${module.filtering.signalflow})${var.heartbeat_aggregation_function}.publish('signal') + not_reporting.detector(stream=signal, resource_identifier=None, duration='${var.heartbeat_timeframe}', auto_resolve_after='${local.heartbeat_auto_resolve_after}').publish('CRIT') +EOF + + rule { + description = "has not reported in ${var.heartbeat_timeframe}" + severity = "Critical" + detect_label = "CRIT" + disabled = coalesce(var.heartbeat_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.heartbeat_notifications, "critical", []), var.notifications.critical), null) + runbook_url = try(coalesce(var.heartbeat_runbook_url, var.runbook_url), "") + tip = var.heartbeat_tip + parameterized_subject = var.message_subject == "" ? local.rule_subject_novalue : var.message_subject + parameterized_body = var.message_body == "" ? 
local.rule_body : var.message_body + } + + max_delay = var.heartbeat_max_delay +} + +resource "signalfx_detector" "firebase_database_load" { + name = format("%s %s", local.detector_name_prefix, "GCP Firebase database firebase database load") + + authorized_writer_teams = var.authorized_writer_teams + teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) + tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) + + program_text = <<-EOF + signal = data('io/database_load', filter=${module.filtering.signalflow})${var.firebase_database_load_aggregation_function}${var.firebase_database_load_transformation_function}.publish('signal') + detect(when(signal > ${var.firebase_database_load_threshold_critical}%{if var.firebase_database_load_lasting_duration_critical != null}, lasting='${var.firebase_database_load_lasting_duration_critical}', at_least=${var.firebase_database_load_at_least_percentage_critical}%{endif})).publish('CRIT') + detect(when(signal > ${var.firebase_database_load_threshold_major}%{if var.firebase_database_load_lasting_duration_major != null}, lasting='${var.firebase_database_load_lasting_duration_major}', at_least=${var.firebase_database_load_at_least_percentage_major}%{endif}) and (not when(signal > ${var.firebase_database_load_threshold_critical}%{if var.firebase_database_load_lasting_duration_critical != null}, lasting='${var.firebase_database_load_lasting_duration_critical}', at_least=${var.firebase_database_load_at_least_percentage_critical}%{endif}))).publish('MAJOR') +EOF + + rule { + description = "is too high > ${var.firebase_database_load_threshold_critical}" + severity = "Critical" + detect_label = "CRIT" + disabled = coalesce(var.firebase_database_load_disabled_critical, var.firebase_database_load_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.firebase_database_load_notifications, "critical", []), var.notifications.critical), null) + runbook_url = 
try(coalesce(var.firebase_database_load_runbook_url, var.runbook_url), "") + tip = var.firebase_database_load_tip + parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject + parameterized_body = var.message_body == "" ? local.rule_body : var.message_body + } + + rule { + description = "is too high > ${var.firebase_database_load_threshold_major}" + severity = "Major" + detect_label = "MAJOR" + disabled = coalesce(var.firebase_database_load_disabled_major, var.firebase_database_load_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.firebase_database_load_notifications, "major", []), var.notifications.major), null) + runbook_url = try(coalesce(var.firebase_database_load_runbook_url, var.runbook_url), "") + tip = var.firebase_database_load_tip + parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject + parameterized_body = var.message_body == "" ? local.rule_body : var.message_body + } + + max_delay = var.firebase_database_load_max_delay +} + +resource "signalfx_detector" "firebase_database_io_utilization" { + name = format("%s %s", local.detector_name_prefix, "GCP Firebase database firebase database io utilization") + + authorized_writer_teams = var.authorized_writer_teams + teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) + tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) + + program_text = <<-EOF + signal = data('io/utilization', filter=${module.filtering.signalflow})${var.firebase_database_io_utilization_aggregation_function}${var.firebase_database_io_utilization_transformation_function}.publish('signal') + detect(when(signal > ${var.firebase_database_io_utilization_threshold_critical}%{if var.firebase_database_io_utilization_lasting_duration_critical != null}, lasting='${var.firebase_database_io_utilization_lasting_duration_critical}', 
at_least=${var.firebase_database_io_utilization_at_least_percentage_critical}%{endif})).publish('CRIT') + detect(when(signal > ${var.firebase_database_io_utilization_threshold_major}%{if var.firebase_database_io_utilization_lasting_duration_major != null}, lasting='${var.firebase_database_io_utilization_lasting_duration_major}', at_least=${var.firebase_database_io_utilization_at_least_percentage_major}%{endif}) and (not when(signal > ${var.firebase_database_io_utilization_threshold_critical}%{if var.firebase_database_io_utilization_lasting_duration_critical != null}, lasting='${var.firebase_database_io_utilization_lasting_duration_critical}', at_least=${var.firebase_database_io_utilization_at_least_percentage_critical}%{endif}))).publish('MAJOR') +EOF + + rule { + description = "is too high > ${var.firebase_database_io_utilization_threshold_critical}" + severity = "Critical" + detect_label = "CRIT" + disabled = coalesce(var.firebase_database_io_utilization_disabled_critical, var.firebase_database_io_utilization_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.firebase_database_io_utilization_notifications, "critical", []), var.notifications.critical), null) + runbook_url = try(coalesce(var.firebase_database_io_utilization_runbook_url, var.runbook_url), "") + tip = var.firebase_database_io_utilization_tip + parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject + parameterized_body = var.message_body == "" ? 
local.rule_body : var.message_body + } + + rule { + description = "is too high > ${var.firebase_database_io_utilization_threshold_major}" + severity = "Major" + detect_label = "MAJOR" + disabled = coalesce(var.firebase_database_io_utilization_disabled_major, var.firebase_database_io_utilization_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.firebase_database_io_utilization_notifications, "major", []), var.notifications.major), null) + runbook_url = try(coalesce(var.firebase_database_io_utilization_runbook_url, var.runbook_url), "") + tip = var.firebase_database_io_utilization_tip + parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject + parameterized_body = var.message_body == "" ? local.rule_body : var.message_body + } + + max_delay = var.firebase_database_io_utilization_max_delay +} + diff --git a/modules/integration_gcp-firebase/outputs.tf b/modules/integration_gcp-firebase/outputs.tf new file mode 100644 index 000000000..7ecef6cb6 --- /dev/null +++ b/modules/integration_gcp-firebase/outputs.tf @@ -0,0 +1,15 @@ +output "firebase_database_io_utilization" { + description = "Detector resource for firebase_database_io_utilization" + value = signalfx_detector.firebase_database_io_utilization +} + +output "firebase_database_load" { + description = "Detector resource for firebase_database_load" + value = signalfx_detector.firebase_database_load +} + +output "heartbeat" { + description = "Detector resource for heartbeat" + value = signalfx_detector.heartbeat +} + diff --git a/modules/integration_gcp-firebase/tags.tf b/modules/integration_gcp-firebase/tags.tf new file mode 100644 index 000000000..732e12ed7 --- /dev/null +++ b/modules/integration_gcp-firebase/tags.tf @@ -0,0 +1,4 @@ +locals { + tags = ["integration", "gcp-firebase"] +} + diff --git a/modules/integration_gcp-firebase/variables-gen.tf b/modules/integration_gcp-firebase/variables-gen.tf new file mode 100644 index 000000000..b9b5cef28 --- 
/dev/null +++ b/modules/integration_gcp-firebase/variables-gen.tf @@ -0,0 +1,224 @@ +# heartbeat detector + +variable "heartbeat_notifications" { + description = "Notification recipients list per severity overridden for heartbeat detector" + type = map(list(string)) + default = {} +} + +variable "heartbeat_aggregation_function" { + description = "Aggregation function and group by for heartbeat detector (i.e. \".mean(by=['host'])\")" + type = string + default = "" +} + +variable "heartbeat_max_delay" { + description = "Enforce max delay for heartbeat detector (use \"0\" or \"null\" for \"Auto\")" + type = number + default = null +} + +variable "heartbeat_tip" { + description = "Suggested first course of action or any note useful for incident handling" + type = string + default = "" +} + +variable "heartbeat_runbook_url" { + description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" + type = string + default = "" +} + +variable "heartbeat_disabled" { + description = "Disable all alerting rules for heartbeat detector" + type = bool + default = null +} + +variable "heartbeat_timeframe" { + description = "Timeframe for heartbeat detector (i.e. \"25m\")." + type = string + default = "25m" +} + +# firebase_database_load detector + +variable "firebase_database_load_notifications" { + description = "Notification recipients list per severity overridden for firebase_database_load detector" + type = map(list(string)) + default = {} +} + +variable "firebase_database_load_aggregation_function" { + description = "Aggregation function and group by for firebase_database_load detector (i.e. \".mean(by=['host'])\")" + type = string + default = "" +} + +variable "firebase_database_load_transformation_function" { + description = "Transformation function for firebase_database_load detector (i.e. 
\".mean(over='5m')\")" + type = string + default = ".min(over='30m')" +} + +variable "firebase_database_load_max_delay" { + description = "Enforce max delay for firebase_database_load detector (use \"0\" or \"null\" for \"Auto\")" + type = number + default = null +} + +variable "firebase_database_load_tip" { + description = "Suggested first course of action or any note useful for incident handling" + type = string + default = "" +} + +variable "firebase_database_load_runbook_url" { + description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" + type = string + default = "" +} + +variable "firebase_database_load_disabled" { + description = "Disable all alerting rules for firebase_database_load detector" + type = bool + default = null +} + +variable "firebase_database_load_disabled_critical" { + description = "Disable critical alerting rule for firebase_database_load detector" + type = bool + default = null +} + +variable "firebase_database_load_disabled_major" { + description = "Disable major alerting rule for firebase_database_load detector" + type = bool + default = null +} + +variable "firebase_database_load_threshold_critical" { + description = "Critical threshold for firebase_database_load detector" + type = number + default = 10 +} + +variable "firebase_database_load_lasting_duration_critical" { + description = "Minimum duration that conditions must be true before raising alert" + type = string + default = null +} + +variable "firebase_database_load_at_least_percentage_critical" { + description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" + type = number + default = 1 +} +variable "firebase_database_load_threshold_major" { + description = "Major threshold for firebase_database_load detector" + type = number + default = 5 +} + +variable "firebase_database_load_lasting_duration_major" { + description = "Minimum duration that conditions must be true before raising 
alert" + type = string + default = null +} + +variable "firebase_database_load_at_least_percentage_major" { + description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" + type = number + default = 1 +} +# firebase_database_io_utilization detector + +variable "firebase_database_io_utilization_notifications" { + description = "Notification recipients list per severity overridden for firebase_database_io_utilization detector" + type = map(list(string)) + default = {} +} + +variable "firebase_database_io_utilization_aggregation_function" { + description = "Aggregation function and group by for firebase_database_io_utilization detector (i.e. \".mean(by=['host'])\")" + type = string + default = "" +} + +variable "firebase_database_io_utilization_transformation_function" { + description = "Transformation function for firebase_database_io_utilization detector (i.e. \".mean(over='5m')\")" + type = string + default = ".min(over='30m')" +} + +variable "firebase_database_io_utilization_max_delay" { + description = "Enforce max delay for firebase_database_io_utilization detector (use \"0\" or \"null\" for \"Auto\")" + type = number + default = null +} + +variable "firebase_database_io_utilization_tip" { + description = "Suggested first course of action or any note useful for incident handling" + type = string + default = "" +} + +variable "firebase_database_io_utilization_runbook_url" { + description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" + type = string + default = "" +} + +variable "firebase_database_io_utilization_disabled" { + description = "Disable all alerting rules for firebase_database_io_utilization detector" + type = bool + default = null +} + +variable "firebase_database_io_utilization_disabled_critical" { + description = "Disable critical alerting rule for firebase_database_io_utilization detector" + type = bool + default = null +} + +variable 
"firebase_database_io_utilization_disabled_major" { + description = "Disable major alerting rule for firebase_database_io_utilization detector" + type = bool + default = null +} + +variable "firebase_database_io_utilization_threshold_critical" { + description = "Critical threshold for firebase_database_io_utilization detector" + type = number + default = 10 +} + +variable "firebase_database_io_utilization_lasting_duration_critical" { + description = "Minimum duration that conditions must be true before raising alert" + type = string + default = null +} + +variable "firebase_database_io_utilization_at_least_percentage_critical" { + description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" + type = number + default = 1 +} +variable "firebase_database_io_utilization_threshold_major" { + description = "Major threshold for firebase_database_io_utilization detector" + type = number + default = 5 +} + +variable "firebase_database_io_utilization_lasting_duration_major" { + description = "Minimum duration that conditions must be true before raising alert" + type = string + default = null +} + +variable "firebase_database_io_utilization_at_least_percentage_major" { + description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" + type = number + default = 1 +} From 6b0305cddbdfe40aac7328ffbc59bbc1545402cf Mon Sep 17 00:00:00 2001 From: nhat do Date: Thu, 12 Dec 2024 17:35:50 +0100 Subject: [PATCH 03/13] fix conf detector --- modules/integration_gcp-firebase/README.md | 7 ++++--- modules/integration_gcp-firebase/common-filters.tf | 1 - modules/integration_gcp-firebase/filters.tf | 3 +++ modules/integration_gcp-firebase/variables.tf | 4 ++++ 4 files changed, 11 insertions(+), 4 deletions(-) delete mode 120000 modules/integration_gcp-firebase/common-filters.tf create mode 100644 modules/integration_gcp-firebase/filters.tf create mode 100644 
modules/integration_gcp-firebase/variables.tf diff --git a/modules/integration_gcp-firebase/README.md b/modules/integration_gcp-firebase/README.md index feb51e0c9..18e3aacd9 100644 --- a/modules/integration_gcp-firebase/README.md +++ b/modules/integration_gcp-firebase/README.md @@ -23,8 +23,9 @@ existing [stack](https://github.com/claranet/terraform-signalfx-detectors/wiki/G module "signalfx-detectors-integration-gcp-firebase" { source = "github.com/claranet/terraform-signalfx-detectors.git//modules/integration_gcp-firebase?ref={revision}" - environment = var.environment - notifications = local.notifications + environment = var.environment + notifications = local.notifications + gcp_project_id = "fillme" } ``` @@ -57,7 +58,7 @@ Note the following parameters: These 3 parameters along with all variables defined in [common-variables.tf](common-variables.tf) are common to all [modules](../) in this repository. Other variables, specific to this module, are available in -[variables-gen.tf](variables-gen.tf). +[variables.tf](variables.tf) and [variables-gen.tf](variables-gen.tf). In general, the default configuration "works" but all of these Terraform [variables](https://www.terraform.io/language/values/variables) make it possible to customize the detectors behavior to better fit your needs. 
diff --git a/modules/integration_gcp-firebase/common-filters.tf b/modules/integration_gcp-firebase/common-filters.tf deleted file mode 120000 index 778f2e38e..000000000 --- a/modules/integration_gcp-firebase/common-filters.tf +++ /dev/null @@ -1 +0,0 @@ -../../common/module/filters-integration-gcp.tf \ No newline at end of file diff --git a/modules/integration_gcp-firebase/filters.tf b/modules/integration_gcp-firebase/filters.tf new file mode 100644 index 000000000..f396ead7e --- /dev/null +++ b/modules/integration_gcp-firebase/filters.tf @@ -0,0 +1,3 @@ +locals { + filters = "filter('project_id', '${var.gcp_project_id}')" +} diff --git a/modules/integration_gcp-firebase/variables.tf b/modules/integration_gcp-firebase/variables.tf new file mode 100644 index 000000000..901d3ad46 --- /dev/null +++ b/modules/integration_gcp-firebase/variables.tf @@ -0,0 +1,4 @@ +variable "gcp_project_id" { + description = "GCP project id used for default filtering while labels are not synced" + type = string +} From 1a9ec698712b90148a6f7f05537655edd12995bb Mon Sep 17 00:00:00 2001 From: nhat do Date: Mon, 16 Dec 2024 11:18:19 +0100 Subject: [PATCH 04/13] update read me for detector firebase database --- modules/integration_gcp-firebase/README.md | 57 +++++++++++++++++++ .../integration_gcp-firebase/conf/readme.yaml | 52 +++++++++++++++++ 2 files changed, 109 insertions(+) diff --git a/modules/integration_gcp-firebase/README.md b/modules/integration_gcp-firebase/README.md index 18e3aacd9..25d28362f 100644 --- a/modules/integration_gcp-firebase/README.md +++ b/modules/integration_gcp-firebase/README.md @@ -8,6 +8,10 @@ - [What are the available detectors in this module?](#what-are-the-available-detectors-in-this-module) - [How to collect required metrics?](#how-to-collect-required-metrics) - [Metrics](#metrics) +- [Notes](#notes) + - [Metadata configuration for default filtering](#metadata-configuration-for-default-filtering) + - [Database load](#database-load) + - [Database 
IO](#database-io) - [Related documentation](#related-documentation) @@ -101,6 +105,59 @@ Here is the list of required metrics for detectors in this module. * `network/active_connections` +## Notes + + +### Metadata configuration for default filtering + +label to use : + +sfx_env = true +sfx_monitored = true + +### Database load + +Monitoring the CPU utilization helps in understanding the system's capability and efficiency. + +```hcl +module "signalfx-detectors-integration-gcp-firebase" { + source = "github.com/claranet/terraform-signalfx-detectors.git//modules/integration_gcp-firebase" + + environment = var.environment + gcp_project_id = var.project_id + notifications = local.notifications + + # We keep default filtering policy here, we just want to append additional filter to it + filtering_append = true + # We define the additional filter + filtering_custom = "filter('service_name', '*service-name*')" + # We can configure the thresholds of the probes + firebase_database_load_threshold_critical = 5 + firebase_database_load_threshold_major = 3 +} +``` + +### Database IO + +Monitoring the IO of the database helps in understanding the system's capability and efficiency.
+ +```hcl +module "signalfx-detectors-integration-gcp-firebase" { + source = "github.com/claranet/terraform-signalfx-detectors.git//modules/integration_gcp-firebase" + + environment = var.environment + gcp_project_id = var.project_id + notifications = local.notifications + + # We keep default filtering policy here, we just want to append additional filter to it + filtering_append = true + # We define the additional filter + filtering_custom = "filter('service_name', '*service-name*')" + # We can configure the thresholds of the probes + firebase_database_io_utilization_threshold_critical = 5 + firebase_database_io_utilization_threshold_major = 3 +} +``` ## Related documentation diff --git a/modules/integration_gcp-firebase/conf/readme.yaml b/modules/integration_gcp-firebase/conf/readme.yaml index 120419ded..e1a6088d8 100644 --- a/modules/integration_gcp-firebase/conf/readme.yaml +++ b/modules/integration_gcp-firebase/conf/readme.yaml @@ -3,3 +3,55 @@ documentations: url: https://cloud.google.com/monitoring/api/metrics_gcp#gcp-firebasedatabase - name: Splunk Observability metrics url: https://docs.splunk.com/observability/en/gdi/get-data-in/connect/gcp/gcp.html +notes: | + + ### Metadata configuration for default filtering + + label to use : + + sfx_env = true + sfx_monitored = true + + ### Database load + + Monitoring the CPU utilization helps in understanding the system's capability and efficiency.
+ + ```hcl + module "signalfx-detectors-integration-gcp-firebase" { + source = "github.com/claranet/terraform-signalfx-detectors.git//modules/integration_gcp-firebase" + + environment = var.environment + gcp_project_id = var.project_id + notifications = local.notifications + + # We keep default filtering policy here, we just want to append additional filter to it + filtering_append = true + # We define the additional filter + filtering_custom = "filter('service_name', '*service-name*')" + # We can configure the thresholds of the probes + firebase_database_load_threshold_critical = 5 + firebase_database_load_threshold_major = 3 + } + ``` + + ### Database IO + + Monitoring the IO of the database helps in understanding the system's capability and efficiency. + + ```hcl + module "signalfx-detectors-integration-gcp-firebase" { + source = "github.com/claranet/terraform-signalfx-detectors.git//modules/integration_gcp-firebase" + + environment = var.environment + gcp_project_id = var.project_id + notifications = local.notifications + + # We keep default filtering policy here, we just want to append additional filter to it + filtering_append = true + # We define the additional filter + filtering_custom = "filter('service_name', '*service-name*')" + # We can configure the thresholds of the probes + firebase_database_io_utilization_threshold_critical = 5 + firebase_database_io_utilization_threshold_major = 3 + } + ``` From 385eb080099d43be67885e685eb1f72f03287d7c Mon Sep 17 00:00:00 2001 From: nhat do Date: Mon, 16 Dec 2024 11:21:05 +0100 Subject: [PATCH 05/13] fix typo --- docs/severity.md | 2 +- modules/integration_gcp-firebase/README.md | 2 +- modules/integration_gcp-firebase/conf/00-heatbeat.yaml | 2 +- modules/integration_gcp-firebase/detectors-gen.tf | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/severity.md b/docs/severity.md index deaa43417..7d62a469d 100644 --- a/docs/severity.md +++ b/docs/severity.md @@ -810,7 +810,7 @@
|Detector|Critical|Major|Minor|Warning|Info| |---|---|---|---|---|---| -|Webcheck heartbeat|X|-|-|-|-| +|GCP Firebase database heartbeat|X|-|-|-|-| |GCP Firebase database firebase database load|X|X|-|-|-| |GCP Firebase database firebase database io utilization|X|X|-|-|-| diff --git a/modules/integration_gcp-firebase/README.md b/modules/integration_gcp-firebase/README.md index 25d28362f..eb83e9a90 100644 --- a/modules/integration_gcp-firebase/README.md +++ b/modules/integration_gcp-firebase/README.md @@ -80,7 +80,7 @@ This module creates the following SignalFx detectors which could contain one or |Detector|Critical|Major|Minor|Warning|Info| |---|---|---|---|---|---| -|Webcheck heartbeat|X|-|-|-|-| +|GCP Firebase database heartbeat|X|-|-|-|-| |GCP Firebase database firebase database load|X|X|-|-|-| |GCP Firebase database firebase database io utilization|X|X|-|-|-| diff --git a/modules/integration_gcp-firebase/conf/00-heatbeat.yaml b/modules/integration_gcp-firebase/conf/00-heatbeat.yaml index 8ed1c73e9..64140a1f0 100644 --- a/modules/integration_gcp-firebase/conf/00-heatbeat.yaml +++ b/modules/integration_gcp-firebase/conf/00-heatbeat.yaml @@ -1,5 +1,5 @@ ## Example -module: webcheck +module: GCP Firebase database name: heartbeat transformation: false diff --git a/modules/integration_gcp-firebase/detectors-gen.tf b/modules/integration_gcp-firebase/detectors-gen.tf index 7514035d6..87c112c35 100644 --- a/modules/integration_gcp-firebase/detectors-gen.tf +++ b/modules/integration_gcp-firebase/detectors-gen.tf @@ -1,5 +1,5 @@ resource "signalfx_detector" "heartbeat" { - name = format("%s %s", local.detector_name_prefix, "Webcheck heartbeat") + name = format("%s %s", local.detector_name_prefix, "GCP Firebase database heartbeat") authorized_writer_teams = var.authorized_writer_teams teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) From 1e69de1d7e8e5c81e48b77d698db26f4dc4fdf59 Mon Sep 17 00:00:00 2001 From: ndo77 Date: Mon, 16 Dec 2024 11:22:05 +0100 
Subject: [PATCH 06/13] Update modules/integration_gcp-firebase/conf/00-heatbeat.yaml Co-authored-by: Jean-Baptiste Simillon --- modules/integration_gcp-firebase/conf/00-heatbeat.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/integration_gcp-firebase/conf/00-heatbeat.yaml b/modules/integration_gcp-firebase/conf/00-heatbeat.yaml index 64140a1f0..5d5f8b3b1 100644 --- a/modules/integration_gcp-firebase/conf/00-heatbeat.yaml +++ b/modules/integration_gcp-firebase/conf/00-heatbeat.yaml @@ -1,4 +1,3 @@ -## Example module: GCP Firebase database name: heartbeat From a79b8d84de3183b32f7bccfe664514e0e56e3b48 Mon Sep 17 00:00:00 2001 From: nhat do Date: Mon, 16 Dec 2024 11:28:08 +0100 Subject: [PATCH 07/13] fix configuration of detectors --- modules/integration_gcp-firebase/README.md | 4 +- .../conf/00-heatbeat.yaml | 1 + .../conf/01-database_load.yaml | 6 +- .../conf/02-io_utilization.yaml | 6 +- .../integration_gcp-firebase/detectors-gen.tf | 64 +++++----- modules/integration_gcp-firebase/outputs.tf | 18 +-- .../integration_gcp-firebase/variables-gen.tf | 112 +++++++++--------- 7 files changed, 108 insertions(+), 103 deletions(-) diff --git a/modules/integration_gcp-firebase/README.md b/modules/integration_gcp-firebase/README.md index eb83e9a90..5be9153bf 100644 --- a/modules/integration_gcp-firebase/README.md +++ b/modules/integration_gcp-firebase/README.md @@ -81,8 +81,8 @@ This module creates the following SignalFx detectors which could contain one or |Detector|Critical|Major|Minor|Warning|Info| |---|---|---|---|---|---| |GCP Firebase database heartbeat|X|-|-|-|-| -|GCP Firebase database firebase database load|X|X|-|-|-| -|GCP Firebase database firebase database io utilization|X|X|-|-|-| +|GCP Firebase database load|X|X|-|-|-| +|GCP Firebase database io utilization|X|X|-|-|-| ## How to collect required metrics? 
diff --git a/modules/integration_gcp-firebase/conf/00-heatbeat.yaml b/modules/integration_gcp-firebase/conf/00-heatbeat.yaml index 5d5f8b3b1..64140a1f0 100644 --- a/modules/integration_gcp-firebase/conf/00-heatbeat.yaml +++ b/modules/integration_gcp-firebase/conf/00-heatbeat.yaml @@ -1,3 +1,4 @@ +## Example module: GCP Firebase database name: heartbeat diff --git a/modules/integration_gcp-firebase/conf/01-database_load.yaml b/modules/integration_gcp-firebase/conf/01-database_load.yaml index d495de2a9..4ad826790 100644 --- a/modules/integration_gcp-firebase/conf/01-database_load.yaml +++ b/modules/integration_gcp-firebase/conf/01-database_load.yaml @@ -1,7 +1,7 @@ module: "GCP Firebase database" -name: "Firebase database load" +name: "Load" -transformation: ".min(over='30m')" +transformation: true signals: signal: @@ -11,8 +11,10 @@ rules: critical: threshold: 10 comparator: ">" + lasting_duration: '30m' major: threshold: 5 comparator: ">" + lasting_duration: '30m' dependency: "critical" diff --git a/modules/integration_gcp-firebase/conf/02-io_utilization.yaml b/modules/integration_gcp-firebase/conf/02-io_utilization.yaml index c28b84fad..5c0cadfa9 100644 --- a/modules/integration_gcp-firebase/conf/02-io_utilization.yaml +++ b/modules/integration_gcp-firebase/conf/02-io_utilization.yaml @@ -1,7 +1,7 @@ module: "GCP Firebase database" -name: "Firebase database IO utilization" +name: "IO utilization" -transformation: ".min(over='30m')" +transformation: true signals: signal: @@ -11,8 +11,10 @@ rules: critical: threshold: 10 comparator: ">" + lasting_duration: '30m' major: threshold: 5 comparator: ">" + lasting_duration: '30m' dependency: "critical" diff --git a/modules/integration_gcp-firebase/detectors-gen.tf b/modules/integration_gcp-firebase/detectors-gen.tf index 87c112c35..42f603e3a 100644 --- a/modules/integration_gcp-firebase/detectors-gen.tf +++ b/modules/integration_gcp-firebase/detectors-gen.tf @@ -26,83 +26,83 @@ EOF max_delay = var.heartbeat_max_delay } 
-resource "signalfx_detector" "firebase_database_load" { - name = format("%s %s", local.detector_name_prefix, "GCP Firebase database firebase database load") +resource "signalfx_detector" "load" { + name = format("%s %s", local.detector_name_prefix, "GCP Firebase database load") authorized_writer_teams = var.authorized_writer_teams teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) program_text = <<-EOF - signal = data('io/database_load', filter=${module.filtering.signalflow})${var.firebase_database_load_aggregation_function}${var.firebase_database_load_transformation_function}.publish('signal') - detect(when(signal > ${var.firebase_database_load_threshold_critical}%{if var.firebase_database_load_lasting_duration_critical != null}, lasting='${var.firebase_database_load_lasting_duration_critical}', at_least=${var.firebase_database_load_at_least_percentage_critical}%{endif})).publish('CRIT') - detect(when(signal > ${var.firebase_database_load_threshold_major}%{if var.firebase_database_load_lasting_duration_major != null}, lasting='${var.firebase_database_load_lasting_duration_major}', at_least=${var.firebase_database_load_at_least_percentage_major}%{endif}) and (not when(signal > ${var.firebase_database_load_threshold_critical}%{if var.firebase_database_load_lasting_duration_critical != null}, lasting='${var.firebase_database_load_lasting_duration_critical}', at_least=${var.firebase_database_load_at_least_percentage_critical}%{endif}))).publish('MAJOR') + signal = data('io/database_load', filter=${module.filtering.signalflow})${var.load_aggregation_function}${var.load_transformation_function}.publish('signal') + detect(when(signal > ${var.load_threshold_critical}%{if var.load_lasting_duration_critical != null}, lasting='${var.load_lasting_duration_critical}', at_least=${var.load_at_least_percentage_critical}%{endif})).publish('CRIT') + detect(when(signal > 
${var.load_threshold_major}%{if var.load_lasting_duration_major != null}, lasting='${var.load_lasting_duration_major}', at_least=${var.load_at_least_percentage_major}%{endif}) and (not when(signal > ${var.load_threshold_critical}%{if var.load_lasting_duration_critical != null}, lasting='${var.load_lasting_duration_critical}', at_least=${var.load_at_least_percentage_critical}%{endif}))).publish('MAJOR') EOF rule { - description = "is too high > ${var.firebase_database_load_threshold_critical}" + description = "is too high > ${var.load_threshold_critical}" severity = "Critical" detect_label = "CRIT" - disabled = coalesce(var.firebase_database_load_disabled_critical, var.firebase_database_load_disabled, var.detectors_disabled) - notifications = try(coalescelist(lookup(var.firebase_database_load_notifications, "critical", []), var.notifications.critical), null) - runbook_url = try(coalesce(var.firebase_database_load_runbook_url, var.runbook_url), "") - tip = var.firebase_database_load_tip + disabled = coalesce(var.load_disabled_critical, var.load_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.load_notifications, "critical", []), var.notifications.critical), null) + runbook_url = try(coalesce(var.load_runbook_url, var.runbook_url), "") + tip = var.load_tip parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject parameterized_body = var.message_body == "" ? 
local.rule_body : var.message_body } rule { - description = "is too high > ${var.firebase_database_load_threshold_major}" + description = "is too high > ${var.load_threshold_major}" severity = "Major" detect_label = "MAJOR" - disabled = coalesce(var.firebase_database_load_disabled_major, var.firebase_database_load_disabled, var.detectors_disabled) - notifications = try(coalescelist(lookup(var.firebase_database_load_notifications, "major", []), var.notifications.major), null) - runbook_url = try(coalesce(var.firebase_database_load_runbook_url, var.runbook_url), "") - tip = var.firebase_database_load_tip + disabled = coalesce(var.load_disabled_major, var.load_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.load_notifications, "major", []), var.notifications.major), null) + runbook_url = try(coalesce(var.load_runbook_url, var.runbook_url), "") + tip = var.load_tip parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject parameterized_body = var.message_body == "" ? 
local.rule_body : var.message_body } - max_delay = var.firebase_database_load_max_delay + max_delay = var.load_max_delay } -resource "signalfx_detector" "firebase_database_io_utilization" { - name = format("%s %s", local.detector_name_prefix, "GCP Firebase database firebase database io utilization") +resource "signalfx_detector" "io_utilization" { + name = format("%s %s", local.detector_name_prefix, "GCP Firebase database io utilization") authorized_writer_teams = var.authorized_writer_teams teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) program_text = <<-EOF - signal = data('io/utilization', filter=${module.filtering.signalflow})${var.firebase_database_io_utilization_aggregation_function}${var.firebase_database_io_utilization_transformation_function}.publish('signal') - detect(when(signal > ${var.firebase_database_io_utilization_threshold_critical}%{if var.firebase_database_io_utilization_lasting_duration_critical != null}, lasting='${var.firebase_database_io_utilization_lasting_duration_critical}', at_least=${var.firebase_database_io_utilization_at_least_percentage_critical}%{endif})).publish('CRIT') - detect(when(signal > ${var.firebase_database_io_utilization_threshold_major}%{if var.firebase_database_io_utilization_lasting_duration_major != null}, lasting='${var.firebase_database_io_utilization_lasting_duration_major}', at_least=${var.firebase_database_io_utilization_at_least_percentage_major}%{endif}) and (not when(signal > ${var.firebase_database_io_utilization_threshold_critical}%{if var.firebase_database_io_utilization_lasting_duration_critical != null}, lasting='${var.firebase_database_io_utilization_lasting_duration_critical}', at_least=${var.firebase_database_io_utilization_at_least_percentage_critical}%{endif}))).publish('MAJOR') + signal = data('io/utilization', 
filter=${module.filtering.signalflow})${var.io_utilization_aggregation_function}${var.io_utilization_transformation_function}.publish('signal') + detect(when(signal > ${var.io_utilization_threshold_critical}%{if var.io_utilization_lasting_duration_critical != null}, lasting='${var.io_utilization_lasting_duration_critical}', at_least=${var.io_utilization_at_least_percentage_critical}%{endif})).publish('CRIT') + detect(when(signal > ${var.io_utilization_threshold_major}%{if var.io_utilization_lasting_duration_major != null}, lasting='${var.io_utilization_lasting_duration_major}', at_least=${var.io_utilization_at_least_percentage_major}%{endif}) and (not when(signal > ${var.io_utilization_threshold_critical}%{if var.io_utilization_lasting_duration_critical != null}, lasting='${var.io_utilization_lasting_duration_critical}', at_least=${var.io_utilization_at_least_percentage_critical}%{endif}))).publish('MAJOR') EOF rule { - description = "is too high > ${var.firebase_database_io_utilization_threshold_critical}" + description = "is too high > ${var.io_utilization_threshold_critical}" severity = "Critical" detect_label = "CRIT" - disabled = coalesce(var.firebase_database_io_utilization_disabled_critical, var.firebase_database_io_utilization_disabled, var.detectors_disabled) - notifications = try(coalescelist(lookup(var.firebase_database_io_utilization_notifications, "critical", []), var.notifications.critical), null) - runbook_url = try(coalesce(var.firebase_database_io_utilization_runbook_url, var.runbook_url), "") - tip = var.firebase_database_io_utilization_tip + disabled = coalesce(var.io_utilization_disabled_critical, var.io_utilization_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.io_utilization_notifications, "critical", []), var.notifications.critical), null) + runbook_url = try(coalesce(var.io_utilization_runbook_url, var.runbook_url), "") + tip = var.io_utilization_tip parameterized_subject = var.message_subject == "" ? 
local.rule_subject : var.message_subject parameterized_body = var.message_body == "" ? local.rule_body : var.message_body } rule { - description = "is too high > ${var.firebase_database_io_utilization_threshold_major}" + description = "is too high > ${var.io_utilization_threshold_major}" severity = "Major" detect_label = "MAJOR" - disabled = coalesce(var.firebase_database_io_utilization_disabled_major, var.firebase_database_io_utilization_disabled, var.detectors_disabled) - notifications = try(coalescelist(lookup(var.firebase_database_io_utilization_notifications, "major", []), var.notifications.major), null) - runbook_url = try(coalesce(var.firebase_database_io_utilization_runbook_url, var.runbook_url), "") - tip = var.firebase_database_io_utilization_tip + disabled = coalesce(var.io_utilization_disabled_major, var.io_utilization_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.io_utilization_notifications, "major", []), var.notifications.major), null) + runbook_url = try(coalesce(var.io_utilization_runbook_url, var.runbook_url), "") + tip = var.io_utilization_tip parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject parameterized_body = var.message_body == "" ? 
local.rule_body : var.message_body } - max_delay = var.firebase_database_io_utilization_max_delay + max_delay = var.io_utilization_max_delay } diff --git a/modules/integration_gcp-firebase/outputs.tf b/modules/integration_gcp-firebase/outputs.tf index 7ecef6cb6..e7b7ca70b 100644 --- a/modules/integration_gcp-firebase/outputs.tf +++ b/modules/integration_gcp-firebase/outputs.tf @@ -1,15 +1,15 @@ -output "firebase_database_io_utilization" { - description = "Detector resource for firebase_database_io_utilization" - value = signalfx_detector.firebase_database_io_utilization +output "heartbeat" { + description = "Detector resource for heartbeat" + value = signalfx_detector.heartbeat } -output "firebase_database_load" { - description = "Detector resource for firebase_database_load" - value = signalfx_detector.firebase_database_load +output "io_utilization" { + description = "Detector resource for io_utilization" + value = signalfx_detector.io_utilization } -output "heartbeat" { - description = "Detector resource for heartbeat" - value = signalfx_detector.heartbeat +output "load" { + description = "Detector resource for load" + value = signalfx_detector.load } diff --git a/modules/integration_gcp-firebase/variables-gen.tf b/modules/integration_gcp-firebase/variables-gen.tf index b9b5cef28..2c88e9e28 100644 --- a/modules/integration_gcp-firebase/variables-gen.tf +++ b/modules/integration_gcp-firebase/variables-gen.tf @@ -42,182 +42,182 @@ variable "heartbeat_timeframe" { default = "25m" } -# firebase_database_load detector +# load detector -variable "firebase_database_load_notifications" { - description = "Notification recipients list per severity overridden for firebase_database_load detector" +variable "load_notifications" { + description = "Notification recipients list per severity overridden for load detector" type = map(list(string)) default = {} } -variable "firebase_database_load_aggregation_function" { - description = "Aggregation function and group by for 
firebase_database_load detector (i.e. \".mean(by=['host'])\")" +variable "load_aggregation_function" { + description = "Aggregation function and group by for load detector (i.e. \".mean(by=['host'])\")" type = string default = "" } -variable "firebase_database_load_transformation_function" { - description = "Transformation function for firebase_database_load detector (i.e. \".mean(over='5m')\")" +variable "load_transformation_function" { + description = "Transformation function for load detector (i.e. \".mean(over='5m')\")" type = string - default = ".min(over='30m')" + default = "" } -variable "firebase_database_load_max_delay" { - description = "Enforce max delay for firebase_database_load detector (use \"0\" or \"null\" for \"Auto\")" +variable "load_max_delay" { + description = "Enforce max delay for load detector (use \"0\" or \"null\" for \"Auto\")" type = number default = null } -variable "firebase_database_load_tip" { +variable "load_tip" { description = "Suggested first course of action or any note useful for incident handling" type = string default = "" } -variable "firebase_database_load_runbook_url" { +variable "load_runbook_url" { description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" type = string default = "" } -variable "firebase_database_load_disabled" { - description = "Disable all alerting rules for firebase_database_load detector" +variable "load_disabled" { + description = "Disable all alerting rules for load detector" type = bool default = null } -variable "firebase_database_load_disabled_critical" { - description = "Disable critical alerting rule for firebase_database_load detector" +variable "load_disabled_critical" { + description = "Disable critical alerting rule for load detector" type = bool default = null } -variable "firebase_database_load_disabled_major" { - description = "Disable major alerting rule for firebase_database_load detector" +variable "load_disabled_major" { + description 
= "Disable major alerting rule for load detector" type = bool default = null } -variable "firebase_database_load_threshold_critical" { - description = "Critical threshold for firebase_database_load detector" +variable "load_threshold_critical" { + description = "Critical threshold for load detector" type = number default = 10 } -variable "firebase_database_load_lasting_duration_critical" { +variable "load_lasting_duration_critical" { description = "Minimum duration that conditions must be true before raising alert" type = string - default = null + default = "30m" } -variable "firebase_database_load_at_least_percentage_critical" { +variable "load_at_least_percentage_critical" { description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" type = number default = 1 } -variable "firebase_database_load_threshold_major" { - description = "Major threshold for firebase_database_load detector" +variable "load_threshold_major" { + description = "Major threshold for load detector" type = number default = 5 } -variable "firebase_database_load_lasting_duration_major" { +variable "load_lasting_duration_major" { description = "Minimum duration that conditions must be true before raising alert" type = string - default = null + default = "30m" } -variable "firebase_database_load_at_least_percentage_major" { +variable "load_at_least_percentage_major" { description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" type = number default = 1 } -# firebase_database_io_utilization detector +# io_utilization detector -variable "firebase_database_io_utilization_notifications" { - description = "Notification recipients list per severity overridden for firebase_database_io_utilization detector" +variable "io_utilization_notifications" { + description = "Notification recipients list per severity overridden for io_utilization detector" type = map(list(string)) default = {} } -variable 
"firebase_database_io_utilization_aggregation_function" { - description = "Aggregation function and group by for firebase_database_io_utilization detector (i.e. \".mean(by=['host'])\")" +variable "io_utilization_aggregation_function" { + description = "Aggregation function and group by for io_utilization detector (i.e. \".mean(by=['host'])\")" type = string default = "" } -variable "firebase_database_io_utilization_transformation_function" { - description = "Transformation function for firebase_database_io_utilization detector (i.e. \".mean(over='5m')\")" +variable "io_utilization_transformation_function" { + description = "Transformation function for io_utilization detector (i.e. \".mean(over='5m')\")" type = string - default = ".min(over='30m')" + default = "" } -variable "firebase_database_io_utilization_max_delay" { - description = "Enforce max delay for firebase_database_io_utilization detector (use \"0\" or \"null\" for \"Auto\")" +variable "io_utilization_max_delay" { + description = "Enforce max delay for io_utilization detector (use \"0\" or \"null\" for \"Auto\")" type = number default = null } -variable "firebase_database_io_utilization_tip" { +variable "io_utilization_tip" { description = "Suggested first course of action or any note useful for incident handling" type = string default = "" } -variable "firebase_database_io_utilization_runbook_url" { +variable "io_utilization_runbook_url" { description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" type = string default = "" } -variable "firebase_database_io_utilization_disabled" { - description = "Disable all alerting rules for firebase_database_io_utilization detector" +variable "io_utilization_disabled" { + description = "Disable all alerting rules for io_utilization detector" type = bool default = null } -variable "firebase_database_io_utilization_disabled_critical" { - description = "Disable critical alerting rule for firebase_database_io_utilization 
detector" +variable "io_utilization_disabled_critical" { + description = "Disable critical alerting rule for io_utilization detector" type = bool default = null } -variable "firebase_database_io_utilization_disabled_major" { - description = "Disable major alerting rule for firebase_database_io_utilization detector" +variable "io_utilization_disabled_major" { + description = "Disable major alerting rule for io_utilization detector" type = bool default = null } -variable "firebase_database_io_utilization_threshold_critical" { - description = "Critical threshold for firebase_database_io_utilization detector" +variable "io_utilization_threshold_critical" { + description = "Critical threshold for io_utilization detector" type = number default = 10 } -variable "firebase_database_io_utilization_lasting_duration_critical" { +variable "io_utilization_lasting_duration_critical" { description = "Minimum duration that conditions must be true before raising alert" type = string - default = null + default = "30m" } -variable "firebase_database_io_utilization_at_least_percentage_critical" { +variable "io_utilization_at_least_percentage_critical" { description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" type = number default = 1 } -variable "firebase_database_io_utilization_threshold_major" { - description = "Major threshold for firebase_database_io_utilization detector" +variable "io_utilization_threshold_major" { + description = "Major threshold for io_utilization detector" type = number default = 5 } -variable "firebase_database_io_utilization_lasting_duration_major" { +variable "io_utilization_lasting_duration_major" { description = "Minimum duration that conditions must be true before raising alert" type = string - default = null + default = "30m" } -variable "firebase_database_io_utilization_at_least_percentage_major" { +variable "io_utilization_at_least_percentage_major" { description = "Percentage of lasting that 
conditions must be true before raising alert (>= 0.0 and <= 1.0)" type = number default = 1 From b6dee6920651375a3c0a09cd4aa70e4be04028aa Mon Sep 17 00:00:00 2001 From: nhat do Date: Mon, 16 Dec 2024 11:28:08 +0100 Subject: [PATCH 08/13] fix configuration of detectors --- docs/severity.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/severity.md b/docs/severity.md index 7d62a469d..15dea78a5 100644 --- a/docs/severity.md +++ b/docs/severity.md @@ -811,8 +811,8 @@ |Detector|Critical|Major|Minor|Warning|Info| |---|---|---|---|---|---| |GCP Firebase database heartbeat|X|-|-|-|-| -|GCP Firebase database firebase database load|X|X|-|-|-| -|GCP Firebase database firebase database io utilization|X|X|-|-|-| +|GCP Firebase database load|X|X|-|-|-| +|GCP Firebase database io utilization|X|X|-|-|-| ## integration_gcp-load-balancing From 0171f1e6cdb8160818f5ece626b6667c561318e5 Mon Sep 17 00:00:00 2001 From: ndo77 Date: Mon, 16 Dec 2024 11:30:31 +0100 Subject: [PATCH 09/13] Update modules/integration_gcp-firebase/conf/01-database_load.yaml Co-authored-by: Jean-Baptiste Simillon --- modules/integration_gcp-firebase/conf/01-database_load.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/integration_gcp-firebase/conf/01-database_load.yaml b/modules/integration_gcp-firebase/conf/01-database_load.yaml index 4ad826790..207802c10 100644 --- a/modules/integration_gcp-firebase/conf/01-database_load.yaml +++ b/modules/integration_gcp-firebase/conf/01-database_load.yaml @@ -12,6 +12,7 @@ rules: threshold: 10 comparator: ">" lasting_duration: '30m' + lasting_duration: '30m' major: threshold: 5 From 73cf5010fd5bb81e29e31c502a72891f17e900cb Mon Sep 17 00:00:00 2001 From: ndo77 Date: Mon, 16 Dec 2024 11:30:38 +0100 Subject: [PATCH 10/13] Update modules/integration_gcp-firebase/conf/01-database_load.yaml Co-authored-by: Jean-Baptiste Simillon --- modules/integration_gcp-firebase/conf/01-database_load.yaml | 1 + 1 file changed, 1 insertion(+) diff --git 
a/modules/integration_gcp-firebase/conf/01-database_load.yaml b/modules/integration_gcp-firebase/conf/01-database_load.yaml index 207802c10..6e9f32c94 100644 --- a/modules/integration_gcp-firebase/conf/01-database_load.yaml +++ b/modules/integration_gcp-firebase/conf/01-database_load.yaml @@ -18,4 +18,5 @@ rules: threshold: 5 comparator: ">" lasting_duration: '30m' + lasting_duration: '30m' dependency: "critical" From e6d81e3a2791ace09399999eafe100990422550d Mon Sep 17 00:00:00 2001 From: ndo77 Date: Mon, 16 Dec 2024 11:30:47 +0100 Subject: [PATCH 11/13] Update modules/integration_gcp-firebase/conf/02-io_utilization.yaml Co-authored-by: Jean-Baptiste Simillon --- modules/integration_gcp-firebase/conf/02-io_utilization.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/integration_gcp-firebase/conf/02-io_utilization.yaml b/modules/integration_gcp-firebase/conf/02-io_utilization.yaml index 5c0cadfa9..c35b85056 100644 --- a/modules/integration_gcp-firebase/conf/02-io_utilization.yaml +++ b/modules/integration_gcp-firebase/conf/02-io_utilization.yaml @@ -12,6 +12,7 @@ rules: threshold: 10 comparator: ">" lasting_duration: '30m' + lasting_duration: '30m' major: threshold: 5 From 459a632c3689722fc3cfce26dd62410f14c83ad4 Mon Sep 17 00:00:00 2001 From: ndo77 Date: Mon, 16 Dec 2024 11:30:58 +0100 Subject: [PATCH 12/13] Update modules/integration_gcp-firebase/conf/02-io_utilization.yaml Co-authored-by: Jean-Baptiste Simillon --- modules/integration_gcp-firebase/conf/02-io_utilization.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/integration_gcp-firebase/conf/02-io_utilization.yaml b/modules/integration_gcp-firebase/conf/02-io_utilization.yaml index c35b85056..b08cfaf8a 100644 --- a/modules/integration_gcp-firebase/conf/02-io_utilization.yaml +++ b/modules/integration_gcp-firebase/conf/02-io_utilization.yaml @@ -18,4 +18,5 @@ rules: threshold: 5 comparator: ">" lasting_duration: '30m' + lasting_duration: '30m' dependency: "critical" From 
e26d342af1905cb5714b66a29ada3bd593e2b9e2 Mon Sep 17 00:00:00 2001 From: nhat do Date: Mon, 16 Dec 2024 11:37:18 +0100 Subject: [PATCH 13/13] fix typo --- modules/integration_gcp-firebase/conf/01-database_load.yaml | 2 -- modules/integration_gcp-firebase/conf/02-io_utilization.yaml | 2 -- 2 files changed, 4 deletions(-) diff --git a/modules/integration_gcp-firebase/conf/01-database_load.yaml b/modules/integration_gcp-firebase/conf/01-database_load.yaml index 6e9f32c94..4ad826790 100644 --- a/modules/integration_gcp-firebase/conf/01-database_load.yaml +++ b/modules/integration_gcp-firebase/conf/01-database_load.yaml @@ -12,11 +12,9 @@ rules: threshold: 10 comparator: ">" lasting_duration: '30m' - lasting_duration: '30m' major: threshold: 5 comparator: ">" lasting_duration: '30m' - lasting_duration: '30m' dependency: "critical" diff --git a/modules/integration_gcp-firebase/conf/02-io_utilization.yaml b/modules/integration_gcp-firebase/conf/02-io_utilization.yaml index b08cfaf8a..5c0cadfa9 100644 --- a/modules/integration_gcp-firebase/conf/02-io_utilization.yaml +++ b/modules/integration_gcp-firebase/conf/02-io_utilization.yaml @@ -12,11 +12,9 @@ rules: threshold: 10 comparator: ">" lasting_duration: '30m' - lasting_duration: '30m' major: threshold: 5 comparator: ">" lasting_duration: '30m' - lasting_duration: '30m' dependency: "critical"