Skip to content

Commit

Permalink
Merge pull request #151 from claranet/genericjmx
Browse files Browse the repository at this point in the history
  • Loading branch information
xp-1000 authored Oct 27, 2020
2 parents f3f4a4c + 7fb8e30 commit f9ca07f
Show file tree
Hide file tree
Showing 8 changed files with 238 additions and 0 deletions.
45 changes: 45 additions & 0 deletions middleware/genericjmx/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# MIDDLEWARE JMX SignalFx detectors

## How to use this module

```hcl
module "signalfx-detectors-middleware-jmx" {
  source = "github.com/claranet/terraform-signalfx-detectors.git//middleware/genericjmx?ref={revision}"

  environment   = var.environment
  notifications = var.notifications
}
```

## Purpose

Creates SignalFx detectors with the following checks:
- JMX GC old generation usage
- JMX memory heap usage

## Notes

This module uses the [GenericJMX](https://docs.signalfx.com/en/latest/integrations/agent/monitors/collectd-genericjmx.html)
monitor to fetch common Java runtime metrics from any JVM-based application.

You must [enable JMX Remote](https://docs.oracle.com/javadb/10.10.1.2/adminguide/radminjmxenabledisable.html) on your Java
application. Depending on your application, add JVM parameters such as the following:

```
-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=5000 -Dcom.sun.management.jmxremote.local.only=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Djava.rmi.server.hostname=127.0.0.1
```

If there is a native GenericJMX-based monitor for your specific application, such as the one for
[Cassandra](https://docs.signalfx.com/en/latest/integrations/agent/monitors/collectd-cassandra.html),
you should configure that dedicated monitor; it will automatically collect the metrics required by this module.

Otherwise, if no monitor is available for your specific application, or you simply do not want to collect
application-specific metrics, you have to configure the GenericJMX monitor directly:

```
  - type: collectd/genericjmx
    host: 127.0.0.1
    port: 5000
```

Keep in mind that you can easily add application-specific metrics by defining the `mBeanDefinitions` parameter.
1 change: 1 addition & 0 deletions middleware/genericjmx/common-locals.tf
1 change: 1 addition & 0 deletions middleware/genericjmx/common-variables.tf
62 changes: 62 additions & 0 deletions middleware/genericjmx/detectors-genericjmx.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Detector: JVM heap usage as a percentage of the maximum heap.
#
# signal = jmx_memory.used / jmx_memory.max * 100, both restricted to the
# `memory-heap` plugin instance and to the filters built by module.filter-tags.
# The aggregation and transformation suffixes are appended verbatim to each
# data() stream, so they must be valid SignalFlow method calls (or empty).
resource "signalfx_detector" "jmx_memory_heap" {
  name = format("%s %s", local.detector_name_prefix, "JMX memory heap usage")

  # CRIT fires strictly above the critical threshold; MAJOR covers the range
  # (major, critical]. The upper bound of MAJOR is inclusive so that a signal
  # exactly equal to the critical threshold still raises an alert instead of
  # falling into a gap between the two rules.
  program_text = <<-EOF
    A = data('jmx_memory.used', filter=filter('plugin_instance', 'memory-heap') and ${module.filter-tags.filter_custom})${var.memory_heap_aggregation_function}${var.memory_heap_transformation_function}
    B = data('jmx_memory.max', filter=filter('plugin_instance', 'memory-heap') and ${module.filter-tags.filter_custom})${var.memory_heap_aggregation_function}${var.memory_heap_transformation_function}
    signal = (A/B).scale(100).publish('signal')
    detect(when(signal > ${var.memory_heap_threshold_critical})).publish('CRIT')
    detect(when(signal > ${var.memory_heap_threshold_major}) and (signal <= ${var.memory_heap_threshold_critical})).publish('MAJOR')
  EOF

  # Major rule: the most specific non-null "disabled" flag wins
  # (rule-level, then detector-level, then module-wide).
  rule {
    description           = "is too high > ${var.memory_heap_threshold_major}"
    severity              = "Major"
    detect_label          = "MAJOR"
    disabled              = coalesce(var.memory_heap_disabled_major, var.memory_heap_disabled, var.detectors_disabled)
    notifications         = coalescelist(lookup(var.memory_heap_notifications, "major", []), var.notifications.major)
    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }

  # Critical rule: same override chain as above, for the CRIT label.
  rule {
    description           = "is too high > ${var.memory_heap_threshold_critical}"
    severity              = "Critical"
    detect_label          = "CRIT"
    disabled              = coalesce(var.memory_heap_disabled_critical, var.memory_heap_disabled, var.detectors_disabled)
    notifications         = coalescelist(lookup(var.memory_heap_notifications, "critical", []), var.notifications.critical)
    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}

# Detector: old generation memory pool usage as a percentage of its maximum.
#
# signal = jmx_memory.used / jmx_memory.max * 100, both restricted to the
# `memory_pool-G1 Old Gen` plugin instance and to the filters built by
# module.filter-tags. The aggregation and transformation suffixes are appended
# verbatim to each data() stream.
# NOTE(review): the plugin_instance filter is hard-coded to the G1 pool name;
# presumably JVMs using another collector expose a different name - verify.
resource "signalfx_detector" "jmx_old_gen" {
  name = format("%s %s", local.detector_name_prefix, "JMX GC old generation usage")

  # CRIT fires strictly above the critical threshold; MAJOR covers the range
  # (major, critical]. The upper bound of MAJOR is inclusive so that a signal
  # exactly equal to the critical threshold still raises an alert instead of
  # falling into a gap between the two rules.
  program_text = <<-EOF
    A = data('jmx_memory.used', filter=filter('plugin_instance', 'memory_pool-G1 Old Gen') and ${module.filter-tags.filter_custom})${var.gc_old_gen_aggregation_function}${var.gc_old_gen_transformation_function}
    B = data('jmx_memory.max', filter=filter('plugin_instance', 'memory_pool-G1 Old Gen') and ${module.filter-tags.filter_custom})${var.gc_old_gen_aggregation_function}${var.gc_old_gen_transformation_function}
    signal = (A/B).scale(100).publish('signal')
    detect(when(signal > ${var.gc_old_gen_threshold_critical})).publish('CRIT')
    detect(when(signal > ${var.gc_old_gen_threshold_major}) and (signal <= ${var.gc_old_gen_threshold_critical})).publish('MAJOR')
  EOF

  # Major rule: the most specific non-null "disabled" flag wins
  # (rule-level, then detector-level, then module-wide).
  rule {
    description           = "is too high > ${var.gc_old_gen_threshold_major}"
    severity              = "Major"
    detect_label          = "MAJOR"
    disabled              = coalesce(var.gc_old_gen_disabled_major, var.gc_old_gen_disabled, var.detectors_disabled)
    notifications         = coalescelist(lookup(var.gc_old_gen_notifications, "major", []), var.notifications.major)
    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }

  # Critical rule: same override chain as above, for the CRIT label.
  rule {
    description           = "is too high > ${var.gc_old_gen_threshold_critical}"
    severity              = "Critical"
    detect_label          = "CRIT"
    disabled              = coalesce(var.gc_old_gen_disabled_critical, var.gc_old_gen_disabled, var.detectors_disabled)
    notifications         = coalescelist(lookup(var.gc_old_gen_notifications, "critical", []), var.notifications.critical)
    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}

8 changes: 8 additions & 0 deletions middleware/genericjmx/modules.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Builds the SignalFlow filter expression shared by every detector of this
# module. Its `filter_custom` output is interpolated into each program_text.
module "filter-tags" {
  source = "github.com/claranet/terraform-signalfx-detectors.git//common/filter-tags"

  # Caller-supplied include / exclude lists, passed through unchanged.
  filter_custom_excludes = var.filter_custom_excludes
  filter_custom_includes = var.filter_custom_includes

  # Default filter: the module's environment plus the opt-in monitoring flag.
  filter_defaults = "filter('env', '${var.environment}') and filter('sfx_monitored', 'true')"
}

10 changes: 10 additions & 0 deletions middleware/genericjmx/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Exposes the whole heap-usage detector resource to the calling module.
output "jmx_memory_heap" {
  value       = signalfx_detector.jmx_memory_heap
  description = "Detector resource for jmx_memory_heap"
}

# Exposes the whole old-generation detector resource to the calling module.
output "jmx_old_gen" {
  value       = signalfx_detector.jmx_old_gen
  description = "Detector resource for jmx_old_gen"
}

102 changes: 102 additions & 0 deletions middleware/genericjmx/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Module specific

# memory_heap detector

# Detector-level switch. Left at null, the rules fall back to
# var.detectors_disabled through the coalesce() chain in the detector.
variable "memory_heap_disabled" {
  type        = bool
  default     = null
  description = "Disable all alerting rules for memory_heap detector"
}

# Rule-level switch for the Critical rule; when non-null it takes precedence
# over the detector-level and module-wide switches.
variable "memory_heap_disabled_critical" {
  type        = bool
  default     = null
  description = "Disable critical alerting rule for memory_heap detector"
}

# Rule-level switch for the Major rule; when non-null it takes precedence
# over the detector-level and module-wide switches.
variable "memory_heap_disabled_major" {
  type        = bool
  default     = null
  description = "Disable major alerting rule for memory_heap detector"
}

# Per-severity recipient override. The detector looks up the "major" and
# "critical" keys and falls back to var.notifications when a key is missing
# or its list is empty.
variable "memory_heap_notifications" {
  type        = map(list(string))
  default     = {}
  description = "Notification recipients list per severity overridden for memory_heap detector"
}

# SignalFlow suffix appended right after each data() call of the memory_heap
# detector, immediately before the transformation function. It must therefore
# start with a dot and must NOT end with one, otherwise the concatenation with
# the transformation function yields an invalid "..": the example in the
# description reflects that.
variable "memory_heap_aggregation_function" {
  description = "Aggregation function and group by for memory_heap detector (e.g. \".mean(by=['host'])\")"
  type        = string
  default     = ""
}

# SignalFlow suffix appended after the aggregation function on each data()
# stream of the memory_heap detector.
variable "memory_heap_transformation_function" {
  type        = string
  default     = ".min(over='5m')"
  description = "Transformation function for memory_heap detector (i.e. \".mean(over='5m')\")"
}

# Heap usage percentage above which the Major rule of the memory_heap
# detector fires (the detector defines no "Warning" severity).
variable "memory_heap_threshold_major" {
  description = "Major threshold for memory_heap detector"
  type        = number
  default     = 80
}

# Heap usage percentage above which the Critical rule of the memory_heap
# detector fires.
variable "memory_heap_threshold_critical" {
  type        = number
  default     = 90
  description = "Critical threshold for memory_heap detector"
}

# gc_old_gen detector

# Detector-level switch. Left at null, the rules fall back to
# var.detectors_disabled through the coalesce() chain in the detector.
variable "gc_old_gen_disabled" {
  type        = bool
  default     = null
  description = "Disable all alerting rules for gc_old_gen detector"
}

# Rule-level switch for the Critical rule; when non-null it takes precedence
# over the detector-level and module-wide switches.
variable "gc_old_gen_disabled_critical" {
  type        = bool
  default     = null
  description = "Disable critical alerting rule for gc_old_gen detector"
}

# Rule-level switch for the Major rule; when non-null it takes precedence
# over the detector-level and module-wide switches.
variable "gc_old_gen_disabled_major" {
  type        = bool
  default     = null
  description = "Disable major alerting rule for gc_old_gen detector"
}

# Per-severity recipient override. The detector looks up the "major" and
# "critical" keys and falls back to var.notifications when a key is missing
# or its list is empty.
variable "gc_old_gen_notifications" {
  type        = map(list(string))
  default     = {}
  description = "Notification recipients list per severity overridden for gc_old_gen detector"
}

# SignalFlow suffix appended right after each data() call of the gc_old_gen
# detector, immediately before the transformation function. It must therefore
# start with a dot and must NOT end with one, otherwise the concatenation with
# the transformation function yields an invalid "..": the example in the
# description reflects that.
variable "gc_old_gen_aggregation_function" {
  description = "Aggregation function and group by for gc_old_gen detector (e.g. \".mean(by=['host'])\")"
  type        = string
  default     = ""
}

# SignalFlow suffix appended after the aggregation function on each data()
# stream of the gc_old_gen detector.
variable "gc_old_gen_transformation_function" {
  type        = string
  default     = ".min(over='5m')"
  description = "Transformation function for gc_old_gen detector (i.e. \".mean(over='5m')\")"
}

# Old generation usage percentage above which the Major rule of the
# gc_old_gen detector fires (the detector defines no "Warning" severity).
variable "gc_old_gen_threshold_major" {
  description = "Major threshold for gc_old_gen detector"
  type        = number
  default     = 80
}

# Old generation usage percentage above which the Critical rule of the
# gc_old_gen detector fires.
variable "gc_old_gen_threshold_critical" {
  type        = number
  default     = 90
  description = "Critical threshold for gc_old_gen detector"
}

9 changes: 9 additions & 0 deletions middleware/genericjmx/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Version constraints for this module: minimum Terraform core version and
# the SignalFx provider (source address and minimum version).
terraform {
  required_version = ">= 0.12.26"

  required_providers {
    signalfx = {
      version = ">= 4.26.4"
      source  = "splunk-terraform/signalfx"
    }
  }
}

0 comments on commit f9ca07f

Please sign in to comment.