From ac2aaf153849fb7e975df40ddc31ec6c5424ce08 Mon Sep 17 00:00:00 2001 From: Alberto Lopez Date: Wed, 1 Jun 2022 11:56:22 +0200 Subject: [PATCH] Remove append and aggregation dimensions Enabling AMI clean builds has produced SNS alarming issues for missing datapoints. We are aiming to address this issue by removing optional configurations in the CW Agent config file. --- manifests/cloudwatch/agent.pp | 4 +- .../amazon-cloudwatch-agent.json.erb | 94 +++++++++++-------- 2 files changed, 58 insertions(+), 40 deletions(-) diff --git a/manifests/cloudwatch/agent.pp b/manifests/cloudwatch/agent.pp index b787e52..8c9db6c 100644 --- a/manifests/cloudwatch/agent.pp +++ b/manifests/cloudwatch/agent.pp @@ -1,5 +1,6 @@ class octo_base::cloudwatch::agent ( - $log_files + $log_files, + $instance_name ) { $install_download_link = 'https://s3.amazonaws.com/amazoncloudwatch-agent/ubuntu/amd64/latest/amazon-cloudwatch-agent.deb' $signature_download_link = 'https://s3.amazonaws.com/amazoncloudwatch-agent/ubuntu/amd64/latest/amazon-cloudwatch-agent.deb.sig' @@ -38,6 +39,7 @@ ] $all_log_files = concat($core_log_files, $log_files) $number_of_log_files = length($all_log_files) + $instance = $instance_name $config_file = '/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json' diff --git a/templates/cloudwatch/amazon-cloudwatch-agent.json.erb b/templates/cloudwatch/amazon-cloudwatch-agent.json.erb index 5dfa825..9b3e880 100644 --- a/templates/cloudwatch/amazon-cloudwatch-agent.json.erb +++ b/templates/cloudwatch/amazon-cloudwatch-agent.json.erb @@ -5,81 +5,97 @@ "run_as_user": "cwagent" }, "metrics": { + "namespace": "EC2/Custom", "metrics_collected": { "cpu": { "resources": [ "*" ], "measurement": [ - {"name": "cpu_usage_idle", "rename": "CPU_USAGE_IDLE", "unit": "Percent"}, - {"name": "cpu_usage_nice", "rename": "CPU_USAGE_NICE", "unit": "Percent"}, - {"name": "cpu_usage_guest", "rename": "CPU_USAGE_GUEST", "unit": "Percent"} + {"name": "cpu_usage_active", "rename": 
"CPUUtilization", "unit": "Percent"} ], - "totalcpu": false, - "metrics_collection_interval": 60 + "totalcpu": true, + "metrics_collection_interval": 60, + "append_dimensions": { + "InstanceName": "<%= @instance %>" + } }, "disk": { "resources": [ "*" ], "measurement": [ - {"name": "free", "rename": "DISK_FREE_GB", "unit": "Gigabytes"}, - {"name": "total", "rename": "DISK_TOTAL_GB", "unit": "Gigabytes"}, - {"name": "used_percent", "rename": "DISK_USED_PERCENT"}, - {"name": "inodes_free", "ranem": "FREE_INODES"}, - {"name": "inodes_total", "rename": "TOTAL_INODES"}, - {"name": "inodes_used", "rename": "USED_INODES"} + {"name": "free", "rename": "DiskAvailable", "unit": "Bytes"}, + {"name": "used", "rename": "DiskUsed", "unit": "Bytes"}, + {"name": "used_percent", "rename": "DiskUsedPercentage", "unit": "Percent"}, + {"name": "inodes_free", "rename": "FreeInodes", "unit": "Count"} ], "ignore_file_system_types": [ "tmpfs", "devtmpfs" ], - "metrics_collection_interval": 60 + "metrics_collection_interval": 60, + "append_dimensions": { + "InstanceName": "<%= @instance %>" + } }, "diskio": { "resources": [ "*" ], "measurement": [ - {"name": "reads", "rename": "IO_READS"}, - {"name": "writes", "rename": "IO_WRITES"}, - {"name": "read_time", "rename": "IO_READ_TIME"}, - {"name": "write_time", "rename": "IO_WRITE_TIME"}, - {"name": "io_time", "rename": "IO_TIME"} + {"name": "reads", "rename": "DiskIOReads"}, + {"name": "writes", "rename": "DiskIOWrites"}, + {"name": "read_time", "rename": "DiskIOReadTime"}, + {"name": "write_time", "rename": "DiskIOWriteTime"}, + {"name": "io_time", "rename": "DiskIOTime"} ], - "metrics_collection_interval": 60 + "metrics_collection_interval": 60, + "append_dimensions": { + "InstanceName": "<%= @instance %>" + } }, "swap": { "measurement": [ - {"name": "swap_used", "rename": "SWAP_USED"}, - {"name": "swap_free", "rename": "SWAP_FREE"}, - {"name": "swap_used_percent", "rename": "SWAP_USED_PERCENT"} - ] + {"name": "swap_used", "rename": 
"SwapUsed", "unit": "Megabytes"} + ], + "append_dimensions": { + "InstanceName": "<%= @instance %>" + } }, "mem": { "measurement": [ - {"name": "mem_used", "rename": "MEM_USED"}, - {"name": "mem_cached", "rename": "MEM_CACHED"}, - {"name": "mem_total", "rename": "MEM_TOTAL"} + {"name": "used", "rename": "MemoryUsed", "unit": "Megabytes"}, + {"name": "used_percent", "rename": "MemoryUsedPercentage", "unit": "Percent"}, + {"name": "free", "rename": "MemoryFree", "unit": "Megabytes"} ], - "metrics_collection_interval": 30 + "metrics_collection_interval": 30, + "append_dimensions": { + "InstanceName": "<%= @instance %>" + } }, "net": { "resources": [ "*" ], "measurement": [ - {"name": "bytes_sent", "rename": "BYTES_SENT"}, - {"name": "bytes_recv", "rename": "BYTES_RECEIVED"}, - {"name": "drop_in", "rename": "BYTES_DROPPED_IN"}, - {"name": "drop_out", "rename": "BYTES_DROPPED_OUT"} - ] + {"name": "bytes_sent", "rename": "BytesSent"}, + {"name": "bytes_recv", "rename": "BytesReceived"}, + {"name": "drop_in", "rename": "BytesDroppedIn"}, + {"name": "drop_out", "rename": "BytesDroppedOut"} + ], + "append_dimensions": { + "InstanceName": "<%= @instance %>" + } }, "processes": { "measurement": [ - {"name": "running", "rename": "NUM_PROC_RUNNING"}, - {"name": "sleeping", "rename": "NUM_PROC_SLEEPING"}, - {"name": "dead", "rename": "NUM_PROC_DEAD"} - ] + {"name": "running", "rename": "NumProcessesRunning"}, + {"name": "sleeping", "rename": "NumProcessesSleeping"}, + {"name": "dead", "rename": "NumProcessesDead"} + ], + "append_dimensions": { + "InstanceName": "<%= @instance %>" + } }, "procstat": [ { @@ -88,17 +104,17 @@ "cpu_time", "cpu_time_system", "cpu_time_user" - ] + ], + "append_dimensions": { + "InstanceName": "<%= @instance %>" + } } ] }, "append_dimensions": { - "ImageId": "${aws:ImageId}", "InstanceId": "${aws:InstanceId}", - "InstanceType": "${aws:InstanceType}", "AutoScalingGroupName": "${aws:AutoScalingGroupName}" }, - "aggregation_dimensions" : 
[["AutoScalingGroupName"], ["ImageId"], ["InstanceId", "InstanceType"], []], "force_flush_interval" : 30 }, "logs": {