From a7b2cc5d99b2a3072edecdf15046760a3d3e03e2 Mon Sep 17 00:00:00 2001 From: "phillip.toohill" Date: Wed, 30 Oct 2024 12:57:00 -0500 Subject: [PATCH] Updating grafana dashboards for naming consistency and new additions (#519) --- .../{Alertmanager.json => alertmanager.json} | 0 etc/grafana-dashboards/ceph_cluster.json | 3294 +++++++++++++++++ etc/grafana-dashboards/ceph_pools.json | 957 +++++ ...it_Metrics.json => fluentbit_metrics.json} | 0 ...view.json => galera_mariadb_overview.json} | 0 ...e_Metrics.json => kube_state_metrics.json} | 0 ...son => kubernetes_cluster_monitoring.json} | 0 ...Disks_Usage.json => loki_disks_usage.json} | 0 ...iaDB_Metrics.json => mariadb_metrics.json} | 0 ...ed_Metrics.json => memcached_metrics.json} | 0 .../{Node_Metrics.json => node_metrics.json} | 0 etc/grafana-dashboards/open_alerts.json | 1006 +++++ ...ck_Metrics.json => openstack_metrics.json} | 279 +- etc/grafana-dashboards/project_lookup.json | 369 ++ ...Overview.json => prometheus_overview.json} | 0 ...tMQ_Metrics.json => rabbitmq_metrics.json} | 0 16 files changed, 5771 insertions(+), 134 deletions(-) rename etc/grafana-dashboards/{Alertmanager.json => alertmanager.json} (100%) create mode 100644 etc/grafana-dashboards/ceph_cluster.json create mode 100644 etc/grafana-dashboards/ceph_pools.json rename etc/grafana-dashboards/{Fluentbit_Metrics.json => fluentbit_metrics.json} (100%) rename etc/grafana-dashboards/{Galera_MariaDB_Overview.json => galera_mariadb_overview.json} (100%) rename etc/grafana-dashboards/{Kube_State_Metrics.json => kube_state_metrics.json} (100%) rename etc/grafana-dashboards/{Kubernetes_Cluster_Monitoring.json => kubernetes_cluster_monitoring.json} (100%) rename etc/grafana-dashboards/{Loki_Disks_Usage.json => loki_disks_usage.json} (100%) rename etc/grafana-dashboards/{MariaDB_Metrics.json => mariadb_metrics.json} (100%) rename etc/grafana-dashboards/{Memcached_Metrics.json => memcached_metrics.json} (100%) rename 
etc/grafana-dashboards/{Node_Metrics.json => node_metrics.json} (100%) create mode 100644 etc/grafana-dashboards/open_alerts.json rename etc/grafana-dashboards/{OpenStack_Metrics.json => openstack_metrics.json} (97%) create mode 100644 etc/grafana-dashboards/project_lookup.json rename etc/grafana-dashboards/{Prometheus_Overview.json => prometheus_overview.json} (100%) rename etc/grafana-dashboards/{RabbitMQ_Metrics.json => rabbitmq_metrics.json} (100%) diff --git a/etc/grafana-dashboards/Alertmanager.json b/etc/grafana-dashboards/alertmanager.json similarity index 100% rename from etc/grafana-dashboards/Alertmanager.json rename to etc/grafana-dashboards/alertmanager.json diff --git a/etc/grafana-dashboards/ceph_cluster.json b/etc/grafana-dashboards/ceph_cluster.json new file mode 100644 index 00000000..6253018b --- /dev/null +++ b/etc/grafana-dashboards/ceph_cluster.json @@ -0,0 +1,3294 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "gauge", + "name": "Gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.3.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph (old)", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Ceph Cluster overview.\r\n", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 9550, + "graphTooltip": 0, + "id": null, + "links": [], + 
"liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 37, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "CLUSTER STATE", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "HEALTHY" + }, + "1": { + "text": "WARNING" + }, + "2": { + "text": "ERROR" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 2 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 21, + "interval": "1m", + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "ceph_health_status{job=\"$job\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "refId": "A", + "step": 300 + } + ], + "title": "Status", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "N/A" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 2 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 3 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 14, + "interval": "1m", + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_mon_quorum_status{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Monitors In Quorum", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 22, + "interval": "1m", + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + 
"reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "count(ceph_pool_objects{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Pools", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.025 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 0.1 + } + ] + }, + "unit": "bytes", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 33, + "interval": "1m", + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "ceph_cluster_total_bytes{job=\"$job\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Cluster Capacity", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.025 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 0.1 + } + ] + }, + "unit": "bytes", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 34, + "interval": "1m", + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "ceph_cluster_total_used_bytes{job=\"$job\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Used Capacity", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 70 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 80 + } + ] + }, + "unit": "percent", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 
4, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 23, + "interval": "1m", + "links": [], + "maxDataPoints": 100, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "ceph_cluster_total_used_bytes{job=\"$job\"}/ceph_cluster_total_bytes{job=\"$job\"}*100", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Used Capacity", + "type": "gauge" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 38, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "OSD STATE", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 0, + "y": 6 + }, + "id": 26, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + 
"wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_osd_in{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "OSDs IN", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 40, 40, 0.89)", + "value": 1 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 1 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 2, + "y": 6 + }, + "id": 27, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "count(ceph_osd_up{job=\"$job\"}) - count(ceph_osd_in{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "OSDs OUT", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, 
+ "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 4, + "y": 6 + }, + "id": 28, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_osd_up{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "OSDs UP", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 40, 40, 0.89)", + "value": 1 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 1 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 6, + "y": 6 + }, + "id": 29, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": 
"10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "count(ceph_osd_up{job=\"$job\"} == 0) OR vector(0)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "OSDs DOWN", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 250 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 300 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 6 + }, + "id": 30, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "avg(ceph_osd_numpg{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Agerage PGs per OSD", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": 
{ + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 10 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 50 + } + ] + }, + "unit": "ms", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 6 + }, + "id": 31, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "avg(ceph_osd_apply_latency_ms{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Average OSD Apply Latency", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 10 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 50 + } + ] + }, + "unit": "ms", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 6 + }, + "id": 32, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": 
false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "avg(ceph_osd_commit_latency_ms{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Average OSD Commit Latency", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 70 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 80 + } + ] + }, + "unit": "s", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 6 + }, + "id": 24, + "interval": "1m", + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(ceph_paxos_refresh_latency_sum{job=\"$job\"}[1m]))/clamp_min(sum(increase(ceph_paxos_refresh_latency_count{job=\"$job\"}[1m])),1)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 300 + } + ], + "title": "Average Monitor Latency", + "type": "stat" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": 
"grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 39, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "CLUSTER", + "type": "row" + }, + { + "aliasColors": { + "Available": "#EAB839", + "Total Capacity": "#447EBC", + "Used": "#BF1B00", + "total_avail": "#6ED0E0", + "total_space": "#7EB26D", + "total_used": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "unitScale": true + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 10 + }, + "height": "300", + "hiddenSeries": false, + "id": 1, + "interval": "$interval", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Total Capacity", + "fill": 0, + "linewidth": 3, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "ceph_cluster_total_bytes{job=\"$job\"}-ceph_cluster_total_used_bytes{job=\"$job\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Available", + "refId": "A", + "step": 300 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "ceph_cluster_total_used_bytes{job=\"$job\"}", + "format": "time_series", + "interval": "$interval", + 
"intervalFactor": 1, + "legendFormat": "Used", + "refId": "B", + "step": 300 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "ceph_cluster_total_bytes{job=\"$job\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Total Capacity", + "refId": "C", + "step": 300 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Capacity", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Total Capacity": "#7EB26D", + "Used": "#BF1B00", + "total_avail": "#6ED0E0", + "total_space": "#7EB26D", + "total_used": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "decimals": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "unitScale": true + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 10 + }, + "height": "300", + "hiddenSeries": false, + "id": 3, + "interval": "$interval", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${DS_PROMETHEUS}" + }, + "expr": "sum(rate(ceph_osd_op_r{job=\"$job\"}[$interval]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "read", + "refId": "A", + "step": 300 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(rate(ceph_osd_op_w{job=\"$job\"}[$interval]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "write", + "refId": "B", + "step": 300 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(rate(ceph_osd_op_rw{job=\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "overwrite", + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "IOPS", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "unitScale": true + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 10 + }, + "height": "300", + "hiddenSeries": false, + "id": 7, + "interval": "$interval", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + 
"pluginVersion": "10.3.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(irate(ceph_osd_op_r_out_bytes{job=\"$job\"}[$interval]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "read", + "refId": "A", + "step": 300 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(irate(ceph_osd_op_w_in_bytes{job=\"$job\"}[$interval]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "write", + "refId": "B", + "step": 300 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(irate(ceph_osd_op_rw_in_bytes{job=\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "overwrite", + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Throughput", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 40, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "LATENCY", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": 
{ + "defaults": { + "unitScale": true + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 14, + "x": 0, + "y": 19 + }, + "hiddenSeries": false, + "id": 35, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "1000*sum(increase(ceph_osd_op_r_latency_sum{job=\"$job\"}[1m]))/clamp_min(sum(increase(ceph_osd_op_r_latency_count{job=\"$job\"}[1m])),1)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 2, + "legendFormat": "read", + "metric": "ceph_osd_perf_apply_latency_seconds", + "refId": "A", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "1000*sum(increase(ceph_osd_op_w_latency_sum{job=\"$job\"}[1m]))/clamp_min(sum(increase(ceph_osd_op_w_latency_count{job=\"$job\"}[1m])),1)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 2, + "legendFormat": "write", + "metric": "ceph_osd_perf_commit_latency_seconds", + "refId": "B", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "1000*sum(increase(ceph_osd_op_rw_latency_sum{job=\"$job\"}[1m]))/clamp_min(sum(increase(ceph_osd_op_rw_latency_count{job=\"$job\"}[1m])),1)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "overwrite", + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "OSD Latency", + "tooltip": { + 
"shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "unitScale": true + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 10, + "x": 14, + "y": 19 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "ceph_mds_reply_latency_sum{job=\"$job\"}/ceph_mds_reply_latency_count{job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "latency", + "metric": "ceph_monitor_latency_seconds", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MDS Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": 
{ + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 41, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "OBJECTS", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "unitScale": true + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 27 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/^Total.*$/", + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pool_objects{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "A", + "step": 300 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Objects in the Cluster", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": 
"short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "unitScale": true + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 27 + }, + "hiddenSeries": false, + "id": 43, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Unfound", + "color": "#bf1b00" + }, + { + "alias": "Degraded", + "color": "#ef843c" + }, + { + "alias": "Misplaced", + "color": "#ba43a9" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "ceph_num_objects_unfound{job=\"$job\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Unfound", + "refId": "A", + "step": 300 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "ceph_num_objects_degraded{job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Degraded", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "ceph_num_objects_misplaced{job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Misplaced", + "refId": "C" + } + ], + "thresholds": [], + 
"timeRegions": [], + "title": "Objects Unfound / Degraded / Misplaced", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "unitScale": true + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 32 + }, + "hiddenSeries": false, + "id": 19, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/^Total.*$/", + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_active{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Active", + "refId": "M" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_clean{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Clean", + "refId": "U" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_peering{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Peering", + "refId": "I" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_degraded{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Degraded", + "refId": "B", + "step": 300 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_stale{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Stale", + "refId": "C", + "step": 300 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_unclean_pgs{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Unclean", + "refId": "D", + "step": 300 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_undersized{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Undersized", + "refId": "E", + "step": 300 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_incomplete{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Incomplete", + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_forced_backfill{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Forced Backfill", + "refId": "H" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_inconsistent{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + 
"intervalFactor": 1, + "legendFormat": "Inconsistent", + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_forced_recovery{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Forced Recovery", + "refId": "J" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_creating{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Creating", + "refId": "K" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_wait_backfill{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Wait Backfill", + "refId": "L" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_deep{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Deep", + "refId": "N" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_scrubbing{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Scrubbing", + "refId": "O" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_recovering{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Recovering", + "refId": "P" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_repair{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Repair", + "refId": "Q" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": 
"sum(ceph_pg_down{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Down", + "refId": "R" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_peered{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Peered", + "refId": "S" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_backfill{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Backfill", + "refId": "T" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_remapped{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Remapped", + "refId": "V" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_backfill_toofull{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Backfill Toofull", + "refId": "W" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "PGs", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "unitScale": true + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 5, + "w": 12, + 
"x": 12, + "y": 32 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/^Total.*$/", + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_degraded{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Degraded", + "refId": "F", + "step": 300 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_stale{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Stale", + "refId": "A", + "step": 300 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(ceph_pg_undersized{job=\"$job\"})", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Undersized", + "refId": "B", + "step": 300 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Faulty PGs", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + 
"type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "unitScale": true + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 4, + "w": 12, + "x": 12, + "y": 37 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(irate(ceph_osd_recovery_ops{job=\"$job\"}[$interval]))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "OPS", + "refId": "A", + "step": 300 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Recovery Operations", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 41 + }, + "id": 42, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "OSD Utilization", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + 
"type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "unitScale": true + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 42 + }, + "hiddenSeries": false, + "id": 45, + "interval": "60s", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "ceph_osd_stat_bytes_used{job=\"$job\"}", + "format": "time_series", + "interval": "60s", + "intervalFactor": 1, + "legendFormat": "{{ceph_daemon}}", + "metric": "ceph_osd_avail_bytes", + "refId": "B", + "step": 60 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "OSD Storage Used", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "unitScale": true + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + 
"gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 42 + }, + "hiddenSeries": false, + "id": 47, + "interval": "60s", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.3.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "ceph_osd_stat_bytes_used{job=\"$job\"}/ceph_osd_stat_bytes{job=\"$job\"}*100", + "format": "time_series", + "interval": "60s", + "intervalFactor": 1, + "legendFormat": "{{ceph_daemon}}", + "metric": "ceph_osd_avail_bytes", + "refId": "B", + "step": 60 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "OSD Used %", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [ + "ceph", + "cluster" + ], + "templating": { + "list": [ + { + "auto": true, + "auto_count": 10, + "auto_min": "1m", + "current": { + "selected": false, + "text": "30s", + "value": "30s" + }, + "hide": 0, + "includeAll": false, + "label": "Interval", + "multi": false, + "name": "interval", + "options": [ + { + "selected": false, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "5s", + "value": "5s" + }, + { + "selected": false, + "text": "10s", + "value": 
"10s" + }, + { + "selected": true, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Cluster", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(ceph_mon_quorum_status, job)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph - Cluster", + "uid": "vwcB0Bzmk", + "version": 2, + "weekStart": "" +} diff --git a/etc/grafana-dashboards/ceph_pools.json b/etc/grafana-dashboards/ceph_pools.json new file mode 100644 index 00000000..cbc72b83 --- /dev/null +++ b/etc/grafana-dashboards/ceph_pools.json @@ -0,0 +1,957 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": 
"datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "gauge", + "name": "Gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.3.3" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Ceph Pools dashboard.", + "editable": false, + "fiscalYearStartMonth": 0, + "gnetId": 5342, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 11, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "refId": "A" + } + ], + "title": "Pool: $pool", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + 
"showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes", + "unitScale": true + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^Total.*$/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 4 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^Raw.*$/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 4 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 20, + "x": 0, + "y": 1 + }, + "id": 2, + "interval": "$interval", + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum((ceph_pool_max_avail) *on (pool_id) group_left(name)(ceph_pool_metadata{name=~\"^$pool$\"})) by (name)", + "format": "time_series", + "hide": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Available - {{ name }}", + "metric": "ceph_pool_available_bytes", + "refId": "A", + "step": 60 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum((ceph_pool_stored) *on (pool_id) group_left(name)(ceph_pool_metadata{name=~\"^$pool$\"})) by (name)", + "format": "time_series", + "hide": 
false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Stored - {{ name }}", + "metric": "ceph_pool", + "refId": "B", + "step": 60 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum((ceph_pool_stored + ceph_pool_max_avail) *on (pool_id) group_left(name)(ceph_pool_metadata{name=~\"^$pool$\"})) by (name)", + "format": "time_series", + "hide": true, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Total - {{ name }}", + "metric": "ceph_pool", + "refId": "C", + "step": 60 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum((ceph_pool_stored_raw) *on (pool_id) group_left(name)(ceph_pool_metadata{name=~\"^$pool$\"})) by (name)", + "format": "time_series", + "hide": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Raw - {{ name }}", + "metric": "ceph_pool", + "refId": "D", + "step": 60 + } + ], + "title": "Pool Storage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "decimals": 2, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 75 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percentunit", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 10, + "links": [], + "maxDataPoints": 100, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "10.3.3", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum without (instance, pool_id, name) ((ceph_pool_stored / (ceph_pool_stored + ceph_pool_max_avail)) *on (pool_id) group_left(name)(ceph_pool_metadata{name=~\"^$pool$\"}))", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 60 + } + ], + "title": "Usage", + "type": "gauge" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 12, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "refId": "A" + } + ], + "title": "Pool Info", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 7, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + 
"placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum((ceph_pool_objects) *on (pool_id) group_left(name)(ceph_pool_metadata{name=~\"^$pool$\"})) by (name)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Objects - {{ name }}", + "refId": "A", + "step": 60 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum((ceph_pool_dirty) *on (pool_id) group_left(name)(ceph_pool_metadata{name=~\"^$pool$\"})) by (name)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Dirty Objects - {{ name }}", + "refId": "B", + "step": 60 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum((ceph_pool_quota_objects) *on (pool_id) group_left(name)(ceph_pool_metadata{name=~\"^$pool$\"})) by (name)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Quota Objects - {{ name }}", + "refId": "C" + } + ], + "title": "Objects in Pool", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "IOPS", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 4, + "interval": "$interval", + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum((irate(ceph_pool_rd[3m])) *on (pool_id) group_left(name)(ceph_pool_metadata{name=~\"^$pool$\"})) by (name)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Read - {{ name }}", + "refId": "B", + "step": 60 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum((irate(ceph_pool_wr[3m])) *on (pool_id) group_left(name)(ceph_pool_metadata{name=~\"^$pool$\"})) by (name)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Write - {{ name }}", + "refId": "A", + "step": 60 + } + ], + "title": "IOPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 5, + "interval": "$interval", + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum((irate(ceph_pool_rd_bytes[5m])) *on (pool_id) group_left(name)(ceph_pool_metadata{name=~\"^$pool$\"})) by (name)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Read Bytes - {{ name }}", + "refId": "A", + "step": 60 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum((irate(ceph_pool_wr_bytes[5m])) *on (pool_id) group_left(name)(ceph_pool_metadata{name=~\"^$pool$\"})) by (name)", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Written Bytes - {{ name }}", + "refId": "B", + "step": 60 + } + ], + "title": "Throughput", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 39, + "tags": [ + "ceph", + "pools" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "PBFA97CFB590B2093" + }, + "hide": 0, + "includeAll": false, + "label": "Data source", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + 
"regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "auto": true, + "auto_count": 10, + "auto_min": "1m", + "current": { + "selected": false, + "text": "30s", + "value": "30s" + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Interval", + "multi": false, + "name": "interval", + "options": [ + { + "selected": false, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "10s", + "value": "10s" + }, + { + "selected": true, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "queryValue": "", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "allValue": ".*", + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(ceph_pool_metadata, name)", + "hide": 0, + "includeAll": true, + "label": "Pool", + "multi": true, + "name": "pool", + "options": [], + "query": { + "query": "label_values(ceph_pool_metadata, name)", + "refId": "Prometheus-pool-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 3, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + 
"10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph - Pools", + "uid": "-gtf0Bzik", + "version": 1, + "weekStart": "" +} diff --git a/etc/grafana-dashboards/Fluentbit_Metrics.json b/etc/grafana-dashboards/fluentbit_metrics.json similarity index 100% rename from etc/grafana-dashboards/Fluentbit_Metrics.json rename to etc/grafana-dashboards/fluentbit_metrics.json diff --git a/etc/grafana-dashboards/Galera_MariaDB_Overview.json b/etc/grafana-dashboards/galera_mariadb_overview.json similarity index 100% rename from etc/grafana-dashboards/Galera_MariaDB_Overview.json rename to etc/grafana-dashboards/galera_mariadb_overview.json diff --git a/etc/grafana-dashboards/Kube_State_Metrics.json b/etc/grafana-dashboards/kube_state_metrics.json similarity index 100% rename from etc/grafana-dashboards/Kube_State_Metrics.json rename to etc/grafana-dashboards/kube_state_metrics.json diff --git a/etc/grafana-dashboards/Kubernetes_Cluster_Monitoring.json b/etc/grafana-dashboards/kubernetes_cluster_monitoring.json similarity index 100% rename from etc/grafana-dashboards/Kubernetes_Cluster_Monitoring.json rename to etc/grafana-dashboards/kubernetes_cluster_monitoring.json diff --git a/etc/grafana-dashboards/Loki_Disks_Usage.json b/etc/grafana-dashboards/loki_disks_usage.json similarity index 100% rename from etc/grafana-dashboards/Loki_Disks_Usage.json rename to etc/grafana-dashboards/loki_disks_usage.json diff --git a/etc/grafana-dashboards/MariaDB_Metrics.json b/etc/grafana-dashboards/mariadb_metrics.json similarity index 100% rename from etc/grafana-dashboards/MariaDB_Metrics.json rename to etc/grafana-dashboards/mariadb_metrics.json diff --git a/etc/grafana-dashboards/Memcached_Metrics.json b/etc/grafana-dashboards/memcached_metrics.json similarity index 100% rename from 
etc/grafana-dashboards/Memcached_Metrics.json rename to etc/grafana-dashboards/memcached_metrics.json diff --git a/etc/grafana-dashboards/Node_Metrics.json b/etc/grafana-dashboards/node_metrics.json similarity index 100% rename from etc/grafana-dashboards/Node_Metrics.json rename to etc/grafana-dashboards/node_metrics.json diff --git a/etc/grafana-dashboards/open_alerts.json b/etc/grafana-dashboards/open_alerts.json new file mode 100644 index 00000000..7fba6958 --- /dev/null +++ b/etc/grafana-dashboards/open_alerts.json @@ -0,0 +1,1006 @@ +{ + "__inputs": [], + "__elements": {}, + "__requires": [ + { + "type": "datasource", + "id": "camptocamp-prometheus-alertmanager-datasource", + "name": "Prometheus AlertManager Datasource", + "version": "2.0.1" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.3.3" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "type": "dashboard" + } + ] + }, + "description": "Dashboard to visualize only Open(LIVE) Alerts of AlertManager instead of looking in Slack, Teams, and Emails.", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 12947, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "uid": "$alertmanager" + }, + "description": "", + "fieldConfig": { + "defaults": { + "displayName": "", + "mappings": [ + { + "options": { + "match": "null", + "result": { + "color": "red", + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + } + ] + }, + "unit": "none", + "unitScale": true + }, + 
"overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 8, + "links": [], + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "annotations": false, + "datasource": { + "uid": "$alertmanager" + }, + "expr": "region=~\"$region\", severity=~\"$severity\",alertname=~\".*\", team=~\"$team\",env=~\"$env\"", + "labelSelector": "*", + "legendFormat": "", + "refId": "A", + "target": "Query", + "type": "single" + } + ], + "title": "Total", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "SeverityValue": true, + "Time": true, + "alertname": true, + "alertstatus": true, + "alertstatus_code": true, + "condition": true, + "container": true, + "controller": true, + "daemonset": true, + "deployment": true, + "description": true, + "device": true, + "effect": true, + "endpoint": true, + "horizontalpodautoscaler": true, + "instance": true, + "integration": true, + "job": true, + "job_name": true, + "key": true, + "master": true, + "metrics_path": true, + "namespace": true, + "node": true, + "persistentvolumeclaim": true, + "pod": true, + "prometheus": true, + "reason": true, + "runbook_url": true, + "service": true, + "status": true, + "summary": true, + "target": true, + "uid": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "reduce", + "options": { + "reducers": [ + "count" + ] + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "camptocamp-prometheus-alertmanager-datasource", + "uid": "$alertmanager" + }, + "description": "", + "fieldConfig": { + "defaults": { + "displayName": "", + "mappings": [ + { + "options": { + "match": "null", + "result": { + "color": "green", + 
"index": 0, + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 20, + "links": [], + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "annotations": false, + "datasource": { + "uid": "$alertmanager" + }, + "expr": "region=~\"$region\", severity=~\"critical\",alertname=~\".*\", team=~\"$team\",env=~\"$env\"", + "filters": "", + "labelSelector": "*", + "legendFormat": "", + "receiver": "", + "refId": "A", + "target": "Query", + "type": "single" + } + ], + "title": "Total Critical", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "SeverityValue": false, + "Time": true, + "alertname": true, + "alertstatus": true, + "alertstatus_code": true, + "container": true, + "description": true, + "endpoint": true, + "instance": true, + "job": true, + "job_name": true, + "namespace": true, + "pod": true, + "prometheus": true, + "runbook_url": true, + "service": true, + "severity": false, + "summary": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "SeverityValue": "", + "Time": "" + } + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "SeverityValue": { + "aggregations": [ + "count" + ], + "operation": "aggregate" + }, + "severity": { + "aggregations": [ + "uniqueValues" + ], + "operation": "groupby" + } + } + } + }, + { + "id": "filterByValue", + "options": { + "filters": [ + { + "config": { + "id": "notEqual", + "options": { + "value": "critical" + } + }, + "fieldName": "severity" + } + ], 
+ "match": "any", + "type": "exclude" + } + } + ], + "type": "stat" + }, + { + "datasource": { + "uid": "$alertmanager" + }, + "description": "", + "fieldConfig": { + "defaults": { + "displayName": "", + "mappings": [ + { + "options": { + "match": "null", + "result": { + "color": "orange", + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "orange", + "value": null + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 21, + "links": [], + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "annotations": false, + "datasource": { + "uid": "$alertmanager" + }, + "expr": "region=~\"$region\", severity=~\"warning\",alertname=~\".*\", team=~\"$team\",env=~\"$env\"", + "labelSelector": "*", + "legendFormat": "", + "refId": "A", + "target": "Query", + "type": "single" + } + ], + "title": "Total Warning", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "alertname": true, + "alertstatus": true, + "alertstatus_code": true, + "container": true, + "description": true, + "endpoint": true, + "instance": true, + "job": true, + "job_name": true, + "namespace": true, + "pod": true, + "prometheus": true, + "runbook_url": true, + "service": true, + "summary": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "SeverityValue": { + "aggregations": [ + "count" + ], + "operation": "aggregate" + }, + "severity": { + "aggregations": [], + "operation": "groupby" + } + } + } + }, + { + "id": "filterByValue", + 
"options": { + "filters": [ + { + "config": { + "id": "notEqual", + "options": { + "value": "warning" + } + }, + "fieldName": "severity" + } + ], + "match": "any", + "type": "exclude" + } + } + ], + "type": "stat" + }, + { + "datasource": { + "uid": "$alertmanager" + }, + "description": "", + "fieldConfig": { + "defaults": { + "displayName": "", + "mappings": [ + { + "options": { + "match": "null", + "result": { + "color": "red", + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + } + ] + }, + "unit": "none", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 18, + "links": [], + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "annotations": false, + "datasource": { + "uid": "$alertmanager" + }, + "expr": "region=~\"$region\", severity=~\"$severity\",alertname=~\"InstanceDown\", team=~\"$team\",env=~\"$env\"", + "labelSelector": "*", + "legendFormat": "", + "refId": "A", + "target": "Query", + "type": "single" + } + ], + "title": "Instance/Service Down", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "SeverityValue": false, + "Time": true, + "alertname": false, + "alertstatus": true, + "alertstatus_code": true, + "container": true, + "description": true, + "endpoint": true, + "instance": true, + "job": true, + "job_name": true, + "namespace": true, + "pod": true, + "prometheus": true, + "runbook_url": true, + "service": true, + "severity": true, + "summary": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "filterByValue", + "options": { + 
"filters": [ + { + "config": { + "id": "regex", + "options": { + "value": ".*Down" + } + }, + "fieldName": "alertname" + } + ], + "match": "any", + "type": "include" + } + }, + { + "id": "reduce", + "options": { + "includeTimeField": false, + "mode": "reduceFields", + "reducers": [ + "count" + ] + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "camptocamp-prometheus-alertmanager-datasource", + "uid": "$alertmanager" + }, + "description": "MAKE SURE TO SCROLL TO BOTTOM AND CHECK FOR MORE ALERTS", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "right", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "decimals": 2, + "displayName": "", + "mappings": [ + { + "options": { + "critical": { + "color": "red", + "index": 1 + }, + "info": { + "color": "super-light-yellow", + "index": 2 + }, + "none": { + "color": "super-light-blue", + "index": 3 + }, + "warning": { + "color": "orange", + "index": 0 + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short", + "unitScale": true + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "TIME" + }, + { + "id": "unit", + "value": "time: YYYY-MM-DD HH:mm:ss" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "alertname" + }, + "properties": [ + { + "id": "displayName", + "value": "ALERT" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "data" + }, + "properties": [ + { + "id": "displayName", + "value": "DESCRIPTION" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "links", + "value": [ + { + "targetBlank": true, + "title": 
"Click to view detail metrics", + "url": "/d/LINK_TO_YOUR_NODE_SPECIFIC_DASHBOARD_GOES_HERE?var-host=${__cell_2}" + } + ] + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "severity" + }, + "properties": [ + { + "id": "displayName", + "value": "SEVERITY" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + }, + { + "id": "custom.align" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 2 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 4 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "region" + }, + "properties": [ + { + "id": "displayName", + "value": "REGION" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "env" + }, + "properties": [ + { + "id": "displayName", + "value": "ENV" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "host" + }, + "properties": [ + { + "id": "displayName", + "value": "HOST" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + } + ] + }, + "gridPos": { + "h": 98, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 14, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "annotations": false, + "datasource": { + "uid": "$alertmanager" + }, + "expr": "region=~\"$region\", severity=~\"$severity\",alertname=~\"$alertname\", team=~\"$team\",env=~\"$env\"", + 
"labelSelector": "*", + "legendFormat": "{{ msg }}", + "refId": "A", + "target": "Query", + "type": "table" + } + ], + "title": "OPEN ALERTS", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "SeverityValue": true, + "condition": true, + "endpoint": true, + "prometheus": true, + "service": true + }, + "includeByName": {}, + "indexByName": { + "Time": 0, + "alertname": 1, + "availability_zone": 3, + "container_name": 18, + "data": 17, + "device": 5, + "env": 4, + "fstype": 6, + "host": 2, + "image_name": 19, + "instance": 7, + "instance_type": 8, + "ipaddress": 9, + "job": 10, + "mountpoint": 11, + "name": 23, + "platform_version": 12, + "prometheus": 13, + "region": 14, + "service_name": 20, + "severity": 15, + "slack_channel": 21, + "state": 24, + "tag_name": 22, + "team": 16, + "type": 25 + }, + "renameByName": { + "SeverityValue": "", + "severity": "" + } + } + } + ], + "type": "table" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "alertmanager", + "open-alerts", + "shubhamc183" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "camptocamp-prometheus-alertmanager-datasource", + "value": "ad8d90c7-08d5-45b7-8075-6ee8ed5e7c9e" + }, + "hide": 0, + "includeAll": false, + "label": "Alertmanager", + "multi": false, + "name": "alertmanager", + "options": [], + "query": "camptocamp-prometheus-alertmanager-datasource", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "OPEN ALERTS OF ALERTMANAGER", + "uid": "WojOgXTmk", + "version": 14, + "weekStart": "" +} diff --git 
a/etc/grafana-dashboards/OpenStack_Metrics.json b/etc/grafana-dashboards/openstack_metrics.json similarity index 97% rename from etc/grafana-dashboards/OpenStack_Metrics.json rename to etc/grafana-dashboards/openstack_metrics.json index 39d69911..6eb15b21 100644 --- a/etc/grafana-dashboards/OpenStack_Metrics.json +++ b/etc/grafana-dashboards/openstack_metrics.json @@ -1172,6 +1172,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "Overcommit metric based off regional deployment values. This must be updated to match the allocation ratio per the deployment configs", "fieldConfig": { "defaults": { "color": { @@ -1219,11 +1220,11 @@ }, { "color": "red", - "value": 80 + "value": 10240 } ] }, - "unit": "short", + "unit": "none", "unitScale": true }, "overrides": [] @@ -1275,9 +1276,22 @@ "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "available vcpu cores", + "legendFormat": "pre-overcommit vcpu core availability", "range": true, "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(openstack_placement_resource_total{resourcetype=\"VCPU\"}) * 6", + "hide": false, + "instant": false, + "legendFormat": "overcommit vcpu availability", + "range": true, + "refId": "C" } ], "title": "Overall CPU cores usage", @@ -2587,6 +2601,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "This is the total vCPU limits allocated to projects/tenants", "fieldConfig": { "defaults": { "color": { @@ -2607,8 +2622,8 @@ "overrides": [] }, "gridPos": { - "h": 3, - "w": 2, + "h": 2, + "w": 4, "x": 0, "y": 62 }, @@ -2647,7 +2662,7 @@ "refId": "A" } ], - "title": "vCPUs Available", + "title": "vCPUs Total Limits By Project", "type": "stat" }, { @@ -2661,12 +2676,17 @@ "mode": "thresholds" }, "mappings": [], + "noValue": "0", "thresholds": { - "mode": "percentage", + "mode": "absolute", "steps": [ { "color": "green", "value": null + }, + { + 
"color": "red", + "value": 1000 } ] }, @@ -2675,15 +2695,16 @@ "overrides": [] }, "gridPos": { - "h": 3, + "h": 4, "w": 2, - "x": 2, + "x": 4, "y": 62 }, - "id": 58, + "id": 56, + "links": [], "options": { "colorMode": "value", - "graphMode": "area", + "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { @@ -2694,6 +2715,7 @@ "values": false }, "showPercentChange": false, + "text": {}, "textMode": "auto", "wideLayout": true }, @@ -2706,16 +2728,17 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(openstack_nova_limits_vcpus_used)", + "expr": "openstack_nova_total_vms", "format": "table", - "instant": false, + "instant": true, "interval": "", + "intervalFactor": 1, "legendFormat": "__auto", - "range": true, + "range": false, "refId": "A" } ], - "title": "vCPUs Used", + "title": "Total VMs", "type": "stat" }, { @@ -2748,12 +2771,12 @@ "overrides": [] }, "gridPos": { - "h": 3, + "h": 4, "w": 2, - "x": 4, + "x": 6, "y": 62 }, - "id": 56, + "id": 93, "links": [], "options": { "colorMode": "value", @@ -2781,7 +2804,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "openstack_nova_total_vms", + "expr": "count(openstack_nova_server_status{status=\"ACTIVE\"})", "format": "table", "instant": true, "interval": "", @@ -2791,7 +2814,7 @@ "refId": "A" } ], - "title": "Total VMs", + "title": "VMs ACTIVE", "type": "stat" }, { @@ -2810,7 +2833,7 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "orange", "value": null }, { @@ -2824,12 +2847,12 @@ "overrides": [] }, "gridPos": { - "h": 3, + "h": 4, "w": 2, - "x": 6, + "x": 8, "y": 62 }, - "id": 93, + "id": 91, "links": [], "options": { "colorMode": "value", @@ -2857,7 +2880,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "openstack_nova_server_status{status=\"ACTIVE\"}", + "expr": "count(openstack_nova_server_status{status=\"BUILD\"})", "format": "table", "instant": true, "interval": "", @@ -2867,7 +2890,7 @@ "refId": "A" } ], - "title": "VMs ACTIVE", 
+ "title": "VMs BUILD", "type": "stat" }, { @@ -2900,11 +2923,12 @@ "overrides": [] }, "gridPos": { - "h": 3, + "h": 4, "w": 2, - "x": 8, + "x": 10, "y": 62 }, + "hideTimeOverride": false, "id": 90, "links": [], "options": { @@ -2913,10 +2937,8 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", + "calcs": [], + "fields": "/^Value$/", "values": false }, "showPercentChange": false, @@ -2933,7 +2955,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "openstack_nova_server_status{status='ERROR'}", + "expr": "count(openstack_nova_server_status{status='ERROR'})", "format": "table", "instant": true, "interval": "", @@ -2962,7 +2984,7 @@ "mode": "absolute", "steps": [ { - "color": "orange", + "color": "green", "value": null }, { @@ -2976,12 +2998,12 @@ "overrides": [] }, "gridPos": { - "h": 3, + "h": 4, "w": 2, - "x": 10, + "x": 12, "y": 62 }, - "id": 91, + "id": 92, "links": [], "options": { "colorMode": "value", @@ -3009,7 +3031,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "openstack_nova_server_status{status=\"BUILD\"}", + "expr": "count(openstack_nova_server_status{status=\"SUSPENDED\"})", "format": "table", "instant": true, "interval": "", @@ -3019,7 +3041,7 @@ "refId": "A" } ], - "title": "VMs BUILD", + "title": "VMs SUSPENDED", "type": "stat" }, { @@ -3052,12 +3074,12 @@ "overrides": [] }, "gridPos": { - "h": 3, + "h": 4, "w": 2, - "x": 12, + "x": 14, "y": 62 }, - "id": 92, + "id": 94, "links": [], "options": { "colorMode": "value", @@ -3085,7 +3107,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "openstack_nova_server_status{status=\"SUSPENDED\"}", + "expr": "count(openstack_nova_server_status{status=\"PAUSED\"})", "format": "table", "instant": true, "interval": "", @@ -3095,7 +3117,7 @@ "refId": "A" } ], - "title": "VMs SUSPENDED", + "title": "VMs PAUSED", "type": "stat" }, { @@ -3128,12 +3150,12 @@ "overrides": [] }, "gridPos": { - "h": 3, + "h": 4, "w": 2, - "x": 
14, + "x": 16, "y": 62 }, - "id": 94, + "id": 96, "links": [], "options": { "colorMode": "value", @@ -3161,7 +3183,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "openstack_nova_server_status{status=\"PAUSED\"}", + "expr": "count(openstack_nova_server_status{status=\"RESIZE\"})", "format": "table", "instant": true, "interval": "", @@ -3171,7 +3193,7 @@ "refId": "A" } ], - "title": "VMs PAUSED", + "title": "VMs RESIZE", "type": "stat" }, { @@ -3204,12 +3226,12 @@ "overrides": [] }, "gridPos": { - "h": 3, + "h": 4, "w": 2, - "x": 16, + "x": 18, "y": 62 }, - "id": 95, + "id": 97, "links": [], "options": { "colorMode": "value", @@ -3237,7 +3259,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "openstack_nova_server_status{status=\"REBOOT\"} + openstack_nova_server_status{status=\"HARD_REBOOT\"}", + "expr": "count(openstack_nova_server_status{status=\"RESCUE\"})", "format": "table", "instant": true, "interval": "", @@ -3247,7 +3269,7 @@ "refId": "A" } ], - "title": "VMs REBOOT", + "title": "VMs RESCUE", "type": "stat" }, { @@ -3280,9 +3302,9 @@ "overrides": [] }, "gridPos": { - "h": 3, + "h": 4, "w": 2, - "x": 18, + "x": 20, "y": 62 }, "id": 98, @@ -3356,12 +3378,12 @@ "overrides": [] }, "gridPos": { - "h": 3, + "h": 4, "w": 2, - "x": 20, + "x": 22, "y": 62 }, - "id": 96, + "id": 95, "links": [], "options": { "colorMode": "value", @@ -3389,7 +3411,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "openstack_nova_server_status{status=\"RESIZE\"}", + "expr": "count(openstack_nova_server_status{status=\"REBOOT\"}) + count(openstack_nova_server_status{status=\"HARD_REBOOT\"})", "format": "table", "instant": true, "interval": "", @@ -3399,7 +3421,7 @@ "refId": "A" } ], - "title": "VMs RESIZE", + "title": "VMs REBOOT", "type": "stat" }, { @@ -3407,23 +3429,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "This is how many vCPU's are used by individual projects/tenants", "fieldConfig": { "defaults": { "color": { "mode": 
"thresholds" }, "mappings": [], - "noValue": "0", "thresholds": { - "mode": "absolute", + "mode": "percentage", "steps": [ { "color": "green", "value": null - }, - { - "color": "red", - "value": 1000 } ] }, @@ -3432,16 +3450,15 @@ "overrides": [] }, "gridPos": { - "h": 3, - "w": 2, - "x": 22, - "y": 62 + "h": 2, + "w": 4, + "x": 0, + "y": 64 }, - "id": 97, - "links": [], + "id": 58, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { @@ -3452,7 +3469,6 @@ "values": false }, "showPercentChange": false, - "text": {}, "textMode": "auto", "wideLayout": true }, @@ -3465,17 +3481,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "openstack_nova_server_status{status=\"RESCUE\"}", + "expr": "sum(openstack_nova_limits_vcpus_used)", "format": "table", - "instant": true, + "instant": false, "interval": "", - "intervalFactor": 1, "legendFormat": "__auto", - "range": false, + "range": true, "refId": "A" } ], - "title": "VMs RESCUE", + "title": "vCPUs Limits Used By Project", "type": "stat" }, { @@ -3483,6 +3498,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "This takes in to account the overcommit allocation ratio", "fieldConfig": { "defaults": { "color": { @@ -3543,7 +3559,7 @@ "h": 9, "w": 12, "x": 0, - "y": 65 + "y": 66 }, "id": 40, "links": [], @@ -3570,7 +3586,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "openstack_placement_resource_usage{resourcetype=\"VCPU\"}", + "expr": "(((openstack_placement_resource_usage{resourcetype=\"VCPU\"} * openstack_placement_resource_total{resourcetype=\"VCPU\"}) / 100) / (openstack_placement_resource_total{resourcetype=\"VCPU\"} * 6) * 100)", "format": "time_series", "hide": false, "interval": "", @@ -3580,7 +3596,7 @@ "refId": "A" } ], - "title": "CPU core usage", + "title": "vCPU core overcommit usage percentage", "type": "timeseries" }, { @@ -3650,7 +3666,7 @@ "h": 9, "w": 12, "x": 12, - "y": 65 + 
"y": 66 }, "id": 2, "links": [], @@ -3929,7 +3945,7 @@ "h": 9, "w": 12, "x": 0, - "y": 74 + "y": 75 }, "id": 55, "options": { @@ -4018,6 +4034,15 @@ } }, "fieldName": "status" + }, + { + "config": { + "id": "equal", + "options": { + "value": "DELETED" + } + }, + "fieldName": "status" } ], "match": "any", @@ -4092,7 +4117,7 @@ "h": 9, "w": 12, "x": 12, - "y": 74 + "y": 75 }, "id": 6, "links": [], @@ -4139,7 +4164,7 @@ "h": 1, "w": 24, "x": 0, - "y": 83 + "y": 84 }, "id": 42, "panels": [], @@ -4222,7 +4247,7 @@ "h": 8, "w": 12, "x": 0, - "y": 84 + "y": 85 }, "id": 44, "options": { @@ -4322,7 +4347,7 @@ "h": 8, "w": 12, "x": 12, - "y": 84 + "y": 85 }, "id": 45, "options": { @@ -4372,8 +4397,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -4385,7 +4409,7 @@ "h": 4, "w": 4, "x": 0, - "y": 92 + "y": 93 }, "id": 67, "options": { @@ -4440,8 +4464,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -4454,7 +4477,7 @@ "h": 4, "w": 4, "x": 4, - "y": 92 + "y": 93 }, "id": 69, "options": { @@ -4509,8 +4532,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -4523,7 +4545,7 @@ "h": 4, "w": 4, "x": 8, - "y": 92 + "y": 93 }, "id": 70, "options": { @@ -4568,7 +4590,7 @@ "h": 1, "w": 24, "x": 0, - "y": 96 + "y": 97 }, "id": 71, "panels": [], @@ -4590,8 +4612,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4607,7 +4628,7 @@ "h": 4, "w": 4, "x": 0, - "y": 97 + "y": 98 }, "id": 60, "options": { @@ -4661,8 +4682,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4678,7 +4698,7 @@ "h": 4, "w": 4, "x": 4, - "y": 97 + "y": 98 }, "id": 73, "options": { @@ -4732,8 +4752,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4749,7 +4768,7 @@ 
"h": 4, "w": 4, "x": 8, - "y": 97 + "y": 98 }, "id": 74, "options": { @@ -4803,8 +4822,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4820,7 +4838,7 @@ "h": 4, "w": 4, "x": 12, - "y": 97 + "y": 98 }, "id": 75, "options": { @@ -4874,8 +4892,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4891,7 +4908,7 @@ "h": 4, "w": 4, "x": 16, - "y": 97 + "y": 98 }, "id": 76, "options": { @@ -4936,7 +4953,7 @@ "h": 1, "w": 24, "x": 0, - "y": 101 + "y": 102 }, "id": 59, "panels": [], @@ -4958,8 +4975,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4975,7 +4991,7 @@ "h": 4, "w": 4, "x": 0, - "y": 102 + "y": 103 }, "id": 72, "options": { @@ -5030,8 +5046,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5047,7 +5062,7 @@ "h": 4, "w": 4, "x": 4, - "y": 102 + "y": 103 }, "id": 61, "options": { @@ -5102,8 +5117,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5119,7 +5133,7 @@ "h": 4, "w": 4, "x": 8, - "y": 102 + "y": 103 }, "id": 62, "options": { @@ -5174,8 +5188,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5191,7 +5204,7 @@ "h": 4, "w": 4, "x": 12, - "y": 102 + "y": 103 }, "id": 65, "options": { @@ -5245,8 +5258,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5262,7 +5274,7 @@ "h": 4, "w": 4, "x": 16, - "y": 102 + "y": 103 }, "id": 64, "options": { @@ -5317,8 +5329,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5334,7 +5345,7 @@ "h": 4, "w": 4, "x": 20, - "y": 102 + "y": 103 }, "id": 63, "options": { @@ -5435,6 +5446,6 @@ 
"timezone": "", "title": "OpenStack Exporter", "uid": "YZCsB1Qmy", - "version": 31, + "version": 52, "weekStart": "" } diff --git a/etc/grafana-dashboards/project_lookup.json b/etc/grafana-dashboards/project_lookup.json new file mode 100644 index 00000000..2c5df913 --- /dev/null +++ b/etc/grafana-dashboards/project_lookup.json @@ -0,0 +1,369 @@ +{ + "__inputs": [ + { + "name": "DS_LOKI", + "label": "Loki", + "description": "", + "type": "datasource", + "pluginId": "loki", + "pluginName": "Loki" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "barchart", + "name": "Bar chart", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.3.3" + }, + { + "type": "panel", + "id": "logs", + "name": "Logs", + "version": "" + }, + { + "type": "datasource", + "id": "loki", + "name": "Loki", + "version": "1.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "${DS_LOKI}" + }, + "description": "looks up all CRUD requests to the given service for the top 8 project ID's", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "dark-blue", + "mode": "fixed" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": -3, + "fillOpacity": 39, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + 
"mode": "absolute", + "steps": [ + { + "color": "dark-blue", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "barRadius": 0.5, + "barWidth": 0.55, + "colorByField": "req_count", + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": false + }, + "orientation": "horizontal", + "showValue": "always", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${DS_LOKI}" + }, + "editorMode": "code", + "expr": "sort(topk(8, sum by(tenant_id) (count_over_time({application=\"$service\"} |= `[None` |= `req-` |~ `\\[(?:(?:[^\\[\\s]+ ){3})([^\\s]+)` !~ `((-\\s){4,6}-)]` |~ `(GET|POST|PUT|DELETE)` | logfmt | json | line_format \"{{.log}}\" | pattern `<_> [<_> <_> <_> - - <_> <_>]` | __error__!=\"JSONParserErr\"[3d]))))", + "legendFormat": "req_count", + "queryType": "instant", + "refId": "A" + } + ], + "title": "Top 8 projects by ROT", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(Value #A)", + "renamePattern": "req_count" + } + } + ], + "type": "barchart" + }, + { + "datasource": { + "type": "loki", + "uid": "${DS_LOKI}" + }, + "description": "Provides CRUD request count breakdown for project ID", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "dark-purple", + "mode": "fixed" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": -3, + "fillOpacity": 42, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + 
"scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "semi-dark-purple", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 3, + "options": { + "barRadius": 0.5, + "barWidth": 0.55, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": false + }, + "orientation": "horizontal", + "showValue": "always", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${DS_LOKI}" + }, + "editorMode": "code", + "expr": "sort(topk(8, sum by(method) (count_over_time({application=\"$service\"} |= `[None` |= `req-` |~ `\\[(?:(?:[^\\[\\s]+ ){3})([^\\s]+)` !~ `((-\\s){4,6}-)]` |= `$project_id` |~ `(GET|POST|PUT|DELETE)` | logfmt | json | line_format \"{{.log}}\" | regexp `(?P(GET|POST|PUT|DELETE))` | __error__!=\"JSONParserErr\"[3d]))))", + "legendFormat": "req_count", + "queryType": "instant", + "refId": "A" + } + ], + "title": "Project requests by CRUD for project ID", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(Value #A)", + "renamePattern": "req_count" + } + } + ], + "type": "barchart" + }, + { + "datasource": { + "type": "loki", + "uid": "${DS_LOKI}" + }, + "description": "Look up logs for project ID or request ID", + "gridPos": { + "h": 22, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 2, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": 
false + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${DS_LOKI}" + }, + "editorMode": "code", + "expr": "{application=\"$service\"} | logfmt | json | line_format \"{{ .kubernetes_host}} {{.kubernetes_pod_name}} {{.log}}\" |= `$project_id`", + "queryType": "range", + "refId": "A" + } + ], + "title": "Log lookup", + "type": "logs" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "8fb86e74be8d49f3befde1f647d9f2ef", + "value": "8fb86e74be8d49f3befde1f647d9f2ef" + }, + "description": "The project or tenant ID to query for. Can also be a specific request ID but this may not work with all panels in this dashboard. ", + "hide": 0, + "includeAll": false, + "label": "project_id", + "multi": false, + "name": "project_id", + "options": [], + "query": "", + "queryValue": " ", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": false, + "text": "neutron", + "value": "neutron" + }, + "description": "The service to query for. 
e.g. nova or neutron", + "hide": 0, + "includeAll": false, + "label": "service", + "multi": false, + "name": "service", + "options": [], + "query": "", + "queryValue": "neutron", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-2d", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Project log lookup", + "uid": "d2733029-1e16-472f-9279-72458dfbc469", + "version": 20, + "weekStart": "" +} diff --git a/etc/grafana-dashboards/Prometheus_Overview.json b/etc/grafana-dashboards/prometheus_overview.json similarity index 100% rename from etc/grafana-dashboards/Prometheus_Overview.json rename to etc/grafana-dashboards/prometheus_overview.json diff --git a/etc/grafana-dashboards/RabbitMQ_Metrics.json b/etc/grafana-dashboards/rabbitmq_metrics.json similarity index 100% rename from etc/grafana-dashboards/RabbitMQ_Metrics.json rename to etc/grafana-dashboards/rabbitmq_metrics.json