Skip to content

Commit

Permalink
[monitoring] Add metrics collectors (#11)
Browse files Browse the repository at this point in the history
Signed-off-by: v.oleynikov <[email protected]>
  • Loading branch information
duckhawk authored Jul 25, 2024
1 parent 9eb031c commit c8329bc
Show file tree
Hide file tree
Showing 9 changed files with 84 additions and 8 deletions.
2 changes: 1 addition & 1 deletion images/agent/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

`NODE_NAME`

`METRICS_PORT` - default : 9695
`METRICS_PORT` - default : 4202


#### Metrics
Expand Down
4 changes: 2 additions & 2 deletions images/agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ const (
ThrottleInterval = "THROTTLER_INTERVAL"
CmdDeadlineDuration = "CMD_DEADLINE_DURATION"
DefaultHealthProbeBindAddressEnvName = "HEALTH_PROBE_BIND_ADDRESS"
DefaultHealthProbeBindAddress = ":8081"
DefaultHealthProbeBindAddress = ":4228"
)

type Options struct {
Expand Down Expand Up @@ -77,7 +77,7 @@ func NewConfig() (*Options, error) {

opts.MetricsPort = os.Getenv(MetricsPort)
if opts.MetricsPort == "" {
opts.MetricsPort = ":9695"
opts.MetricsPort = ":4202"
}

opts.HealthProbeBindAddress = os.Getenv(DefaultHealthProbeBindAddressEnvName)
Expand Down
2 changes: 1 addition & 1 deletion images/agent/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func TestNewConfig(t *testing.T) {

t.Run("MetricsPortNotSet_ReturnsDefaultPort", func(t *testing.T) {
expNodeName := "test-node"
expMetricsPort := ":9695"
expMetricsPort := ":4202"
expMachineId := "test-id"

err := os.Setenv(NodeName, expNodeName)
Expand Down
2 changes: 1 addition & 1 deletion images/sds-health-watcher-controller/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func NewConfig() (*Options, error) {

opts.MetricsPort = os.Getenv(MetricsPort)
if opts.MetricsPort == "" {
opts.MetricsPort = ":9695"
opts.MetricsPort = ":8080"
}

opts.HealthProbeBindAddress = os.Getenv(DefaultHealthProbeBindAddressEnvName)
Expand Down
2 changes: 1 addition & 1 deletion images/sds-health-watcher-controller/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func TestNewConfig(t *testing.T) {

t.Run("MetricsPortNotSet_ReturnsDefaultPort", func(t *testing.T) {
expNodeName := "test-node"
expMetricsPort := ":9695"
expMetricsPort := ":8080"
expMachineId := "test-id"

err := os.Setenv(NodeName, expNodeName)
Expand Down
8 changes: 6 additions & 2 deletions templates/agent/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,22 @@ spec:
readinessProbe:
httpGet:
path: /readyz
port: 8081
port: 4228
scheme: HTTP
initialDelaySeconds: 5
failureThreshold: 2
periodSeconds: 1
livenessProbe:
httpGet:
path: /healthz
port: 8081
port: 4228
scheme: HTTP
periodSeconds: 1
failureThreshold: 3
ports:
- name: metrics
containerPort: 4202
protocol: TCP
securityContext:
privileged: true
seLinuxOptions:
Expand Down
32 changes: 32 additions & 0 deletions templates/agent/podmonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{{- if (.Values.global.enabledModules | has "operator-prometheus-crd") }}
---
# PodMonitor for the sds-node-configurator agent pods.
# The whole manifest is rendered only when "operator-prometheus-crd" is listed
# in .Values.global.enabledModules (see the surrounding if/end).
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: sds-node-configurator
# The monitor object lives in d8-monitoring, while the monitored pods are
# selected from another namespace via namespaceSelector below.
namespace: d8-monitoring
{{- include "helm_lib_module_labels" (list $ (dict "prometheus" "main")) | nindent 2 }}
spec:
podMetricsEndpoints:
# Scrape /metrics over plain HTTP on the container port named "metrics"
# (the agent DaemonSet in this change declares it as containerPort 4202).
# NOTE(review): prometheus-operator appears to mark `targetPort` as deprecated
# in favor of `port` — confirm against the CRD version in use.
- targetPort: metrics
scheme: http
path: /metrics
relabelings:
# Drop the endpoint, namespace, pod and container labels from the target.
- regex: endpoint|namespace|pod|container
action: labeldrop
# Force a fixed job label.
- targetLabel: job
replacement: sds-node-configurator
# Populate the node label from the pod's node-name meta label.
- sourceLabels: [__meta_kubernetes_pod_node_name]
targetLabel: node
# Force a fixed tier label.
- targetLabel: tier
replacement: cluster
# Keep only targets whose pod-ready meta label equals "true".
- sourceLabels: [__meta_kubernetes_pod_ready]
regex: "true"
action: keep
# Select pods labelled app=sds-node-configurator.
selector:
matchLabels:
app: sds-node-configurator
# Look for those pods only in the namespace named d8-<chart name>.
namespaceSelector:
matchNames:
- d8-{{ .Chart.Name }}
{{- end }}
4 changes: 4 additions & 0 deletions templates/sds-health-watcher-controller/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ spec:
scheme: HTTP
periodSeconds: 1
failureThreshold: 3
ports:
- name: metrics
containerPort: 8080
protocol: TCP
resources:
requests:
{{- include "helm_lib_module_ephemeral_storage_only_logs" . | nindent 14 }}
Expand Down
36 changes: 36 additions & 0 deletions templates/sds-health-watcher-controller/servicemonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{{- if (.Values.global.enabledModules | has "operator-prometheus-crd") }}
---
# ServiceMonitor for sds-health-watcher-controller.
# The whole manifest is rendered only when "operator-prometheus-crd" is listed
# in .Values.global.enabledModules (see the surrounding if/end).
# NOTE(review): a ServiceMonitor selects Services; no Service object is visible
# in this change — confirm one exists with a port named "metrics" and the
# labels matched by the selector below.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: sds-health-watcher-controller
# The monitor object lives in d8-monitoring, while the monitored objects are
# selected from another namespace via namespaceSelector below.
namespace: d8-monitoring
{{- include "helm_lib_module_labels" (list $ (dict "prometheus" "main")) | nindent 2 }}
spec:
endpoints:
# Scrape /metrics over plain HTTP on the port named "metrics".
- port: metrics
scheme: http
path: /metrics
# Authenticate scrapes with the "token" key of the "prometheus-token" secret.
# NOTE(review): with scheme http the token would presumably travel
# unencrypted — confirm this is intended.
bearerTokenSecret:
name: "prometheus-token"
key: "token"
# NOTE(review): tlsConfig presumably has no effect while scheme is http —
# confirm whether https was intended or this stanza can be dropped.
tlsConfig:
insecureSkipVerify: true
relabelings:
# Drop the endpoint, namespace, pod and container labels from the target.
- regex: endpoint|namespace|pod|container
action: labeldrop
# Force a fixed job label.
- targetLabel: job
replacement: sds-health-watcher-controller
# Force a fixed tier label.
- targetLabel: tier
replacement: cluster
# Keep only targets whose endpoint-ready meta label equals "true".
- sourceLabels: [__meta_kubernetes_endpointslice_endpoint_conditions_ready]
regex: "true"
action: keep
# Select objects carrying both of these labels.
selector:
matchLabels:
app.kubernetes.io/instance: sds-health-watcher-controller
app.kubernetes.io/managed-by: Helm
# Look for them only in the namespace named d8-<chart name>.
namespaceSelector:
matchNames:
- d8-{{ .Chart.Name }}
{{- end }}

0 comments on commit c8329bc

Please sign in to comment.