From 361cc1b890007df178e1341ff6baecbdee5a986b Mon Sep 17 00:00:00 2001 From: "v.oleynikov" Date: Fri, 12 Jul 2024 22:44:21 +0300 Subject: [PATCH] [controller] add liveness and readiness checks Signed-off-by: v.oleynikov --- images/agent/cmd/main.go | 7 +++--- images/agent/config/config.go | 22 +++++++++++------ .../sds-health-watcher-controller/cmd/main.go | 7 +++--- .../config/config.go | 24 ++++++++++++------- templates/agent/daemonset.yaml | 15 ++++++++++++ .../deployment.yaml | 15 ++++++++++++ 6 files changed, 69 insertions(+), 21 deletions(-) diff --git a/images/agent/cmd/main.go b/images/agent/cmd/main.go index bf3bb286..71dde698 100644 --- a/images/agent/cmd/main.go +++ b/images/agent/cmd/main.go @@ -93,9 +93,10 @@ func main() { log.Info("[main] successfully read scheme CR") managerOpts := manager.Options{ - Scheme: scheme, - Logger: log.GetLogger(), - Metrics: server.Options{BindAddress: cfgParams.MetricsPort}, + Scheme: scheme, + Logger: log.GetLogger(), + Metrics: server.Options{BindAddress: cfgParams.MetricsPort}, + HealthProbeBindAddress: cfgParams.HealthProbeBindAddress, } mgr, err := manager.New(kConfig, managerOpts) diff --git a/images/agent/config/config.go b/images/agent/config/config.go index dce13f6a..6dd8c441 100644 --- a/images/agent/config/config.go +++ b/images/agent/config/config.go @@ -30,13 +30,15 @@ import ( ) const ( - ScanInterval = "SCAN_INTERVAL" - NodeName = "NODE_NAME" - LogLevel = "LOG_LEVEL" - MetricsPort = "METRICS_PORT" - MachineID = "MACHINE_ID" - ThrottleInterval = "THROTTLER_INTERVAL" - CmdDeadlineDuration = "CMD_DEADLINE_DURATION" + ScanInterval = "SCAN_INTERVAL" + NodeName = "NODE_NAME" + LogLevel = "LOG_LEVEL" + MetricsPort = "METRICS_PORT" + MachineID = "MACHINE_ID" + ThrottleInterval = "THROTTLER_INTERVAL" + CmdDeadlineDuration = "CMD_DEADLINE_DURATION" + DefaultHealthProbeBindAddressEnvName = "HEALTH_PROBE_BIND_ADDRESS" + DefaultHealthProbeBindAddress = ":8081" ) type Options struct { @@ -49,6 +51,7 @@ type Options struct { LLVRequeueIntervalSec time.Duration ThrottleIntervalSec time.Duration CmdDeadlineDurationSec time.Duration + HealthProbeBindAddress string } func NewConfig() (*Options, error) { @@ -77,6 +80,11 @@ func NewConfig() (*Options, error) { opts.MetricsPort = ":9695" } + opts.HealthProbeBindAddress = os.Getenv(DefaultHealthProbeBindAddressEnvName) + if opts.HealthProbeBindAddress == "" { + opts.HealthProbeBindAddress = DefaultHealthProbeBindAddress + } + scanInt := os.Getenv(ScanInterval) if scanInt == "" { opts.BlockDeviceScanIntervalSec = 5 * time.Second diff --git a/images/sds-health-watcher-controller/cmd/main.go b/images/sds-health-watcher-controller/cmd/main.go index 2f7270b2..7a990daf 100644 --- a/images/sds-health-watcher-controller/cmd/main.go +++ b/images/sds-health-watcher-controller/cmd/main.go @@ -88,9 +88,10 @@ func main() { log.Info("[main] successfully read scheme CR") managerOpts := manager.Options{ - Scheme: scheme, - Logger: log.GetLogger(), - Metrics: server.Options{BindAddress: cfgParams.MetricsPort}, + Scheme: scheme, + Logger: log.GetLogger(), + Metrics: server.Options{BindAddress: cfgParams.MetricsPort}, + HealthProbeBindAddress: cfgParams.HealthProbeBindAddress, } mgr, err := manager.New(kConfig, managerOpts) diff --git a/images/sds-health-watcher-controller/config/config.go b/images/sds-health-watcher-controller/config/config.go index 713a222d..905486c3 100644 --- a/images/sds-health-watcher-controller/config/config.go +++ b/images/sds-health-watcher-controller/config/config.go @@ -26,17 +26,20 @@ import ( ) const ( - ScanInterval = "SCAN_INTERVAL" - NodeName = "NODE_NAME" - LogLevel = "LOG_LEVEL" - MetricsPort = "METRICS_PORT" + ScanInterval = "SCAN_INTERVAL" + NodeName = "NODE_NAME" + LogLevel = "LOG_LEVEL" + MetricsPort = "METRICS_PORT" + DefaultHealthProbeBindAddressEnvName = "HEALTH_PROBE_BIND_ADDRESS" + DefaultHealthProbeBindAddress = ":8081" ) type Options struct { - Loglevel logger.Verbosity - MetricsPort string - ScanIntervalSec time.Duration - NodeName string + Loglevel logger.Verbosity + MetricsPort string + ScanIntervalSec time.Duration + NodeName string + HealthProbeBindAddress string } func NewConfig() (*Options, error) { @@ -54,6 +57,11 @@ func NewConfig() (*Options, error) { opts.MetricsPort = ":9695" } + opts.HealthProbeBindAddress = os.Getenv(DefaultHealthProbeBindAddressEnvName) + if opts.HealthProbeBindAddress == "" { + opts.HealthProbeBindAddress = DefaultHealthProbeBindAddress + } + scanInt := os.Getenv(ScanInterval) if scanInt == "" { opts.ScanIntervalSec = 5 * time.Second diff --git a/templates/agent/daemonset.yaml b/templates/agent/daemonset.yaml index 8c654496..897066e4 100644 --- a/templates/agent/daemonset.yaml +++ b/templates/agent/daemonset.yaml @@ -69,6 +69,21 @@ spec: - name: sds-node-configurator-agent image: {{ include "helm_lib_module_image" (list . "agent") }} imagePullPolicy: IfNotPresent + readinessProbe: + httpGet: + path: /readyz + port: 8081 + scheme: HTTP + initialDelaySeconds: 5 + failureThreshold: 2 + periodSeconds: 1 + livenessProbe: + httpGet: + path: /healthz + port: 8081 + scheme: HTTP + periodSeconds: 1 + failureThreshold: 3 securityContext: privileged: true seLinuxOptions: diff --git a/templates/sds-health-watcher-controller/deployment.yaml b/templates/sds-health-watcher-controller/deployment.yaml index 821a77a9..72aac9b3 100644 --- a/templates/sds-health-watcher-controller/deployment.yaml +++ b/templates/sds-health-watcher-controller/deployment.yaml @@ -68,6 +68,21 @@ spec: - name: sds-health-watcher-controller image: {{ include "helm_lib_module_image" (list . "sdsHealthWatcherController") }} imagePullPolicy: IfNotPresent + readinessProbe: + httpGet: + path: /readyz + port: 8081 + scheme: HTTP + initialDelaySeconds: 5 + failureThreshold: 2 + periodSeconds: 1 + livenessProbe: + httpGet: + path: /healthz + port: 8081 + scheme: HTTP + periodSeconds: 1 + failureThreshold: 3 resources: requests: {{- include "helm_lib_module_ephemeral_storage_only_logs" . | nindent 14 }}