diff --git a/controller/cache/cache.go b/controller/cache/cache.go index b17afbca5234b..ee43ddda6e5c4 100644 --- a/controller/cache/cache.go +++ b/controller/cache/cache.go @@ -608,6 +608,10 @@ func (c *liveStateCache) getCluster(server string) (clustercache.ClusterCache, e c.metricsServer.IncClusterEventsCount(cluster.Server, gvk.Group, gvk.Kind) }) + _ = clusterCache.OnProcessEventsHandler(func(duration time.Duration, processedEventsNumber int) { + c.metricsServer.ObserveResourceEventsProcessingDuration(cluster.Server, duration, processedEventsNumber) + }) + c.clusters[server] = clusterCache return clusterCache, nil diff --git a/controller/metrics/metrics.go b/controller/metrics/metrics.go index a9df75aff8015..5a8265768da00 100644 --- a/controller/metrics/metrics.go +++ b/controller/metrics/metrics.go @@ -30,17 +30,19 @@ import ( type MetricsServer struct { *http.Server - syncCounter *prometheus.CounterVec - kubectlExecCounter *prometheus.CounterVec - kubectlExecPendingGauge *prometheus.GaugeVec - k8sRequestCounter *prometheus.CounterVec - clusterEventsCounter *prometheus.CounterVec - redisRequestCounter *prometheus.CounterVec - reconcileHistogram *prometheus.HistogramVec - redisRequestHistogram *prometheus.HistogramVec - registry *prometheus.Registry - hostname string - cron *cron.Cron + syncCounter *prometheus.CounterVec + kubectlExecCounter *prometheus.CounterVec + kubectlExecPendingGauge *prometheus.GaugeVec + k8sRequestCounter *prometheus.CounterVec + clusterEventsCounter *prometheus.CounterVec + redisRequestCounter *prometheus.CounterVec + reconcileHistogram *prometheus.HistogramVec + redisRequestHistogram *prometheus.HistogramVec + resourceEventsProcessingHistogram *prometheus.HistogramVec + resourceEventsNumberGauge *prometheus.GaugeVec + registry *prometheus.Registry + hostname string + cron *cron.Cron } const ( @@ -144,6 +146,20 @@ var ( }, []string{"hostname", "initiator"}, ) + + resourceEventsProcessingHistogram = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "argocd_resource_events_processing", + Help: "Time to process resource events in seconds.", + Buckets: []float64{0.25, .5, 1, 2, 4, 8, 16}, + }, + []string{"server"}, + ) + + resourceEventsNumberGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "argocd_resource_events_number", + Help: "Number of processed resource events", + }, []string{"server"}) ) // NewMetricsServer returns a new prometheus server which collects application metrics @@ -192,6 +208,8 @@ func NewMetricsServer(addr string, appLister applister.ApplicationLister, appFil registry.MustRegister(clusterEventsCounter) registry.MustRegister(redisRequestCounter) registry.MustRegister(redisRequestHistogram) + registry.MustRegister(resourceEventsProcessingHistogram) + registry.MustRegister(resourceEventsNumberGauge) return &MetricsServer{ registry: registry, @@ -199,15 +217,17 @@ func NewMetricsServer(addr string, appLister applister.ApplicationLister, appFil Addr: addr, Handler: mux, }, - syncCounter: syncCounter, - k8sRequestCounter: k8sRequestCounter, - kubectlExecCounter: kubectlExecCounter, - kubectlExecPendingGauge: kubectlExecPendingGauge, - reconcileHistogram: reconcileHistogram, - clusterEventsCounter: clusterEventsCounter, - redisRequestCounter: redisRequestCounter, - redisRequestHistogram: redisRequestHistogram, - hostname: hostname, + syncCounter: syncCounter, + k8sRequestCounter: k8sRequestCounter, + kubectlExecCounter: kubectlExecCounter, + kubectlExecPendingGauge: kubectlExecPendingGauge, + reconcileHistogram: reconcileHistogram, + clusterEventsCounter: clusterEventsCounter, + redisRequestCounter: redisRequestCounter, + redisRequestHistogram: redisRequestHistogram, + resourceEventsProcessingHistogram: resourceEventsProcessingHistogram, + resourceEventsNumberGauge: resourceEventsNumberGauge, + hostname: hostname, // This cron is used to expire the metrics cache. // Currently clearing the metrics cache is logging and deleting from the map // so there is no possibility of panic, but we will add a chain to keep robfig/cron v1 behavior. @@ -269,6 +289,12 @@ func (m *MetricsServer) ObserveRedisRequestDuration(duration time.Duration) { m.redisRequestHistogram.WithLabelValues(m.hostname, common.ApplicationController).Observe(duration.Seconds()) } +// ObserveResourceEventsProcessingDuration observes resource events processing duration +func (m *MetricsServer) ObserveResourceEventsProcessingDuration(server string, duration time.Duration, processedEventsNumber int) { + m.resourceEventsProcessingHistogram.WithLabelValues(server).Observe(duration.Seconds()) + m.resourceEventsNumberGauge.WithLabelValues(server).Set(float64(processedEventsNumber)) +} + // IncReconcile increments the reconcile counter for an application func (m *MetricsServer) IncReconcile(app *argoappv1.Application, duration time.Duration) { m.reconcileHistogram.WithLabelValues(app.Namespace, app.Spec.Destination.Server).Observe(duration.Seconds()) @@ -295,6 +321,8 @@ func (m *MetricsServer) SetExpiration(cacheExpiration time.Duration) error { m.redisRequestCounter.Reset() m.reconcileHistogram.Reset() m.redisRequestHistogram.Reset() + m.resourceEventsProcessingHistogram.Reset() + m.resourceEventsNumberGauge.Reset() }) if err != nil { return err diff --git a/go.mod b/go.mod index ad48c7869f6b8..d85ab1d4181a0 100644 --- a/go.mod +++ b/go.mod @@ -295,6 +295,7 @@ require ( ) replace ( + github.com/argoproj/gitops-engine => github.com/mpelekh/gitops-engine v0.0.0-20241010140240-2f5160baff34 github.com/go-telegram-bot-api/telegram-bot-api/v5 => github.com/OvyFlash/telegram-bot-api/v5 v5.0.0-20240108230938-63e5c59035bf github.com/golang/protobuf => github.com/golang/protobuf v1.5.4 diff --git a/go.sum b/go.sum index b2ddcfcde0689..6be0a76aed864 100644 --- a/go.sum +++ b/go.sum @@ -84,8 +84,6 @@ github.com/antonmedv/expr v1.15.1/go.mod h1:0E/6TxnOlRNp81GMzX9QfDPAmHo2Phg00y4J github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/appscode/go v0.0.0-20191119085241-0887d8ec2ecc/go.mod h1:OawnOmAL4ZX3YaPdN+8HTNwBveT1jMsqP74moa9XUbE= -github.com/argoproj/gitops-engine v0.7.1-0.20240917171920-72bcdda3f0a5 h1:K/e+NsNmE4BccRu21QpqUxkTHxU9YWjU3M775Ck+V/E= -github.com/argoproj/gitops-engine v0.7.1-0.20240917171920-72bcdda3f0a5/go.mod h1:b1vuwkyMUszyUK+USUJqC8vJijnQsEPNDpC+sDdDLtM= github.com/argoproj/notifications-engine v0.4.1-0.20241007194503-2fef5c9049fd h1:lOVVoK89j9Nd4+JYJiKAaMNYC1402C0jICROOfUPWn0= github.com/argoproj/notifications-engine v0.4.1-0.20241007194503-2fef5c9049fd/go.mod h1:N0A4sEws2soZjEpY4hgZpQS8mRIEw6otzwfkgc3g9uQ= github.com/argoproj/pkg v0.13.7-0.20230626144333-d56162821bd1 h1:qsHwwOJ21K2Ao0xPju1sNuqphyMnMYkyB3ZLoLtxWpo= @@ -686,6 +684,8 @@ github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/ github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4= github.com/montanaflynn/stats v0.6.6/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow= github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= +github.com/mpelekh/gitops-engine v0.0.0-20241010140240-2f5160baff34 h1:IOHfurcmFFu/WUg4C3y+U8Uh9VVfL3/fEosjBHTeJXc= +github.com/mpelekh/gitops-engine v0.0.0-20241010140240-2f5160baff34/go.mod h1:b1vuwkyMUszyUK+USUJqC8vJijnQsEPNDpC+sDdDLtM= github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=