Skip to content

Commit

Permalink
chore: avoid resources lock contention (argoproj#8172)
Browse files Browse the repository at this point in the history
Signed-off-by: Mykola Pelekh <[email protected]>
  • Loading branch information
mpelekh committed Oct 11, 2024
1 parent d542b02 commit 07c2774
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 22 deletions.
4 changes: 4 additions & 0 deletions controller/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,10 @@ func (c *liveStateCache) getCluster(server string) (clustercache.ClusterCache, e
c.metricsServer.IncClusterEventsCount(cluster.Server, gvk.Group, gvk.Kind)
})

_ = clusterCache.OnProcessEventsHandler(func(duration time.Duration, processedEventsNumber int) {
c.metricsServer.ObserveResourceEventsProcessingDuration(cluster.Server, duration, processedEventsNumber)
})

c.clusters[server] = clusterCache

return clusterCache, nil
Expand Down
68 changes: 48 additions & 20 deletions controller/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,19 @@ import (

type MetricsServer struct {
*http.Server
syncCounter *prometheus.CounterVec
kubectlExecCounter *prometheus.CounterVec
kubectlExecPendingGauge *prometheus.GaugeVec
k8sRequestCounter *prometheus.CounterVec
clusterEventsCounter *prometheus.CounterVec
redisRequestCounter *prometheus.CounterVec
reconcileHistogram *prometheus.HistogramVec
redisRequestHistogram *prometheus.HistogramVec
registry *prometheus.Registry
hostname string
cron *cron.Cron
syncCounter *prometheus.CounterVec
kubectlExecCounter *prometheus.CounterVec
kubectlExecPendingGauge *prometheus.GaugeVec
k8sRequestCounter *prometheus.CounterVec
clusterEventsCounter *prometheus.CounterVec
redisRequestCounter *prometheus.CounterVec
reconcileHistogram *prometheus.HistogramVec
redisRequestHistogram *prometheus.HistogramVec
resourceEventsProcessingHistogram *prometheus.HistogramVec
resourceEventsNumberGauge *prometheus.GaugeVec
registry *prometheus.Registry
hostname string
cron *cron.Cron
}

const (
Expand Down Expand Up @@ -144,6 +146,20 @@ var (
},
[]string{"hostname", "initiator"},
)

// resourceEventsProcessingHistogram tracks how long resource events take to
// process, in seconds, partitioned by the "server" label.
resourceEventsProcessingHistogram = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Name: "argocd_resource_events_processing",
		Help: "Time to process resource events in seconds.",
		// Seven doubling buckets: 0.25, 0.5, 1, 2, 4, 8 and 16 seconds.
		Buckets: prometheus.ExponentialBuckets(0.25, 2, 7),
	},
	[]string{"server"},
)

// resourceEventsNumberGauge holds a processed-resource-events count,
// partitioned by the "server" label.
resourceEventsNumberGauge = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Name: "argocd_resource_events_number",
		Help: "Number of processed resource events",
	},
	[]string{"server"},
)
)

// NewMetricsServer returns a new prometheus server which collects application metrics
Expand Down Expand Up @@ -192,22 +208,26 @@ func NewMetricsServer(addr string, appLister applister.ApplicationLister, appFil
registry.MustRegister(clusterEventsCounter)
registry.MustRegister(redisRequestCounter)
registry.MustRegister(redisRequestHistogram)
registry.MustRegister(resourceEventsProcessingHistogram)
registry.MustRegister(resourceEventsNumberGauge)

return &MetricsServer{
registry: registry,
Server: &http.Server{
Addr: addr,
Handler: mux,
},
syncCounter: syncCounter,
k8sRequestCounter: k8sRequestCounter,
kubectlExecCounter: kubectlExecCounter,
kubectlExecPendingGauge: kubectlExecPendingGauge,
reconcileHistogram: reconcileHistogram,
clusterEventsCounter: clusterEventsCounter,
redisRequestCounter: redisRequestCounter,
redisRequestHistogram: redisRequestHistogram,
hostname: hostname,
syncCounter: syncCounter,
k8sRequestCounter: k8sRequestCounter,
kubectlExecCounter: kubectlExecCounter,
kubectlExecPendingGauge: kubectlExecPendingGauge,
reconcileHistogram: reconcileHistogram,
clusterEventsCounter: clusterEventsCounter,
redisRequestCounter: redisRequestCounter,
redisRequestHistogram: redisRequestHistogram,
resourceEventsProcessingHistogram: resourceEventsProcessingHistogram,
resourceEventsNumberGauge: resourceEventsNumberGauge,
hostname: hostname,
// This cron is used to expire the metrics cache.
// Currently clearing the metrics cache is logging and deleting from the map
// so there is no possibility of panic, but we will add a chain to keep robfig/cron v1 behavior.
Expand Down Expand Up @@ -269,6 +289,12 @@ func (m *MetricsServer) ObserveRedisRequestDuration(duration time.Duration) {
m.redisRequestHistogram.WithLabelValues(m.hostname, common.ApplicationController).Observe(duration.Seconds())
}

// ObserveResourceEventsProcessingDuration records one resource-event processing
// run for the given server: the duration is added to the processing histogram
// in seconds, and the events gauge is overwritten with processedEventsNumber.
// Both metrics are labeled with server.
func (m *MetricsServer) ObserveResourceEventsProcessingDuration(server string, duration time.Duration, processedEventsNumber int) {
	elapsedSeconds := duration.Seconds()
	m.resourceEventsProcessingHistogram.WithLabelValues(server).Observe(elapsedSeconds)

	// The gauge stores the latest count only; it is not accumulated across calls.
	eventCount := float64(processedEventsNumber)
	m.resourceEventsNumberGauge.WithLabelValues(server).Set(eventCount)
}

// IncReconcile increments the reconcile counter for an application
func (m *MetricsServer) IncReconcile(app *argoappv1.Application, duration time.Duration) {
m.reconcileHistogram.WithLabelValues(app.Namespace, app.Spec.Destination.Server).Observe(duration.Seconds())
Expand All @@ -295,6 +321,8 @@ func (m *MetricsServer) SetExpiration(cacheExpiration time.Duration) error {
m.redisRequestCounter.Reset()
m.reconcileHistogram.Reset()
m.redisRequestHistogram.Reset()
m.resourceEventsProcessingHistogram.Reset()
m.resourceEventsNumberGauge.Reset()
})
if err != nil {
return err
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ require (
)

replace (
github.com/argoproj/gitops-engine => github.com/mpelekh/gitops-engine v0.0.0-20241010140240-2f5160baff34
github.com/go-telegram-bot-api/telegram-bot-api/v5 => github.com/OvyFlash/telegram-bot-api/v5 v5.0.0-20240108230938-63e5c59035bf

github.com/golang/protobuf => github.com/golang/protobuf v1.5.4
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ github.com/antonmedv/expr v1.15.1/go.mod h1:0E/6TxnOlRNp81GMzX9QfDPAmHo2Phg00y4J
github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
github.com/appscode/go v0.0.0-20191119085241-0887d8ec2ecc/go.mod h1:OawnOmAL4ZX3YaPdN+8HTNwBveT1jMsqP74moa9XUbE=
github.com/argoproj/gitops-engine v0.7.1-0.20240917171920-72bcdda3f0a5 h1:K/e+NsNmE4BccRu21QpqUxkTHxU9YWjU3M775Ck+V/E=
github.com/argoproj/gitops-engine v0.7.1-0.20240917171920-72bcdda3f0a5/go.mod h1:b1vuwkyMUszyUK+USUJqC8vJijnQsEPNDpC+sDdDLtM=
github.com/argoproj/notifications-engine v0.4.1-0.20241007194503-2fef5c9049fd h1:lOVVoK89j9Nd4+JYJiKAaMNYC1402C0jICROOfUPWn0=
github.com/argoproj/notifications-engine v0.4.1-0.20241007194503-2fef5c9049fd/go.mod h1:N0A4sEws2soZjEpY4hgZpQS8mRIEw6otzwfkgc3g9uQ=
github.com/argoproj/pkg v0.13.7-0.20230626144333-d56162821bd1 h1:qsHwwOJ21K2Ao0xPju1sNuqphyMnMYkyB3ZLoLtxWpo=
Expand Down Expand Up @@ -686,6 +684,8 @@ github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4=
github.com/montanaflynn/stats v0.6.6/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ=
github.com/mpelekh/gitops-engine v0.0.0-20241010140240-2f5160baff34 h1:IOHfurcmFFu/WUg4C3y+U8Uh9VVfL3/fEosjBHTeJXc=
github.com/mpelekh/gitops-engine v0.0.0-20241010140240-2f5160baff34/go.mod h1:b1vuwkyMUszyUK+USUJqC8vJijnQsEPNDpC+sDdDLtM=
github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
Expand Down

0 comments on commit 07c2774

Please sign in to comment.