From 34a558de40bd66859ca62cdf2dcb3cde21a9f2a5 Mon Sep 17 00:00:00 2001 From: Cloudzp Date: Wed, 21 Aug 2024 17:21:45 +0800 Subject: [PATCH 01/18] fix golangci error --- pkg/prometheus-adapter/config_fetcher.go | 2 +- pkg/server/store/secret/cluster.go | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/pkg/prometheus-adapter/config_fetcher.go b/pkg/prometheus-adapter/config_fetcher.go index 6bc17e20c..fd616e8d5 100644 --- a/pkg/prometheus-adapter/config_fetcher.go +++ b/pkg/prometheus-adapter/config_fetcher.go @@ -151,7 +151,7 @@ func FlushRules(metricsDiscoveryConfig config.MetricsDiscoveryConfig, mapper met } if len(errStr) > 0 { - return fmt.Errorf(strings.Join(errStr, ",")) + return fmt.Errorf("%s", strings.Join(errStr, ",")) } return err } diff --git a/pkg/server/store/secret/cluster.go b/pkg/server/store/secret/cluster.go index c47c5d66a..368304c3f 100644 --- a/pkg/server/store/secret/cluster.go +++ b/pkg/server/store/secret/cluster.go @@ -48,9 +48,8 @@ func updateClusterInSecret(cluster *store.Cluster, secret *v1.Secret) error { } } -func deleteClusterInSecret(clusterid string, secret *v1.Secret) error { +func deleteClusterInSecret(clusterid string, secret *v1.Secret) { delete(secret.Data, clusterid) - return nil } func getClusterInSecret(clusterid string, secret *v1.Secret) (*store.Cluster, error) { @@ -129,10 +128,7 @@ func (c *clusters) DeleteCluster(ctx context.Context, clusterid string) error { if err != nil { return err } - err = deleteClusterInSecret(clusterid, secret) - if err != nil { - return err - } + deleteClusterInSecret(clusterid, secret) _, err = c.writeSecretStore(ctx, secret) return err } From f4630adf396f66ddcab12e7e5a1c3821a541211c Mon Sep 17 00:00:00 2001 From: guoxiongfeng Date: Fri, 9 Jun 2023 11:11:56 +0800 Subject: [PATCH 02/18] fix ehpa controller update hpa logic (cherry picked from commit 91d21b56dc65eb0f41670e164269411d9bcf08cc) --- .golangci.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.golangci.yaml b/.golangci.yaml index 102857c18..faa3ffb90 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -4,9 +4,7 @@ run: linters: disable-all: true enable: - - deadcode - unused - - varcheck - ineffassign - goimports - gofmt @@ -15,7 +13,6 @@ linters: - unconvert - govet - errcheck - - structcheck - staticcheck linters-settings: From 48b5aee76c4bfdf68eb2c861b8b617d595c78aa6 Mon Sep 17 00:00:00 2001 From: qmhu Date: Mon, 19 Jun 2023 16:25:39 +0800 Subject: [PATCH 03/18] completion check (cherry picked from commit 91c4571de3975ba361a26aa1e6e521a30546a7b5) --- pkg/recommendation/recommender/resource/recommend.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/recommendation/recommender/resource/recommend.go b/pkg/recommendation/recommender/resource/recommend.go index f03f8fc5a..e4e879c5e 100644 --- a/pkg/recommendation/recommender/resource/recommend.go +++ b/pkg/recommendation/recommender/resource/recommend.go @@ -122,7 +122,7 @@ func (rr *ResourceRecommender) Recommend(ctx *framework.RecommendationContext) e if rr.HistoryCompletionCheck { completion, existDays, err := utils.DetectTimestampCompletion(tsList, rr.CpuModelHistoryLength, time.Now()) if !completion || err != nil { - return fmt.Errorf("%s: cpu timestamps are not completed, expect %s actual %d days", metricNamer.BuildUniqueKey(), rr.CpuModelHistoryLength, existDays) + return fmt.Errorf("%s: cpu timestamps aren't completed, expect days %s actual %d ", metricNamer.BuildUniqueKey(), rr.CpuModelHistoryLength, existDays) } } @@ -145,7 +145,7 @@ func (rr *ResourceRecommender) Recommend(ctx *framework.RecommendationContext) e if rr.HistoryCompletionCheck { completion, existDays, err := utils.DetectTimestampCompletion(tsList, rr.MemHistoryLength, time.Now()) if !completion || err != nil { - return fmt.Errorf("%s: memory timestamps are not completed, expect %s actual %d days ", metricNamer.BuildUniqueKey(), rr.MemHistoryLength, existDays) + return fmt.Errorf("%s: memory timestamps aren't completed, expect days %s actual %d ", metricNamer.BuildUniqueKey(), rr.MemHistoryLength, existDays) } } From 54196472f7ce4afb5468a62490b347bb2ab993f1 Mon Sep 17 00:00:00 2001 From: qmhu Date: Mon, 19 Jun 2023 17:58:35 +0800 Subject: [PATCH 04/18] optimize log (cherry picked from commit c2757027672e66be4e83670f2e8688ad622ebf57) --- pkg/recommendation/recommender/resource/recommend.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/recommendation/recommender/resource/recommend.go b/pkg/recommendation/recommender/resource/recommend.go index e4e879c5e..f03f8fc5a 100644 --- a/pkg/recommendation/recommender/resource/recommend.go +++ b/pkg/recommendation/recommender/resource/recommend.go @@ -122,7 +122,7 @@ func (rr *ResourceRecommender) Recommend(ctx *framework.RecommendationContext) e if rr.HistoryCompletionCheck { completion, existDays, err := utils.DetectTimestampCompletion(tsList, rr.CpuModelHistoryLength, time.Now()) if !completion || err != nil { - return fmt.Errorf("%s: cpu timestamps aren't completed, expect days %s actual %d ", metricNamer.BuildUniqueKey(), rr.CpuModelHistoryLength, existDays) + return fmt.Errorf("%s: cpu timestamps are not completed, expect %s actual %d days", metricNamer.BuildUniqueKey(), rr.CpuModelHistoryLength, existDays) } } @@ -145,7 +145,7 @@ func (rr *ResourceRecommender) Recommend(ctx *framework.RecommendationContext) e if rr.HistoryCompletionCheck { completion, existDays, err := utils.DetectTimestampCompletion(tsList, rr.MemHistoryLength, time.Now()) if !completion || err != nil { - return fmt.Errorf("%s: memory timestamps aren't completed, expect days %s actual %d ", metricNamer.BuildUniqueKey(), rr.MemHistoryLength, existDays) + return fmt.Errorf("%s: memory timestamps are not completed, expect %s actual %d days ", metricNamer.BuildUniqueKey(), rr.MemHistoryLength, existDays) } } From db71adae74ed438d5ad50060ea9a485167e6aa21 Mon Sep 17 00:00:00 2001 From: qmhu Date: Thu, 10 Aug 2023 19:55:58 +0800 Subject: [PATCH 05/18] monitor recommendation metrics (cherry picked from commit 6df335747da2bcc337b28fb98afb8b07cd8d5269) --- cmd/craned/app/manager.go | 11 +++- cmd/craned/app/options/options.go | 8 +++ .../recommendation/recommendation_checker.go | 62 +++++++++++++++++++ .../recommendation_rule_controller.go | 11 ++++ pkg/metrics/analysis.go | 22 ++++++- 5 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 pkg/controller/recommendation/recommendation_checker.go diff --git a/cmd/craned/app/manager.go b/cmd/craned/app/manager.go index 57dfac9ef..d95c9ffcb 100644 --- a/cmd/craned/app/manager.go +++ b/cmd/craned/app/manager.go @@ -143,7 +143,7 @@ func Run(ctx context.Context, opts *options.Options) error { } }() - initControllers(podOOMRecorder, mgr, opts, predictorMgr, historyDataSources[providers.PrometheusDataSource]) + initControllers(ctx, podOOMRecorder, mgr, opts, predictorMgr, historyDataSources[providers.PrometheusDataSource]) // initialize custom collector metrics initMetricCollector(mgr) runAll(ctx, mgr, predictorMgr, dataSourceProviders[providers.PrometheusDataSource], opts) @@ -266,7 +266,7 @@ func initPredictorManager(opts *options.Options, realtimeDataSources map[provide } // initControllers setup controllers with manager -func initControllers(oomRecorder oom.Recorder, mgr ctrl.Manager, opts *options.Options, predictorMgr predictor.Manager, historyDataSource providers.History) { +func initControllers(ctx context.Context, oomRecorder oom.Recorder, mgr ctrl.Manager, opts *options.Options, predictorMgr predictor.Manager, historyDataSource providers.History) { discoveryClientSet, err := discovery.NewDiscoveryClientForConfig(mgr.GetConfig()) if err != nil { klog.Exit(err, "Unable to create discover client") @@ -417,6 +417,13 @@ func initControllers(oomRecorder oom.Recorder, mgr ctrl.Manager, opts *options.O }).SetupWithManager(mgr); err != nil { klog.Exit(err, "unable to create controller", "controller", "RecommendationTriggerController") } + + checker := recommendationctrl.Checker{ + Client: mgr.GetClient(), + MonitorInterval: opts.MonitorInterval, + OutDateInterval: opts.OutDateInterval, + } + checker.Run(ctx.Done()) } // CnpController diff --git a/cmd/craned/app/options/options.go b/cmd/craned/app/options/options.go index e2094b54a..47299b2f5 100644 --- a/cmd/craned/app/options/options.go +++ b/cmd/craned/app/options/options.go @@ -67,6 +67,12 @@ type Options struct { // CacheUnstructured indicates whether to cache Unstructured objects. When enabled, it will speed up reading Unstructured objects, but will increase memory usage. CacheUnstructured bool + + // MonitorInterval is the interval for recommendation checker + MonitorInterval time.Duration + + // OutDateInterval is the checking interval for identify a recommendation is outdated + OutDateInterval time.Duration } // NewOptions builds an empty options. @@ -139,4 +145,6 @@ func (o *Options) AddFlags(flags *pflag.FlagSet) { flags.IntVar(&o.OOMRecordMaxNumber, "oom-record-max-number", 10000, "Max number for oom records to store in configmap") flags.IntVar(&o.TimeSeriesPredictionMaxConcurrentReconciles, "time-series-prediction-max-concurrent-reconciles", 10, "Max concurrent reconciles for TimeSeriesPrediction controller") flags.BoolVar(&o.CacheUnstructured, "cache-unstructured", true, "whether to cache Unstructured objects. When enabled, it will speed up reading Unstructured objects but will increase memory usage") + flags.DurationVar(&o.MonitorInterval, "recommendation-monitor-interval", time.Hour, "interval for recommendation checker") + flags.DurationVar(&o.OutDateInterval, "recommendation-outdate-interval", 24*time.Hour, "interval for identify a recommendation is outdated") } diff --git a/pkg/controller/recommendation/recommendation_checker.go b/pkg/controller/recommendation/recommendation_checker.go new file mode 100644 index 000000000..804424659 --- /dev/null +++ b/pkg/controller/recommendation/recommendation_checker.go @@ -0,0 +1,62 @@ +package recommendation + +import ( + "context" + "time" + + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" + + analysisv1alpha1 "github.com/gocrane/api/analysis/v1alpha1" + + "github.com/gocrane/crane/pkg/metrics" +) + +type Checker struct { + client.Client + MonitorInterval time.Duration + OutDateInterval time.Duration +} + +func (r Checker) Run(stopCh <-chan struct{}) { + go func() { + for { + select { + case <-stopCh: + return + case <-time.Tick(r.MonitorInterval): + r.runChecker() + } + } + }() +} + +func (r Checker) runChecker() { + recommendList := &analysisv1alpha1.RecommendationList{} + err := r.Client.List(context.TODO(), recommendList, []client.ListOption{}...) + if err != nil { + klog.Errorf("Failed to list recommendation: %v", err) + } + + for _, recommend := range recommendList.Items { + updateStatus := "Updated" + if time.Now().Sub(recommend.Status.LastUpdateTime.Time) > r.OutDateInterval { + updateStatus = "OutDate" + } + + resultStatus := "Failed" + if len(recommend.Status.RecommendedInfo) != 0 || len(recommend.Status.RecommendedValue) != 0 { + resultStatus = "Success" + } + + metrics.RecommendationsStatus.With(map[string]string{ + "type": string(recommend.Spec.Type), + "apiversion": recommend.Spec.TargetRef.APIVersion, + "owner_kind": recommend.Spec.TargetRef.Kind, + "namespace": recommend.Spec.TargetRef.Namespace, + "owner_name": recommend.Spec.TargetRef.Name, + "update_status": updateStatus, + "result_status": resultStatus, + }).Set(1) + } +} diff --git a/pkg/controller/recommendation/recommendation_rule_controller.go b/pkg/controller/recommendation/recommendation_rule_controller.go index 2f6d8d082..cdbcfe922 100644 --- a/pkg/controller/recommendation/recommendation_rule_controller.go +++ b/pkg/controller/recommendation/recommendation_rule_controller.go @@ -3,6 +3,7 @@ package recommendation import ( "context" "fmt" + "github.com/gocrane/crane/pkg/metrics" "sort" "strconv" "strings" @@ -309,6 +310,16 @@ func (c *RecommendationRuleController) getIdentities(ctx context.Context, recomm } } + for _, id := range identities { + metrics.SelectTargets.With(map[string]string{ + "type": id.Recommender, + "apiversion": id.APIVersion, + "owner_kind": id.Kind, + "namespace": id.Namespace, + "owner_name": id.Name, + }).Set(1) + } + return identities, nil } diff --git a/pkg/metrics/analysis.go b/pkg/metrics/analysis.go index 10b37cee2..bd930a474 100644 --- a/pkg/metrics/analysis.go +++ b/pkg/metrics/analysis.go @@ -25,8 +25,28 @@ var ( }, []string{"apiversion", "owner_kind", "namespace", "owner_name"}, ) + + SelectTargets = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "crane", + Subsystem: "analysis", + Name: "select_targets", + Help: "The number of selected targets", + }, + []string{"type", "apiversion", "owner_kind", "namespace", "owner_name"}, + ) + + RecommendationsStatus = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "crane", + Subsystem: "analysis", + Name: "recommendations_status", + Help: "The status of recommendations", + }, + []string{"type", "apiversion", "owner_kind", "namespace", "owner_name", "update_status", "result_status"}, + ) ) func init() { - metrics.Registry.MustRegister(ResourceRecommendation, ReplicasRecommendation) + metrics.Registry.MustRegister(ResourceRecommendation, ReplicasRecommendation, SelectTargets, RecommendationsStatus) } From f023e60647104bffb342d84f8d5455c24f29fbdd Mon Sep 17 00:00:00 2001 From: qmhu Date: Fri, 11 Aug 2023 16:10:53 +0800 Subject: [PATCH 06/18] fix lint (cherry picked from commit ca2be388c8628ca5e990ed37ecb9521af39d6223) --- pkg/controller/recommendation/recommendation_checker.go | 5 ++++- .../recommendation/recommendation_rule_controller.go | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pkg/controller/recommendation/recommendation_checker.go b/pkg/controller/recommendation/recommendation_checker.go index 804424659..c3ebfa064 100644 --- a/pkg/controller/recommendation/recommendation_checker.go +++ b/pkg/controller/recommendation/recommendation_checker.go @@ -20,11 +20,14 @@ type Checker struct { func (r Checker) Run(stopCh <-chan struct{}) { go func() { + ticker := time.NewTicker(r.MonitorInterval) + defer ticker.Stop() + for { select { case <-stopCh: return - case <-time.Tick(r.MonitorInterval): + case <-ticker.C: r.runChecker() } } diff --git a/pkg/controller/recommendation/recommendation_rule_controller.go b/pkg/controller/recommendation/recommendation_rule_controller.go index cdbcfe922..f3e9a76e7 100644 --- a/pkg/controller/recommendation/recommendation_rule_controller.go +++ b/pkg/controller/recommendation/recommendation_rule_controller.go @@ -3,7 +3,6 @@ package recommendation import ( "context" "fmt" - "github.com/gocrane/crane/pkg/metrics" "sort" "strconv" "strings" @@ -33,6 +32,7 @@ import ( analysisv1alph1 "github.com/gocrane/api/analysis/v1alpha1" "github.com/gocrane/crane/pkg/known" + "github.com/gocrane/crane/pkg/metrics" "github.com/gocrane/crane/pkg/oom" predictormgr "github.com/gocrane/crane/pkg/predictor" "github.com/gocrane/crane/pkg/providers" From c3bf1509fe950ace5f654a45cef789db035893e1 Mon Sep 17 00:00:00 2001 From: whitebear009 Date: Wed, 30 Aug 2023 17:43:58 +0800 Subject: [PATCH 07/18] add ContainerResource source type for ehpa prediction (cherry picked from commit 4a459d3883565c51449825b4f0f74d50d893596c) --- pkg/controller/ehpa/hpa.go | 60 +++++++++++++++++++++++----- pkg/controller/ehpa/predict.go | 52 ++++++++++++------------ pkg/prometheus-adapter/expression.go | 33 ++++++++------- pkg/utils/ehpa.go | 34 +++++++++++++--- pkg/utils/pod.go | 7 +++- pkg/utils/pod_test.go | 21 ++++++++-- 6 files changed, 145 insertions(+), 62 deletions(-) diff --git a/pkg/controller/ehpa/hpa.go b/pkg/controller/ehpa/hpa.go index b43d7d6cd..9871716e8 100644 --- a/pkg/controller/ehpa/hpa.go +++ b/pkg/controller/ehpa/hpa.go @@ -9,9 +9,11 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/scale" "k8s.io/client-go/util/retry" "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client" @@ -208,10 +210,30 @@ func (c *EffectiveHPAController) GetHPAMetrics(ctx context.Context, ehpa *autosc var metricIdentifier string var averageValue *resource.Quantity switch metric.Type { - case autoscalingv2.ResourceMetricSourceType: - metricIdentifier = utils.GetMetricIdentifier(metric, metric.Resource.Name.String()) + case autoscalingv2.ResourceMetricSourceType, autoscalingv2.ContainerResourceMetricSourceType: + var averageUtilization *int32 + var containerName string + if metric.Resource != nil { + if metric.Resource.Target.AverageUtilization != nil { + averageUtilization = metric.Resource.Target.AverageUtilization + } + if metric.Resource.Target.AverageValue != nil { + averageValue = metric.Resource.Target.AverageValue + } + } + if metric.ContainerResource != nil { + containerName = metric.ContainerResource.Container + if metric.ContainerResource.Target.AverageUtilization != nil { + averageUtilization = metric.ContainerResource.Target.AverageUtilization + } + if metric.ContainerResource.Target.AverageValue != nil { + averageValue = metric.ContainerResource.Target.AverageValue + } + } + // When use AverageUtilization in EffectiveHorizontalPodAutoscaler's metricSpec, convert to AverageValue - if metric.Resource.Target.AverageUtilization != nil { + if averageUtilization != nil { + metricName := utils.GetMetricName(metric) scale, _, err := utils.GetScale(ctx, c.RestMapper, c.ScaleClient, ehpa.Namespace, ehpa.Spec.ScaleTargetRef) if err != nil { return nil, err @@ -231,24 +253,21 @@ func (c *EffectiveHPAController) GetHPAMetrics(ctx context.Context, ehpa *autosc return nil, fmt.Errorf("failed to get available pods. ") } - requests, err := utils.CalculatePodRequests(availablePods, metric.Resource.Name) + requests, err := utils.CalculatePodRequests(availablePods, v1.ResourceName(metricName), containerName) if err != nil { return nil, err } - value := int64((float64(requests) * float64(*metric.Resource.Target.AverageUtilization) / 100) / float64(len(availablePods))) + value := int64((float64(requests) * float64(*averageUtilization) / 100) / float64(len(availablePods))) averageValue = resource.NewMilliQuantity(value, resource.DecimalSI) - } else { - averageValue = metric.Resource.Target.AverageValue } case autoscalingv2.ExternalMetricSourceType: - metricIdentifier = utils.GetMetricIdentifier(metric, metric.External.Metric.Name) averageValue = metric.External.Target.AverageValue case autoscalingv2.PodsMetricSourceType: - metricIdentifier = utils.GetMetricIdentifier(metric, metric.Pods.Metric.Name) averageValue = metric.Pods.Target.AverageValue } + metricIdentifier = utils.GetPredictionMetricIdentifier(metric) if metricIdentifier == "" { continue } @@ -374,3 +393,26 @@ func (c *EffectiveHPAController) propagateLabelAndAnnotation(ehpa *autoscalingap } } } + +func getAvailablePods(ctx context.Context, restMapper meta.RESTMapper, kubeClient client.Client, scaleClient scale.ScalesGetter, ehpa autoscalingapi.EffectiveHorizontalPodAutoscaler) ([]v1.Pod, error) { + scale, _, err := utils.GetScale(ctx, restMapper, scaleClient, ehpa.Namespace, ehpa.Spec.ScaleTargetRef) + if err != nil { + return nil, err + } + + pods, err := utils.GetPodsFromScale(kubeClient, scale) + if err != nil { + return nil, err + } + + if len(pods) == 0 { + return nil, fmt.Errorf("no pods returns from scale object. ") + } + + availablePods := utils.GetAvailablePods(pods) + if len(availablePods) == 0 { + return nil, fmt.Errorf("failed to get available pods. ") + } + + return availablePods, nil +} diff --git a/pkg/controller/ehpa/predict.go b/pkg/controller/ehpa/predict.go index 97b2d4ac1..e128c5b18 100644 --- a/pkg/controller/ehpa/predict.go +++ b/pkg/controller/ehpa/predict.go @@ -133,25 +133,12 @@ func (c *EffectiveHPAController) NewPredictionObject(ehpa *autoscalingapi.Effect var predictionMetrics []predictionapi.PredictionMetric for _, metric := range ehpa.Spec.Metrics { - var metricName string - //get metricIdentifier by metric.Type and metricName - var metricIdentifier string - switch metric.Type { - case autoscalingv2.ResourceMetricSourceType: - metricName = metric.Resource.Name.String() - metricIdentifier = utils.GetMetricIdentifier(metric, metric.Resource.Name.String()) - case autoscalingv2.ExternalMetricSourceType: - metricName = metric.External.Metric.Name - metricIdentifier = utils.GetMetricIdentifier(metric, metric.External.Metric.Name) - case autoscalingv2.PodsMetricSourceType: - metricName = metric.Pods.Metric.Name - metricIdentifier = utils.GetMetricIdentifier(metric, metric.Pods.Metric.Name) - } - + metricIdentifier := utils.GetPredictionMetricIdentifier(metric) if metricIdentifier == "" { continue } + metricName := utils.GetMetricName(metric) //get matchLabels var matchLabels []string var metricRule *prometheus_adapter.MetricRule @@ -159,7 +146,7 @@ func (c *EffectiveHPAController) NewPredictionObject(ehpa *autoscalingapi.Effect // Supreme priority: annotation expressionQuery := utils.GetExpressionQueryAnnotation(metricIdentifier, ehpa.Annotations) if expressionQuery == "" { - var nameReg string + var podNameReg string // get metricRule from prometheus-adapter switch metric.Type { case autoscalingv2.ResourceMetricSourceType: @@ -169,7 +156,23 @@ func (c *EffectiveHPAController) NewPredictionObject(ehpa *autoscalingapi.Effect klog.Errorf("Got MetricRulesResource prometheus-adapter-resource Failed MetricName[%s]", metricName) } else { klog.V(4).Infof("Got MetricRulesResource prometheus-adapter-resource MetricMatches[%s] SeriesName[%s]", metricRule.MetricMatches, metricRule.SeriesName) - nameReg = utils.GetPodNameReg(ehpa.Spec.ScaleTargetRef.Name, ehpa.Spec.ScaleTargetRef.Kind) + podNameReg = utils.GetPodNameReg(ehpa.Spec.ScaleTargetRef.Name, ehpa.Spec.ScaleTargetRef.Kind) + } + } + case autoscalingv2.ContainerResourceMetricSourceType: + if len(mrs.MetricRulesResource) > 0 { + metricRule = prometheus_adapter.MatchMetricRule(mrs.MetricRulesResource, metricName) + if metricRule == nil { + klog.Errorf("Got MetricRulesResource prometheus-adapter-resource Failed MetricName[%s]", metricName) + } else { + klog.V(4).Infof("Got MetricRulesResource prometheus-adapter-resource MetricMatches[%s] SeriesName[%s]", metricRule.MetricMatches, metricRule.SeriesName) + podNameReg = utils.GetPodNameReg(ehpa.Spec.ScaleTargetRef.Name, ehpa.Spec.ScaleTargetRef.Kind) + // Compared to ResourceMetricSourceType, there is an additional container-name field + containerLabel := "container" + if metricRule.ContainerLabel != "" { + containerLabel = metricRule.ContainerLabel + } + matchLabels = append(matchLabels, utils.MapSortToArray(map[string]string{containerLabel: metric.ContainerResource.Container})...) } } case autoscalingv2.PodsMetricSourceType: @@ -179,12 +182,9 @@ func (c *EffectiveHPAController) NewPredictionObject(ehpa *autoscalingapi.Effect klog.Errorf("Got MetricRulesCustomer prometheus-adapter-customer Failed MetricName[%s]", metricName) } else { klog.V(4).Infof("Got MetricRulesCustomer prometheus-adapter-customer MetricMatches[%s] SeriesName[%s]", metricRule.MetricMatches, metricRule.SeriesName) - nameReg = utils.GetPodNameReg(ehpa.Spec.ScaleTargetRef.Name, ehpa.Spec.ScaleTargetRef.Kind) - + podNameReg = utils.GetPodNameReg(ehpa.Spec.ScaleTargetRef.Name, ehpa.Spec.ScaleTargetRef.Kind) if metric.Pods.Metric.Selector != nil { - for _, i := range utils.MapSortToArray(metric.Pods.Metric.Selector.MatchLabels) { - matchLabels = append(matchLabels, i) - } + matchLabels = append(matchLabels, utils.MapSortToArray(metric.Pods.Metric.Selector.MatchLabels)...) } } } @@ -196,18 +196,16 @@ func (c *EffectiveHPAController) NewPredictionObject(ehpa *autoscalingapi.Effect } else { klog.V(4).Infof("Got MetricRulesExternal prometheus-adapter-external MetricMatches[%s] SeriesName[%s]", metricRule.MetricMatches, metricRule.SeriesName) if metric.External.Metric.Selector != nil { - for _, i := range utils.MapSortToArray(metric.External.Metric.Selector.MatchLabels) { - matchLabels = append(matchLabels, i) - } + matchLabels = append(matchLabels, utils.MapSortToArray(metric.External.Metric.Selector.MatchLabels)...) } } } } if metricRule != nil { - // Second priority: get default expressionQuery + // Second priority: get prometheus-adapter expressionQuery var err error - expressionQuery, err = metricRule.QueryForSeries(ehpa.Namespace, nameReg, append(mrs.ExtensionLabels, matchLabels...)) + expressionQuery, err = metricRule.QueryForSeries(ehpa.Namespace, podNameReg, append(mrs.ExtensionLabels, matchLabels...)) if err != nil { klog.Errorf("Got promSelector prometheus-adapter %v %v", metricRule, err) } else { diff --git a/pkg/prometheus-adapter/expression.go b/pkg/prometheus-adapter/expression.go index b38ae9c91..e53355311 100644 --- a/pkg/prometheus-adapter/expression.go +++ b/pkg/prometheus-adapter/expression.go @@ -47,12 +47,13 @@ type MetricRules struct { } type MetricRule struct { - MetricMatches string - SeriesName string - ResConverter naming.ResourceConverter - Template *template.Template - Namespaced bool - LabelMatchers []string + MetricMatches string + SeriesName string + ResConverter naming.ResourceConverter + Template *template.Template + Namespaced bool + LabelMatchers []string + ContainerLabel string } type QueryTemplateArgs struct { @@ -129,7 +130,7 @@ func SetExtensionLabels(extensionLabels string) { } } -// GetMetricRuleResourceFromRules produces a MetricNamer for each rule in the given config. +// GetMetricRulesFromResourceRules produces a MetricNamer for each rule in the given config. func GetMetricRulesFromResourceRules(cfg config.ResourceRules, mapper meta.RESTMapper) (metricRules []MetricRule, metricRulesError []string, err error) { // get cpu MetricsQuery if cfg.CPU.ContainerQuery != "" { @@ -154,10 +155,11 @@ func GetMetricRulesFromResourceRules(cfg config.ResourceRules, mapper meta.RESTM klog.Errorf("unable to parse metrics query template %q: %v", cfg.CPU.ContainerQuery, err) } else { metricRules = append(metricRules, MetricRule{ - MetricMatches: "cpu", - ResConverter: resConverter, - Template: templ, - Namespaced: true, + MetricMatches: "cpu", + ResConverter: resConverter, + Template: templ, + Namespaced: true, + ContainerLabel: cfg.CPU.ContainerLabel, }) } } @@ -190,10 +192,11 @@ func GetMetricRulesFromResourceRules(cfg config.ResourceRules, mapper meta.RESTM klog.Errorf("unable to parse metrics query template %q: %v", cfg.Memory.ContainerQuery, err) } else { metricRules = append(metricRules, MetricRule{ - MetricMatches: "memory", - ResConverter: resConverter, - Template: templ, - Namespaced: true, + MetricMatches: "memory", + ResConverter: resConverter, + Template: templ, + Namespaced: true, + ContainerLabel: cfg.Memory.ContainerLabel, }) } } diff --git a/pkg/utils/ehpa.go b/pkg/utils/ehpa.go index ead7b081f..dbdd0f2ed 100644 --- a/pkg/utils/ehpa.go +++ b/pkg/utils/ehpa.go @@ -39,7 +39,7 @@ func IsEHPACronEnabled(ehpa *autoscalingapi.EffectiveHorizontalPodAutoscaler) bo // GetPredictionMetricName return metric name used by prediction func GetPredictionMetricName(sourceType autoscalingv2.MetricSourceType) (metricName string) { switch sourceType { - case autoscalingv2.ResourceMetricSourceType, autoscalingv2.PodsMetricSourceType, autoscalingv2.ExternalMetricSourceType: + case autoscalingv2.ResourceMetricSourceType, autoscalingv2.ContainerResourceMetricSourceType, autoscalingv2.PodsMetricSourceType, autoscalingv2.ExternalMetricSourceType: metricName = known.MetricNamePrediction } @@ -51,19 +51,36 @@ func GetCronMetricName() string { return known.MetricNameCron } -// GetGeneralPredictionMetricName return metric name used by prediction -func GetMetricIdentifier(metric autoscalingv2.MetricSpec, name string) string { +func GetMetricName(metric autoscalingv2.MetricSpec) string { + switch metric.Type { + case autoscalingv2.PodsMetricSourceType: + return metric.Pods.Metric.Name + case autoscalingv2.ResourceMetricSourceType: + return metric.Resource.Name.String() + case autoscalingv2.ContainerResourceMetricSourceType: + return metric.ContainerResource.Name.String() + case autoscalingv2.ExternalMetricSourceType: + return metric.External.Metric.Name + default: + return "" + } +} + +// GetPredictionMetricIdentifier return metric name used by prediction +func GetPredictionMetricIdentifier(metric autoscalingv2.MetricSpec) string { var prefix string switch metric.Type { case autoscalingv2.PodsMetricSourceType: prefix = "pods" case autoscalingv2.ResourceMetricSourceType: prefix = "resource" + case autoscalingv2.ContainerResourceMetricSourceType: + prefix = "container-resource" case autoscalingv2.ExternalMetricSourceType: prefix = "external" } - return fmt.Sprintf("%s.%s", prefix, name) + return fmt.Sprintf("%s.%s", prefix, GetMetricName(metric)) } // GetExpressionQueryAnnotation return metric query from annotation by metricName @@ -95,7 +112,7 @@ func IsExpressionQueryAnnotationEnabled(metricIdentifier string, annotations map return false } -// GetExpressionQuery return metric query +// GetExpressionQueryDefault return default metric query func GetExpressionQueryDefault(metric autoscalingv2.MetricSpec, namespace string, name string, kind string) string { var expressionQuery string switch metric.Type { @@ -106,6 +123,13 @@ func GetExpressionQueryDefault(metric autoscalingv2.MetricSpec, namespace string case "memory": expressionQuery = GetWorkloadMemUsageExpression(namespace, name, kind) } + case autoscalingv2.ContainerResourceMetricSourceType: + switch metric.ContainerResource.Name { + case "cpu": + expressionQuery = GetContainerCpuUsageExpression(namespace, name, kind, metric.ContainerResource.Container) + case "memory": + expressionQuery = GetContainerMemUsageExpression(namespace, name, kind, metric.ContainerResource.Container) + } case autoscalingv2.PodsMetricSourceType: var labels []string if metric.Pods.Metric.Selector != nil { diff --git a/pkg/utils/pod.go b/pkg/utils/pod.go index eef7ea3e0..929cfa0f2 100644 --- a/pkg/utils/pod.go +++ b/pkg/utils/pod.go @@ -110,11 +110,14 @@ func EvictPodWithGracePeriod(client clientset.Interface, pod *v1.Pod, gracePerio return client.CoreV1().Pods(pod.Namespace).EvictV1beta1(context.Background(), e) } -// CalculatePodRequests sum request total from pods -func CalculatePodRequests(pods []v1.Pod, resource v1.ResourceName) (int64, error) { +// CalculatePodRequests sum request total from pods. If the containerName is specified, the total amount of requests for that container will be calculated. +func CalculatePodRequests(pods []v1.Pod, resource v1.ResourceName, containerName string) (int64, error) { var requests int64 for _, pod := range pods { for _, c := range pod.Spec.Containers { + if containerName != "" && c.Name != containerName { + continue + } if containerRequest, ok := c.Resources.Requests[resource]; ok { requests += containerRequest.MilliValue() } else { diff --git a/pkg/utils/pod_test.go b/pkg/utils/pod_test.go index d1e64fd39..badeecdcf 100644 --- a/pkg/utils/pod_test.go +++ b/pkg/utils/pod_test.go @@ -61,9 +61,10 @@ func TestCalculatePodRequests(t *testing.T) { } tests := []struct { - description string - resource v1.ResourceName - expect int64 + description string + resource v1.ResourceName + containerName string + expect int64 }{ { description: "calculate cpu request total", @@ -75,10 +76,22 @@ func TestCalculatePodRequests(t *testing.T) { resource: v1.ResourceMemory, expect: 60000, }, + { + description: "calculate cpu request total of container1", + resource: v1.ResourceCPU, + containerName: "container1", + expect: 3000, + }, + { + description: "calculate memory request total of container1", + resource: v1.ResourceMemory, + containerName: "container1", + expect: 30000, + }, } for _, test := range tests { - requests, err := CalculatePodRequests(pods, test.resource) + requests, err := CalculatePodRequests(pods, test.resource, test.containerName) if err != nil { t.Errorf("Failed to calculatePodRequests: %v", err) } From 8b1500630c12aabca3d469b5ef8bcff3e8d69f44 Mon Sep 17 00:00:00 2001 From: whitebear009 Date: Wed, 30 Aug 2023 17:46:32 +0800 Subject: [PATCH 08/18] remove useless function (cherry picked from commit b5475b7f9816dd29e924b43275d0d3629ddc64db) --- pkg/controller/ehpa/hpa.go | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/pkg/controller/ehpa/hpa.go b/pkg/controller/ehpa/hpa.go index 9871716e8..69ac67fea 100644 --- a/pkg/controller/ehpa/hpa.go +++ b/pkg/controller/ehpa/hpa.go @@ -9,11 +9,9 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/scale" "k8s.io/client-go/util/retry" "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client" @@ -393,26 +391,3 @@ func (c *EffectiveHPAController) propagateLabelAndAnnotation(ehpa *autoscalingap } } } - -func getAvailablePods(ctx context.Context, restMapper meta.RESTMapper, kubeClient client.Client, scaleClient scale.ScalesGetter, ehpa autoscalingapi.EffectiveHorizontalPodAutoscaler) ([]v1.Pod, error) { - scale, _, err := utils.GetScale(ctx, restMapper, scaleClient, ehpa.Namespace, ehpa.Spec.ScaleTargetRef) - if err != nil { - return nil, err - } - - pods, err := utils.GetPodsFromScale(kubeClient, scale) - if err != nil { - return nil, err - } - - if len(pods) == 0 { - return nil, fmt.Errorf("no pods returns from scale object. ") - } - - availablePods := utils.GetAvailablePods(pods) - if len(availablePods) == 0 { - return nil, fmt.Errorf("failed to get available pods. ") - } - - return availablePods, nil -} From 53835b2c4d88d71350d6be5b3e4f3e6098b50c49 Mon Sep 17 00:00:00 2001 From: qmhu Date: Fri, 22 Sep 2023 17:10:42 +0800 Subject: [PATCH 09/18] extension label for prom query (cherry picked from commit 4c29e75c96b0ed0425e528f2b6eca88f77f4d2fc) --- cmd/craned/app/manager.go | 3 + cmd/craned/app/options/options.go | 1 + pkg/providers/config.go | 1 + pkg/utils/expression_prom_default.go | 92 +++++++++++++++++++--------- 4 files changed, 67 insertions(+), 30 deletions(-) diff --git a/cmd/craned/app/manager.go b/cmd/craned/app/manager.go index d95c9ffcb..b18d08a2a 100644 --- a/cmd/craned/app/manager.go +++ b/cmd/craned/app/manager.go @@ -52,6 +52,7 @@ import ( "github.com/gocrane/crane/pkg/recommendation" "github.com/gocrane/crane/pkg/server" serverconfig "github.com/gocrane/crane/pkg/server/config" + "github.com/gocrane/crane/pkg/utils" "github.com/gocrane/crane/pkg/utils/target" "github.com/gocrane/crane/pkg/webhooks" ) @@ -256,6 +257,8 @@ func initDataSources(mgr ctrl.Manager, opts *options.Options) (map[providers.Dat hybridDataSources[providers.PrometheusDataSource] = provider realtimeDataSources[providers.PrometheusDataSource] = provider historyDataSources[providers.PrometheusDataSource] = provider + + utils.SetExtensionLabels(opts.DataSourcePromConfig.ExtensionLabels) } } return realtimeDataSources, historyDataSources, hybridDataSources diff --git a/cmd/craned/app/options/options.go b/cmd/craned/app/options/options.go index 47299b2f5..b33db94c0 100644 --- a/cmd/craned/app/options/options.go +++ b/cmd/craned/app/options/options.go @@ -121,6 +121,7 @@ func (o *Options) AddFlags(flags *pflag.FlagSet) { flags.StringVar(&o.DataSourcePromConfig.AdapterConfigMapKey, "prometheus-adapter-configmap-key", "", "prometheus adapter-configmap key") flags.StringVar(&o.DataSourcePromConfig.AdapterConfig, "prometheus-adapter-config", "", "prometheus adapter-config path") flags.StringVar(&o.DataSourcePromConfig.AdapterExtensionLabels, "prometheus-adapter-extension-labels", "", "prometheus adapter extension-labels for expressionQuery") + flags.StringVar(&o.DataSourcePromConfig.ExtensionLabels, "extension-labels", "", "extension-labels for every prometheus query") flags.StringVar(&o.DataSourcePromConfig.Auth.Username, "prometheus-auth-username", "", "prometheus auth username") flags.StringVar(&o.DataSourcePromConfig.Auth.Password, "prometheus-auth-password", "", "prometheus auth password") flags.StringVar(&o.DataSourcePromConfig.Auth.BearerToken, "prometheus-auth-bearertoken", "", "prometheus auth bearertoken") diff --git a/pkg/providers/config.go b/pkg/providers/config.go index 054b8696d..32fdef34d 100644 --- a/pkg/providers/config.go +++ b/pkg/providers/config.go @@ -13,6 +13,7 @@ type PromConfig struct { AdapterConfigMapKey string AdapterConfig string AdapterExtensionLabels string + ExtensionLabels string Timeout time.Duration KeepAlive time.Duration InsecureSkipVerify bool diff --git a/pkg/utils/expression_prom_default.go b/pkg/utils/expression_prom_default.go index 9d9f00c67..c97faf863 100644 --- a/pkg/utils/expression_prom_default.go +++ b/pkg/utils/expression_prom_default.go @@ -2,46 +2,48 @@ package utils import ( "fmt" + "strings" ) // todo: later we change these templates to configurable like prometheus-adapter const ( + ExtensionLabelsHolder = `EXTENSION_LABELS_HOLDER` // WorkloadCpuUsageExprTemplate is used to query workload cpu usage by promql, param is namespace,workload-name,duration str - WorkloadCpuUsageExprTemplate = `sum(irate(container_cpu_usage_seconds_total{namespace="%s",pod=~"%s",container!=""}[%s]))` + WorkloadCpuUsageExprTemplate = `sum(irate(container_cpu_usage_seconds_total{namespace="%s",pod=~"%s",container!=""EXTENSION_LABELS_HOLDER}[%s]))` // WorkloadMemUsageExprTemplate is used to query workload mem usage by promql, param is namespace, workload-name - WorkloadMemUsageExprTemplate = `sum(container_memory_working_set_bytes{namespace="%s",pod=~"%s",container!=""})` + WorkloadMemUsageExprTemplate = `sum(container_memory_working_set_bytes{namespace="%s",pod=~"%s",container!=""EXTENSION_LABELS_HOLDER})` // following is node exporter metric for node cpu/memory usage // NodeCpuUsageExprTemplate is used to query node cpu usage by promql, param is node name which prometheus scrape, duration str - NodeCpuUsageExprTemplate = `sum(count(node_cpu_seconds_total{mode="idle",instance=~"(%s)(:\\d+)?"}) by (mode, cpu)) - sum(irate(node_cpu_seconds_total{mode="idle",instance=~"(%s)(:\\d+)?"}[%s]))` + NodeCpuUsageExprTemplate = `sum(count(node_cpu_seconds_total{mode="idle",instance=~"(%s)(:\\d+)?"EXTENSION_LABELS_HOLDER}) by (mode, cpu)) - sum(irate(node_cpu_seconds_total{mode="idle",instance=~"(%s)(:\\d+)?"EXTENSION_LABELS_HOLDER}[%s]))` // NodeMemUsageExprTemplate is used to query node memory usage by promql, param is node name, node name which prometheus scrape - NodeMemUsageExprTemplate = `sum(node_memory_MemTotal_bytes{instance=~"(%s)(:\\d+)?"} - node_memory_MemAvailable_bytes{instance=~"(%s)(:\\d+)?"})` + NodeMemUsageExprTemplate = `sum(node_memory_MemTotal_bytes{instance=~"(%s)(:\\d+)?EXTENSION_LABELS_HOLDER"} - node_memory_MemAvailable_bytes{instance=~"(%s)(:\\d+)?"EXTENSION_LABELS_HOLDER})` // NodeCpuRequestUtilizationExprTemplate is used to query node cpu request utilization by promql, param is node name, node name which prometheus scrape - NodeCpuRequestUtilizationExprTemplate = `sum(kube_pod_container_resource_requests{node="%s", resource="cpu", unit="core"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="cpu", unit="core"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) ` + NodeCpuRequestUtilizationExprTemplate = `sum(kube_pod_container_resource_requests{node="%s", resource="cpu", unit="core"EXTENSION_LABELS_HOLDER} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="cpu", unit="core"EXTENSION_LABELS_HOLDER} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) ` // NodeMemRequestUtilizationExprTemplate is used to query node memory request utilization by promql, param is node name, node name which prometheus scrape - NodeMemRequestUtilizationExprTemplate = `sum(kube_pod_container_resource_requests{node="%s", resource="memory", unit="byte", namespace!=""} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="memory", unit="byte"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) ` + NodeMemRequestUtilizationExprTemplate = `sum(kube_pod_container_resource_requests{node="%s", resource="memory", unit="byte", namespace!=""EXTENSION_LABELS_HOLDER} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="memory", unit="byte"EXTENSION_LABELS_HOLDER} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) ` // NodeCpuUsageUtilizationExprTemplate is used to query node memory usage utilization by promql, param is node name, node name which prometheus scrape - NodeCpuUsageUtilizationExprTemplate = `sum(label_replace(irate(container_cpu_usage_seconds_total{instance="%s", container!="POD", container!="",image!=""}[1h]), "node", "$1", "instance", "(^[^:]+)") * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="cpu", unit="core"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) ` + NodeCpuUsageUtilizationExprTemplate = `sum(label_replace(irate(container_cpu_usage_seconds_total{instance="%s", container!="POD", container!="",image!=""EXTENSION_LABELS_HOLDER}[1h]), "node", "$1", "instance", "(^[^:]+)") * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="cpu", unit="core"EXTENSION_LABELS_HOLDER} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) ` // NodeMemUsageUtilizationExprTemplate is used to query node memory usage utilization by promql, param is node name, node name which prometheus scrape - NodeMemUsageUtilizationExprTemplate = `sum(label_replace(container_memory_usage_bytes{instance="%s", namespace!="",container!="POD", container!="",image!=""}, "node", "$1", "instance", "(^[^:]+)") * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="memory", unit="byte"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) ` + NodeMemUsageUtilizationExprTemplate = `sum(label_replace(container_memory_usage_bytes{instance="%s", namespace!="",container!="POD", container!="",image!=""EXTENSION_LABELS_HOLDER}, "node", "$1", "instance", "(^[^:]+)") * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="memory", unit="byte"EXTENSION_LABELS_HOLDER} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) ` // PodCpuUsageExprTemplate is used to query pod cpu usage by promql, param is namespace,pod, duration str - PodCpuUsageExprTemplate = `sum(irate(container_cpu_usage_seconds_total{container!="POD",namespace="%s",pod="%s"}[%s]))` + PodCpuUsageExprTemplate = `sum(irate(container_cpu_usage_seconds_total{container!="POD",namespace="%s",pod="%s"EXTENSION_LABELS_HOLDER}[%s]))` // PodMemUsageExprTemplate is used to query pod cpu usage by promql, param is namespace,pod - PodMemUsageExprTemplate = `sum(container_memory_working_set_bytes{container!="POD",namespace="%s",pod="%s"})` + PodMemUsageExprTemplate = `sum(container_memory_working_set_bytes{container!="POD",namespace="%s",pod="%s"EXTENSION_LABELS_HOLDER})` // ContainerCpuUsageExprTemplate is used to query container cpu usage by promql, param is namespace,pod,container duration str - ContainerCpuUsageExprTemplate = `irate(container_cpu_usage_seconds_total{container!="POD",namespace="%s",pod=~"%s",container="%s"}[%s])` + ContainerCpuUsageExprTemplate = `irate(container_cpu_usage_seconds_total{container!="POD",namespace="%s",pod=~"%s",container="%s"EXTENSION_LABELS_HOLDER}[%s])` // ContainerMemUsageExprTemplate is used to query container cpu usage by promql, param is namespace,pod,container - ContainerMemUsageExprTemplate = `container_memory_working_set_bytes{container!="POD",namespace="%s",pod=~"%s",container="%s"}` + ContainerMemUsageExprTemplate = `container_memory_working_set_bytes{container!="POD",namespace="%s",pod=~"%s",container="%s"EXTENSION_LABELS_HOLDER}` - CustomerExprTemplate = `sum(%s{%s})` + CustomerExprTemplate = `sum(%s{%sEXTENSION_LABELS_HOLDER})` // Container network cumulative count of bytes received - queryFmtNetReceiveBytes = `sum(rate(container_network_receive_bytes_total{namespace="%s",pod=~"%s",container!=""}[3m]))` + queryFmtNetReceiveBytes = `sum(rate(container_network_receive_bytes_total{namespace="%s",pod=~"%s",container!=""EXTENSION_LABELS_HOLDER}[3m]))` // Container network cumulative count of bytes transmitted - queryFmtNetTransferBytes = `sum(rate(container_network_transmit_bytes_total{namespace="%s",pod=~"%s",container!=""}[3m]))` + queryFmtNetTransferBytes = `sum(rate(container_network_transmit_bytes_total{namespace="%s",pod=~"%s",container!=""EXTENSION_LABELS_HOLDER}[3m]))` ) const ( @@ -51,6 +53,31 @@ const ( PostRegMatchesPodStatefulset = `[0-9]+$` ) +var ExtensionLabelArray []string +var extensionLabelsString string + +func SetExtensionLabels(extensionLabels string) { + if extensionLabels != "" { + for _, label := range strings.Split(extensionLabels, ",") { + ExtensionLabelArray = append(ExtensionLabelArray, label) + } + + extensionLabelsString = "," + for index, label := range ExtensionLabelArray { + labelArr := strings.Split(label, "=") + if len(labelArr) != 2 { + // skip the invalid kv + continue + } + + extensionLabelsString += fmt.Sprintf("%s=\"%s\"", labelArr[0], labelArr[1]) + if index != len(ExtensionLabelArray)-1 { + extensionLabelsString += "," + } + } + } +} + func GetPodNameReg(resourceName string, resourceType string) string { switch resourceType { case "DaemonSet": @@ -66,61 +93,66 @@ func GetPodNameReg(resourceName string, resourceType string) string { } func GetCustomerExpression(metricName string, labels string) string { - return fmt.Sprintf(CustomerExprTemplate, metricName, labels) + return fmtSprintfInternal(CustomerExprTemplate, metricName, labels) } func GetWorkloadCpuUsageExpression(namespace string, name string, kind string) string { - return fmt.Sprintf(WorkloadCpuUsageExprTemplate, namespace, GetPodNameReg(name, kind), "3m") + return fmtSprintfInternal(WorkloadCpuUsageExprTemplate, namespace, GetPodNameReg(name, kind), "3m") } func GetWorkloadMemUsageExpression(namespace string, name string, kind string) string { - return fmt.Sprintf(WorkloadMemUsageExprTemplate, namespace, GetPodNameReg(name, kind)) + return fmtSprintfInternal(WorkloadMemUsageExprTemplate, namespace, GetPodNameReg(name, kind)) } func GetContainerCpuUsageExpression(namespace string, workloadName string, kind string, containerName string) string { - return fmt.Sprintf(ContainerCpuUsageExprTemplate, namespace, GetPodNameReg(workloadName, kind), containerName, "3m") + return fmtSprintfInternal(ContainerCpuUsageExprTemplate, namespace, GetPodNameReg(workloadName, kind), containerName, "3m") } func GetContainerMemUsageExpression(namespace string, workloadName string, kind string, containerName string) string { - return fmt.Sprintf(ContainerMemUsageExprTemplate, namespace, GetPodNameReg(workloadName, kind), containerName) + return fmtSprintfInternal(ContainerMemUsageExprTemplate, namespace, GetPodNameReg(workloadName, kind), containerName) } func GetPodCpuUsageExpression(namespace string, name string) string { - return fmt.Sprintf(PodCpuUsageExprTemplate, namespace, name, "3m") + return fmtSprintfInternal(PodCpuUsageExprTemplate, namespace, name, "3m") } func GetPodMemUsageExpression(namespace string, name string) string { - return fmt.Sprintf(PodMemUsageExprTemplate, namespace, name) + return fmtSprintfInternal(PodMemUsageExprTemplate, namespace, name) } func GetNodeCpuUsageExpression(nodeName string) string { - return fmt.Sprintf(NodeCpuUsageExprTemplate, nodeName, nodeName, "3m") + return fmtSprintfInternal(NodeCpuUsageExprTemplate, nodeName, nodeName, "3m") } func GetNodeMemUsageExpression(nodeName string) string { - return fmt.Sprintf(NodeMemUsageExprTemplate, nodeName, nodeName) + return fmtSprintfInternal(NodeMemUsageExprTemplate, nodeName, nodeName) } func GetNodeCpuRequestUtilizationExpression(nodeName string) string { - return fmt.Sprintf(NodeCpuRequestUtilizationExprTemplate, nodeName, nodeName) + return fmtSprintfInternal(NodeCpuRequestUtilizationExprTemplate, nodeName, nodeName) } func GetNodeMemRequestUtilizationExpression(nodeName string) string { - return fmt.Sprintf(NodeMemRequestUtilizationExprTemplate, nodeName, nodeName) + return fmtSprintfInternal(NodeMemRequestUtilizationExprTemplate, nodeName, nodeName) } func GetNodeCpuUsageUtilizationExpression(nodeName string) string { - return fmt.Sprintf(NodeCpuUsageUtilizationExprTemplate, nodeName, nodeName) + return fmtSprintfInternal(NodeCpuUsageUtilizationExprTemplate, nodeName, nodeName) } func GetNodeMemUsageUtilizationExpression(nodeName string) string { - return fmt.Sprintf(NodeMemUsageUtilizationExprTemplate, nodeName, nodeName) + return fmtSprintfInternal(NodeMemUsageUtilizationExprTemplate, nodeName, nodeName) } func GetWorkloadNetReceiveBytesExpression(namespace string, name string, kind string) string { - return fmt.Sprintf(queryFmtNetReceiveBytes, namespace, GetPodNameReg(name, kind)) + return fmtSprintfInternal(queryFmtNetReceiveBytes, namespace, GetPodNameReg(name, kind)) } func GetWorkloadNetTransferBytesExpression(namespace string, name string, kind string) string { - return fmt.Sprintf(queryFmtNetTransferBytes, namespace, GetPodNameReg(name, kind)) + return fmtSprintfInternal(queryFmtNetTransferBytes, namespace, GetPodNameReg(name, kind)) +} + +func fmtSprintfInternal(format string, a ...interface{}) string { + formatReplaced := strings.ReplaceAll(format, ExtensionLabelsHolder, extensionLabelsString) + return fmt.Sprintf(formatReplaced, a...) } From c8591e87adb93e96379c446f5bb915fff389f801 Mon Sep 17 00:00:00 2001 From: qmhu Date: Tue, 24 Oct 2023 11:20:55 +0800 Subject: [PATCH 10/18] fix rr controller npe (cherry picked from commit bb5c1e49587d87383c567d6b757c2e4c6db39511) --- .../recommendation/recommendation_rule_controller.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pkg/controller/recommendation/recommendation_rule_controller.go b/pkg/controller/recommendation/recommendation_rule_controller.go index f3e9a76e7..fa8ec55b4 100644 --- a/pkg/controller/recommendation/recommendation_rule_controller.go +++ b/pkg/controller/recommendation/recommendation_rule_controller.go @@ -215,9 +215,11 @@ func (c *RecommendationRuleController) doReconcile(ctx context.Context, recommen for _, recommendation := range currRecommendations.Items { exist := false for _, id := range identitiesArray { - if recommendation.UID == id.Recommendation.UID { - exist = true - break + if id.Recommendation != nil { + if recommendation.UID == id.Recommendation.UID { + exist = true + break + } } } From 8f94ffc19651430c9d5ed12e58126ba68d9cf932 Mon Sep 17 00:00:00 2001 From: roczzhang Date: Mon, 27 Nov 2023 15:24:49 +0800 Subject: [PATCH 11/18] Convergence RBAC permissions (cherry picked from commit 3ae52c673431cbb0dbbd2fb835fb9ee3658c0b3e) --- deploy/craned/rbac.yaml | 120 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 113 insertions(+), 7 deletions(-) diff --git a/deploy/craned/rbac.yaml b/deploy/craned/rbac.yaml index aa62c04d2..16ec973aa 100644 --- a/deploy/craned/rbac.yaml +++ b/deploy/craned/rbac.yaml @@ -1,13 +1,119 @@ apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: craned + namespace: crane-system +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - create +- apiGroups: + - "" + resourceNames: + - craned + resources: + - configmaps + verbs: + - get + - patch + - update +- apiGroups: + - "" + resourceNames: + - clusters-secret-store + resources: + - secrets + verbs: + - get +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - patch + - update + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: craned rules: - - apiGroups: [ '*' ] - resources: [ '*' ] - verbs: [ "*" ] +- apiGroups: + - "" + resources: + - configmaps + - pods + - nodes + verbs: + - get + - list + - watch +- apiGroups: + - analysis.crane.io + resources: + - "*" + verbs: + - "*" +- apiGroups: + - apps + resources: + - daemonsets + - deployments + - deployments/scale + - statefulsets + - statefulsets/scale + verbs: + - get + - list + - watch + - update +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - '*' +- apiGroups: + - autoscaling.crane.io + resources: + - '*' + verbs: + - '*' +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update +- apiGroups: + - prediction.crane.io + resources: + - '*' + verbs: + - '*' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: craned + namespace: crane-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: craned +subjects: +- kind: ServiceAccount + name: craned + namespace: crane-system --- - apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: @@ -17,6 +123,6 @@ roleRef: kind: ClusterRole name: craned subjects: - - kind: ServiceAccount - name: craned - namespace: crane-system \ No newline at end of file +- kind: ServiceAccount + name: craned + namespace: crane-system \ No newline at end of file From 8f9baa8039cfb1b47c5a5535960bc51a245260b1 Mon Sep 17 00:00:00 2001 From: whitebear009 Date: Thu, 28 Dec 2023 10:44:53 +0800 Subject: [PATCH 12/18] tsp indicator isolated from ehpa (cherry picked from commit 2982d0f5bc796a089d6bca2ef3ca07ed578769c5) --- pkg/metrics/metric_collector.go | 43 ++++++++++++++++----------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/pkg/metrics/metric_collector.go b/pkg/metrics/metric_collector.go index 819d3d1e6..348770d08 100644 --- a/pkg/metrics/metric_collector.go +++ b/pkg/metrics/metric_collector.go @@ -96,29 +96,7 @@ func (c *CraneMetricCollector) Collect(ch chan<- prometheus.Metric) { if err != nil { klog.Errorf("Failed to list ehpa: %v", err) } - var predictionMetrics []PredictionMetric for _, ehpa := range ehpaList.Items { - namespace := ehpa.Namespace - if ehpa.Spec.Prediction != nil { - var tsp predictionapi.TimeSeriesPrediction - tspName := "ehpa-" + ehpa.Name - - err := c.Get(context.TODO(), client.ObjectKey{Namespace: namespace, Name: tspName}, &tsp) - if err != nil { - klog.Errorf("Failed to get tsp: %v", err) - return - } - metricListTsp := c.getMetricsTsp(&tsp) - for _, metric := range metricListTsp { - if MetricContains(predictionMetrics, metric) { - continue - } - - ch <- prometheus.NewMetricWithTimestamp(metric.Timestamp, prometheus.MustNewConstMetric(metric.Desc, prometheus.GaugeValue, metric.MetricValue, metric.TargetKind, metric.TargetName, metric.TargetNamespace, metric.ResourceIdentifier, metric.Algorithm)) - predictionMetrics = append(predictionMetrics, metric) - } - } - if ehpa.Spec.Crons != nil { metricCron, err := c.getMetricsCron(&ehpa) if err != nil { @@ -130,6 +108,27 @@ func (c *CraneMetricCollector) Collect(ch chan<- prometheus.Metric) { } } } + + if utilfeature.DefaultFeatureGate.Enabled(features.CraneTimeSeriesPrediction) { + var tspList predictionapi.TimeSeriesPredictionList + err := c.List(context.TODO(), &tspList) + if err != nil { + klog.Errorf("Failed to list tsp: %v", err) + } + + var predictionMetrics []PredictionMetric + for _, tsp := range tspList.Items { + metricListTsp := c.getMetricsTsp(&tsp) + for _, metric := range metricListTsp { + if MetricContains(predictionMetrics, metric) { + continue + } + + ch <- prometheus.NewMetricWithTimestamp(metric.Timestamp, prometheus.MustNewConstMetric(metric.Desc, prometheus.GaugeValue, metric.MetricValue, metric.TargetKind, metric.TargetName, metric.TargetNamespace, metric.ResourceIdentifier, metric.Algorithm)) + predictionMetrics = append(predictionMetrics, metric) + } + } + } metricsRuleError := c.getMetricsRuleError() for _, i := range metricsRuleError { ch <- i From 73d3b0ca61965217a9fe54abb557bac1d05c7351 Mon Sep 17 00:00:00 2001 From: brucejunlli Date: Thu, 18 Jan 2024 11:01:39 +0800 Subject: [PATCH 13/18] change update workload to update workload status (cherry picked from commit 8380d7fd140235b8cbb2af14f98364facdd9d8d2) --- deploy/craned/rbac.yaml | 11 ++++++++++- pkg/controller/recommendation/updater.go | 3 ++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/deploy/craned/rbac.yaml b/deploy/craned/rbac.yaml index 16ec973aa..275648654 100644 --- a/deploy/craned/rbac.yaml +++ b/deploy/craned/rbac.yaml @@ -72,7 +72,16 @@ rules: - get - list - watch - - update +- apiGroups: + - apps + resources: + - daemonsets/status + - deployments/status + - deployments/scale + - statefulsets/status + - statefulsets/scale + verbs: + - update - apiGroups: - autoscaling resources: diff --git a/pkg/controller/recommendation/updater.go b/pkg/controller/recommendation/updater.go index 1bc8178ab..7a2359096 100644 --- a/pkg/controller/recommendation/updater.go +++ b/pkg/controller/recommendation/updater.go @@ -99,7 +99,8 @@ func (c *RecommendationController) UpdateRecommendation(ctx context.Context, rec if needUpdate { unstructed.SetAnnotations(annotation) - err = c.Client.Update(ctx, unstructed) + //Convergence craned permissions + err = c.Client.Status().Update(ctx, unstructed) if err != nil { return false, fmt.Errorf("update target annotation failed: %v. ", err) } From a8ca3b3606828decb7a92a16a07f25f101d32739 Mon Sep 17 00:00:00 2001 From: Cloudzp Date: Sun, 7 Apr 2024 18:20:07 +0800 Subject: [PATCH 14/18] fix issues #898 (cherry picked from commit c48a90b0080ac22bd4e110a86167145247684086) --- .../recommendation_rule_controller.go | 121 +++++++++++++++++- .../recommendation_rule_controller_test.go | 113 ++++++++++++++++ 2 files changed, 229 insertions(+), 5 deletions(-) create mode 100644 pkg/controller/recommendation/recommendation_rule_controller_test.go diff --git a/pkg/controller/recommendation/recommendation_rule_controller.go b/pkg/controller/recommendation/recommendation_rule_controller.go index fa8ec55b4..d6ebfa77b 100644 --- a/pkg/controller/recommendation/recommendation_rule_controller.go +++ b/pkg/controller/recommendation/recommendation_rule_controller.go @@ -3,6 +3,10 @@ package recommendation import ( "context" "fmt" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic/dynamicinformer" + "k8s.io/client-go/tools/cache" "sort" "strconv" "strings" @@ -54,6 +58,7 @@ type RecommendationRuleController struct { dynamicClient dynamic.Interface discoveryClient discovery.DiscoveryInterface Provider providers.History + dynamicLister DynamicLister } func (c *RecommendationRuleController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { @@ -147,9 +152,10 @@ func (c *RecommendationRuleController) doReconcile(ctx context.Context, recommen keys = append(keys, k) } sort.Strings(keys) // sort key to get a certain order + recommendationIndex := NewRecommendationIndex(currRecommendations) for _, key := range keys { id := identities[key] - id.Recommendation = GetRecommendationFromIdentity(identities[key], currRecommendations) + id.Recommendation = recommendationIndex.GetRecommendation(id) identitiesArray = append(identitiesArray, id) } @@ -243,6 +249,8 @@ func (c *RecommendationRuleController) SetupWithManager(mgr ctrl.Manager) error c.kubeClient = kubernetes.NewForConfigOrDie(mgr.GetConfig()) c.discoveryClient = discovery.NewDiscoveryClientForConfigOrDie(mgr.GetConfig()) c.dynamicClient = dynamic.NewForConfigOrDie(mgr.GetConfig()) + dynamicInformerFactory := dynamicinformer.NewDynamicSharedInformerFactory(c.dynamicClient, 0) + c.dynamicLister = NewDynamicInformerLister(dynamicInformerFactory) return ctrl.NewControllerManagedBy(mgr). For(&analysisv1alph1.RecommendationRule{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})). @@ -264,19 +272,19 @@ func (c *RecommendationRuleController) getIdentities(ctx context.Context, recomm var unstructureds []unstructuredv1.Unstructured if recommendationRule.Spec.NamespaceSelector.Any { - unstructuredList, err := c.dynamicClient.Resource(*gvr).List(ctx, metav1.ListOptions{}) + unstructuredList, err := c.dynamicLister.List(ctx, *gvr, "") if err != nil { return nil, err } - unstructureds = append(unstructureds, unstructuredList.Items...) + unstructureds = append(unstructureds, unstructuredList...) } else { for _, namespace := range recommendationRule.Spec.NamespaceSelector.MatchNames { - unstructuredList, err := c.dynamicClient.Resource(*gvr).Namespace(namespace).List(ctx, metav1.ListOptions{}) + unstructuredList, err := c.dynamicLister.List(ctx, *gvr, namespace) if err != nil { return nil, err } - unstructureds = append(unstructureds, unstructuredList.Items...) + unstructureds = append(unstructureds, unstructuredList...) } } @@ -528,3 +536,106 @@ func IsConvertFromAnalytics(recommendationRule *analysisv1alph1.RecommendationRu return false, "" } + +// DynamicLister is a lister for dynamic resources. +type DynamicLister interface { + // List returns a list of resources matching the given groupVersionResource. + List(ctx context.Context, gvk schema.GroupVersionResource, namespace string) ([]unstructuredv1.Unstructured, error) +} + +type dynamicInformerLister struct { + dynamicLister map[schema.GroupVersionResource]cache.GenericLister + dynamicInformerFactory dynamicinformer.DynamicSharedInformerFactory + stopCh <-chan struct{} +} + +func NewDynamicInformerLister(dynamicInformerFactory dynamicinformer.DynamicSharedInformerFactory) DynamicLister { + return &dynamicInformerLister{ + dynamicLister: map[schema.GroupVersionResource]cache.GenericLister{}, + dynamicInformerFactory: dynamicInformerFactory, + stopCh: make(chan struct{}), + } +} + +func (d *dynamicInformerLister) List(ctx context.Context, gvr schema.GroupVersionResource, namespace string) ([]unstructuredv1.Unstructured, error) { + var ( + objects []runtime.Object + err error + ) + + lister, exists := d.dynamicLister[gvr] + if !exists { + lister = d.dynamicInformerFactory.ForResource(gvr).Lister() + d.dynamicLister[gvr] = lister + d.dynamicInformerFactory.Start(d.stopCh) + if !d.dynamicInformerFactory.WaitForCacheSync(d.stopCh)[gvr] { + return nil, fmt.Errorf("failed to sync informer for %s", gvr) + } + } + if namespace != "" { + objects, err = lister.ByNamespace(namespace).List(labels.Everything()) + } else { + objects, err = lister.List(labels.Everything()) + } + if err != nil { + return nil, err + } + + var unstructuredObjects []unstructuredv1.Unstructured + for _, obj := range objects { + unstructuredObj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(obj) + if err != nil { + return nil, err + } + unstructuredObjects = append(unstructuredObjects, unstructuredv1.Unstructured{Object: unstructuredObj}) + } + return unstructuredObjects, nil +} + +type IndexKey struct { + Namespace string + APIVersion string + Kind string + Name string + Recommender string +} + +type RecommendationIndex struct { + mtx sync.RWMutex + idx map[IndexKey]*analysisv1alph1.Recommendation +} + +func NewRecommendationIndex(recommendations analysisv1alph1.RecommendationList) *RecommendationIndex { + idx := make(map[IndexKey]*analysisv1alph1.Recommendation, len(recommendations.Items)) + for i := range recommendations.Items { + r := &recommendations.Items[i] + idx[createIndexKey(r)] = r + } + + return &RecommendationIndex{ + idx: idx, + } +} + +func createIndexKey(r *analysisv1alph1.Recommendation) IndexKey { + return IndexKey{ + Kind: r.Spec.TargetRef.Kind, + APIVersion: r.Spec.TargetRef.APIVersion, + Namespace: r.Spec.TargetRef.Namespace, + Name: r.Spec.TargetRef.Name, + Recommender: string(r.Spec.Type), + } +} + +func (idx *RecommendationIndex) GetRecommendation(id ObjectIdentity) *analysisv1alph1.Recommendation { + key := IndexKey{ + Kind: id.Kind, + APIVersion: id.APIVersion, + Namespace: id.Namespace, + Name: id.Name, + Recommender: id.Recommender, + } + idx.mtx.RLock() + defer idx.mtx.RUnlock() + return idx.idx[key] +} diff --git a/pkg/controller/recommendation/recommendation_rule_controller_test.go b/pkg/controller/recommendation/recommendation_rule_controller_test.go new file mode 100644 index 000000000..ba436ba66 --- /dev/null +++ b/pkg/controller/recommendation/recommendation_rule_controller_test.go @@ -0,0 +1,113 @@ +package recommendation + +import ( + analysisv1alph1 "github.com/gocrane/api/analysis/v1alpha1" + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "reflect" + "testing" +) + +func TestRecommendationIndex_GetRecommendation(t *testing.T) { + type fields struct { + recommendationList analysisv1alph1.RecommendationList + } + type args struct { + id ObjectIdentity + } + + tests := []struct { + name string + fields fields + args args + want *analysisv1alph1.Recommendation + }{ + { + name: "TestRecommendationIndex_GetRecommendation good case", + fields: fields{ + recommendationList: analysisv1alph1.RecommendationList{ + Items: []analysisv1alph1.Recommendation{ + { + ObjectMeta: v1.ObjectMeta{ + Name: "test-recommendation-rule", + Namespace: "test-namespace", + }, + Spec: analysisv1alph1.RecommendationSpec{ + TargetRef: corev1.ObjectReference{ + Namespace: "test-namespace", + Kind: "Deployment", + Name: "test-deployment-bar", + APIVersion: "app/v1", + }, + Type: analysisv1alph1.AnalysisTypeResource, + }, + }, + { + ObjectMeta: v1.ObjectMeta{ + Name: "test-recommendation-rule", + Namespace: "test-namespace", + }, + Spec: analysisv1alph1.RecommendationSpec{ + TargetRef: corev1.ObjectReference{ + Namespace: "test-namespace", + Kind: "Deployment", + Name: "test-deployment-foo", + APIVersion: "app/v1", + }, + Type: analysisv1alph1.AnalysisTypeResource, + }, + }, + }, + }, + }, + want: &analysisv1alph1.Recommendation{ + ObjectMeta: v1.ObjectMeta{ + Name: "test-recommendation-rule", + Namespace: "test-namespace", + }, + Spec: analysisv1alph1.RecommendationSpec{ + TargetRef: corev1.ObjectReference{ + Namespace: "test-namespace", + Kind: "Deployment", + Name: "test-deployment-name", + APIVersion: "app/v1", + }, + }, + }, + args: args{ + id: ObjectIdentity{ + Name: "test-deployment-name", + Namespace: "test-namespace", + APIVersion: "app/v1", + Kind: "Deployment", + Recommender: "Resource", + }, + }, + }, + { + name: "TestRecommendationIndex_GetRecommendation empty case", + fields: fields{ + recommendationList: analysisv1alph1.RecommendationList{ + Items: []analysisv1alph1.Recommendation{}, + }, + }, + args: args{ + id: ObjectIdentity{ + Name: "test-deployment-name", + Namespace: "test-namespace", + APIVersion: "app/v1", + Kind: "Deployment", + Recommender: "Resources", + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + idx := NewRecommendationIndex(tt.fields.recommendationList) + if got := idx.GetRecommendation(tt.args.id); !reflect.DeepEqual(got, tt.want) { + t.Errorf("GetRecommendation() = %v, want %v", got, tt.want) + } + }) + } +} From d4cce46c9294983469854eb9b5e65eed1f6600d5 Mon Sep 17 00:00:00 2001 From: Cloudzp Date: Tue, 16 Apr 2024 18:20:37 +0800 Subject: [PATCH 15/18] add metrics (cherry picked from commit 3bceccd13cd80bbcb59713b615c46ada5d100a46) --- .../recommendation/recommendation_checker.go | 2 +- .../recommendation/recommendation_rule_controller.go | 1 + pkg/metrics/analysis.go | 12 +++++++++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/pkg/controller/recommendation/recommendation_checker.go b/pkg/controller/recommendation/recommendation_checker.go index c3ebfa064..1d9a646e5 100644 --- a/pkg/controller/recommendation/recommendation_checker.go +++ b/pkg/controller/recommendation/recommendation_checker.go @@ -60,6 +60,6 @@ func (r Checker) runChecker() { "owner_name": recommend.Spec.TargetRef.Name, "update_status": updateStatus, "result_status": resultStatus, - }).Set(1) + }).Set(time.Now().Sub(recommend.Status.LastUpdateTime.Time).Seconds()) } } diff --git a/pkg/controller/recommendation/recommendation_rule_controller.go b/pkg/controller/recommendation/recommendation_rule_controller.go index d6ebfa77b..d6f5dd596 100644 --- a/pkg/controller/recommendation/recommendation_rule_controller.go +++ b/pkg/controller/recommendation/recommendation_rule_controller.go @@ -461,6 +461,7 @@ func executeIdentity(ctx context.Context, wg *sync.WaitGroup, recommenderMgr rec defer func() { if wg != nil { wg.Done() + metrics.RecommendationExecutionCounter.WithLabelValues(id.APIVersion, id.Kind, id.Namespace, id.Name, id.Recommender).Inc() } }() var message string diff --git a/pkg/metrics/analysis.go b/pkg/metrics/analysis.go index bd930a474..e0078fe7f 100644 --- a/pkg/metrics/analysis.go +++ b/pkg/metrics/analysis.go @@ -6,6 +6,16 @@ import ( ) var ( + RecommendationExecutionCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "crane", + Subsystem: "analysis", + Name: "recommendation_execution_total", + Help: "The number of times Recommendation has been executed", + }, + []string{"apiversion", "owner_kind", "namespace", "owner_name", "type"}, + ) + ResourceRecommendation = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: "crane", @@ -48,5 +58,5 @@ var ( ) func init() { - metrics.Registry.MustRegister(ResourceRecommendation, ReplicasRecommendation, SelectTargets, RecommendationsStatus) + metrics.Registry.MustRegister(RecommendationExecutionCounter, ResourceRecommendation, ReplicasRecommendation, SelectTargets, RecommendationsStatus) } From bcac6a5236554b7b7925dde84a3a18a21af877f1 Mon Sep 17 00:00:00 2001 From: Cloudzp Date: Wed, 17 Apr 2024 16:02:50 +0800 Subject: [PATCH 16/18] fix ut and fmt error (cherry picked from commit d8d5dd20a4bc5e3ba6f277f058e0a8d419253343) --- .../recommendation/recommendation_rule_controller.go | 9 ++++----- .../recommendation_rule_controller_test.go | 10 ++++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/pkg/controller/recommendation/recommendation_rule_controller.go b/pkg/controller/recommendation/recommendation_rule_controller.go index d6f5dd596..feccea912 100644 --- a/pkg/controller/recommendation/recommendation_rule_controller.go +++ b/pkg/controller/recommendation/recommendation_rule_controller.go @@ -3,10 +3,6 @@ package recommendation import ( "context" "fmt" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/client-go/dynamic/dynamicinformer" - "k8s.io/client-go/tools/cache" "sort" "strconv" "strings" @@ -19,12 +15,16 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" unstructuredv1 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/discovery" "k8s.io/client-go/dynamic" + "k8s.io/client-go/dynamic/dynamicinformer" "k8s.io/client-go/kubernetes" "k8s.io/client-go/scale" + "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/retry" "k8s.io/klog/v2" @@ -34,7 +34,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" analysisv1alph1 "github.com/gocrane/api/analysis/v1alpha1" - "github.com/gocrane/crane/pkg/known" "github.com/gocrane/crane/pkg/metrics" "github.com/gocrane/crane/pkg/oom" diff --git a/pkg/controller/recommendation/recommendation_rule_controller_test.go b/pkg/controller/recommendation/recommendation_rule_controller_test.go index ba436ba66..f96536fcd 100644 --- a/pkg/controller/recommendation/recommendation_rule_controller_test.go +++ b/pkg/controller/recommendation/recommendation_rule_controller_test.go @@ -1,11 +1,12 @@ package recommendation import ( + "reflect" + "testing" + analysisv1alph1 "github.com/gocrane/api/analysis/v1alpha1" corev1 "k8s.io/api/core/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "reflect" - "testing" ) func TestRecommendationIndex_GetRecommendation(t *testing.T) { @@ -69,14 +70,15 @@ func TestRecommendationIndex_GetRecommendation(t *testing.T) { TargetRef: corev1.ObjectReference{ Namespace: "test-namespace", Kind: "Deployment", - Name: "test-deployment-name", + Name: "test-deployment-bar", APIVersion: "app/v1", }, + Type: analysisv1alph1.AnalysisTypeResource, }, }, args: args{ id: ObjectIdentity{ - Name: "test-deployment-name", + Name: "test-deployment-bar", Namespace: "test-namespace", APIVersion: "app/v1", Kind: "Deployment", From df15e7525263b9973c6b5ff8e1a1a124bda9b243 Mon Sep 17 00:00:00 2001 From: Cloudzp Date: Thu, 22 Aug 2024 14:28:27 +0800 Subject: [PATCH 17/18] craned support klog flag --- cmd/craned/app/manager.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/craned/app/manager.go b/cmd/craned/app/manager.go index b18d08a2a..b939e9c28 100644 --- a/cmd/craned/app/manager.go +++ b/cmd/craned/app/manager.go @@ -17,6 +17,7 @@ import ( clientgoscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" "k8s.io/client-go/scale" + "k8s.io/component-base/logs" "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" @@ -84,6 +85,7 @@ func NewManagerCommand(ctx context.Context) *cobra.Command { cmd.Flags().AddGoFlagSet(flag.CommandLine) opts.AddFlags(cmd.Flags()) + logs.AddFlags(cmd.Flags()) utilfeature.DefaultMutableFeatureGate.AddFlag(cmd.Flags()) return cmd From 323c01fc420448bca104bed4af38de84a9111daf Mon Sep 17 00:00:00 2001 From: zhangpeng Date: Thu, 22 Aug 2024 15:45:14 +0800 Subject: [PATCH 18/18] Debug ci error and update go.yml --- .github/workflows/go.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 9392ad43c..6ec0b09df 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -13,6 +13,8 @@ jobs: cover: runs-on: ubuntu-latest steps: + - name: Check architecture + run: uname -m - name: Set up Go uses: actions/setup-go@v2 with: