Skip to content

Commit 77ff3d4

Browse files
authored
feat: PC-13045 Good total single query experimental support (#458)
## Motivation Enabling support for gathering good and total points using a single query. Currently for Splunk only. Sample YAML: ``` --- apiVersion: n9/v1alpha kind: SLO metadata: name: splunk-counts-calendar project: splunk spec: service: splunk-service indicator: metricSource: kind: Agent name: splunk project: splunk timeWindows: - unit: Day count: 1 calendar: startTime: 2021-04-09 00:00:00 timeZone: Europe/Warsaw budgetingMethod: Occurrences objectives: - displayName: So so target: 0.80 name: objective-1 countMetrics: incremental: false goodTotal: splunk: query: |- | mstats avg("spl.intr.resource_usage.IOWait.data.avg_cpu_pct") as n9good WHERE index="_metrics" span=15s | join type=left _time [ | mstats avg("spl.intr.resource_usage.IOWait.data.max_cpus_pct") as n9total WHERE index="_metrics" span=15s ] | rename _time as n9time | fields n9time n9good n9total ``` ## Summary Added new `goodTotal` field to count metrics spec Added validation for splunk query with new field names `n9time`, `n9good`, `n9total` ## Testing - Unit tests - Manual planned tests after sloctl and platform changes No release notes, as this is in the experimental stage.
1 parent 1a7c326 commit 77ff3d4

9 files changed

+366
-8
lines changed

cspell.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ words:
100100
- mockgen
101101
- mprofile
102102
- msgf
103+
- mstats
103104
- msteams
104105
- ningxia
105106
- nobl

manifest/v1alpha/slo/metrics.go

+10-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ type CountMetricsSpec struct {
1111
Incremental *bool `json:"incremental"`
1212
GoodMetric *MetricSpec `json:"good,omitempty"`
1313
BadMetric *MetricSpec `json:"bad,omitempty"`
14-
TotalMetric *MetricSpec `json:"total"`
14+
TotalMetric *MetricSpec `json:"total,omitempty"`
15+
// Experimental: Splunk only, a single query returning both good and total counts.
16+
GoodTotalMetric *MetricSpec `json:"goodTotal,omitempty"`
1517
}
1618

1719
// RawMetricSpec represents integration with a metric source for a particular objective.
@@ -143,6 +145,9 @@ func (s *Spec) CountMetricsCount() int {
143145
if objective.CountMetrics.BadMetric != nil {
144146
count++
145147
}
148+
if objective.CountMetrics.GoodTotalMetric != nil {
149+
count++
150+
}
146151
}
147152
}
148153
return count
@@ -168,6 +173,10 @@ func (s *Spec) CountMetrics() []*MetricSpec {
168173
countMetrics[i] = objective.CountMetrics.BadMetric
169174
i++
170175
}
176+
if objective.CountMetrics.GoodTotalMetric != nil {
177+
countMetrics[i] = objective.CountMetrics.GoodTotalMetric
178+
i++
179+
}
171180
}
172181
return countMetrics
173182
}

manifest/v1alpha/slo/metrics_bigquery_test.go

+12
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,18 @@ func TestBigQuery_CountMetrics(t *testing.T) {
1414
err := validate(slo)
1515
testutils.AssertNoError(t, slo, err)
1616
})
17+
t.Run("unsupported goodTotal single query", func(t *testing.T) {
18+
slo := validCountMetricSLO(v1alpha.BigQuery)
19+
slo.Spec.Objectives[0].CountMetrics = &CountMetricsSpec{
20+
Incremental: ptr(false),
21+
GoodTotalMetric: validMetricSpec(v1alpha.BigQuery),
22+
}
23+
err := validate(slo)
24+
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
25+
Prop: "spec.objectives[0].countMetrics.goodTotal",
26+
Code: joinErrorCodes(errCodeSingleQueryGoodOverTotalDisabled, validation.ErrorCodeOneOf),
27+
})
28+
})
1729
t.Run("projectId must be the same for good and total", func(t *testing.T) {
1830
slo := validCountMetricSLO(v1alpha.BigQuery)
1931
slo.Spec.Objectives[0].CountMetrics.TotalMetric.BigQuery.ProjectID = "1"

manifest/v1alpha/slo/metrics_splunk.go

+33
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,33 @@ package slo
33
import (
44
"regexp"
55

6+
"github.com/pkg/errors"
7+
68
"github.com/nobl9/nobl9-go/internal/validation"
9+
"github.com/nobl9/nobl9-go/manifest/v1alpha"
710
)
811

912
// SplunkMetric represents metric from Splunk
1013
type SplunkMetric struct {
1114
Query *string `json:"query"`
1215
}
1316

17+
var splunkCountMetricsLevelValidation = validation.New[CountMetricsSpec](
18+
validation.For(validation.GetSelf[CountMetricsSpec]()).
19+
Rules(
20+
validation.NewSingleRule(func(c CountMetricsSpec) error {
21+
if c.GoodTotalMetric != nil {
22+
if c.GoodMetric != nil || c.BadMetric != nil || c.TotalMetric != nil {
23+
return errors.New("goodTotal is mutually exclusive with good, bad, and total")
24+
}
25+
}
26+
return nil
27+
}).WithErrorCode(validation.ErrorCodeMutuallyExclusive)),
28+
).When(
29+
whenCountMetricsIs(v1alpha.Splunk),
30+
validation.WhenDescription("countMetrics is splunk"),
31+
)
32+
1433
var splunkValidation = validation.New[SplunkMetric](
1534
validation.ForPointer(func(s SplunkMetric) *string { return s.Query }).
1635
WithName("query").
@@ -24,3 +43,17 @@ var splunkValidation = validation.New[SplunkMetric](
2443
"index=svc-events", `"index"=svc-events`).
2544
WithDetails(`query has to contain index=<NAME> or "index"=<NAME>`)),
2645
)
46+
47+
var splunkSingleQueryValidation = validation.New[SplunkMetric](
48+
validation.ForPointer(func(s SplunkMetric) *string { return s.Query }).
49+
WithName("query").
50+
Required().
51+
Cascade(validation.CascadeModeStop).
52+
Rules(validation.StringNotEmpty()).
53+
Rules(
54+
validation.StringContains("n9time", "n9good", "n9total"),
55+
validation.StringMatchRegexp(
56+
regexp.MustCompile(`(\bindex\s*=.+)|("\bindex"\s*=.+)`),
57+
"index=svc-events", `"index"=svc-events`).
58+
WithDetails(`query has to contain index=<NAME> or "index"=<NAME>`)),
59+
)

manifest/v1alpha/slo/metrics_splunk_test.go

+113
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,116 @@ fields n9time n9value`,
7777
}
7878
})
7979
}
80+
81+
func TestSplunk_CountMetrics_SingleQuery(t *testing.T) {
82+
t.Run("passes", func(t *testing.T) {
83+
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
84+
err := validate(slo)
85+
testutils.AssertNoError(t, slo, err)
86+
})
87+
t.Run("required", func(t *testing.T) {
88+
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
89+
slo.Spec.Objectives[0].CountMetrics.GoodTotalMetric.Splunk.Query = nil
90+
err := validate(slo)
91+
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
92+
Prop: "spec.objectives[0].countMetrics.goodTotal.splunk.query",
93+
Code: validation.ErrorCodeRequired,
94+
})
95+
})
96+
t.Run("empty", func(t *testing.T) {
97+
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
98+
slo.Spec.Objectives[0].CountMetrics.GoodTotalMetric.Splunk.Query = ptr("")
99+
err := validate(slo)
100+
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
101+
Prop: "spec.objectives[0].countMetrics.goodTotal.splunk.query",
102+
Code: validation.ErrorCodeStringNotEmpty,
103+
})
104+
})
105+
t.Run("goodTotal mixed with total", func(t *testing.T) {
106+
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
107+
slo.Spec.Objectives[0].CountMetrics.TotalMetric = validMetricSpec(v1alpha.Splunk)
108+
err := validate(slo)
109+
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
110+
Prop: "spec.objectives[0].countMetrics",
111+
Code: validation.ErrorCodeMutuallyExclusive,
112+
})
113+
})
114+
t.Run("goodTotal mixed with good", func(t *testing.T) {
115+
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
116+
slo.Spec.Objectives[0].CountMetrics.GoodMetric = validMetricSpec(v1alpha.Splunk)
117+
err := validate(slo)
118+
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
119+
Prop: "spec.objectives[0].countMetrics",
120+
Code: validation.ErrorCodeMutuallyExclusive,
121+
})
122+
})
123+
t.Run("goodTotal mixed with bad", func(t *testing.T) {
124+
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
125+
slo.Spec.Objectives[0].CountMetrics.BadMetric = validMetricSpec(v1alpha.Splunk)
126+
err := validate(slo)
127+
testutils.AssertContainsErrors(t, slo, err, 2, testutils.ExpectedError{
128+
Prop: "spec.objectives[0].countMetrics.bad",
129+
Code: joinErrorCodes(errCodeBadOverTotalDisabled, validation.ErrorCodeOneOf),
130+
}, testutils.ExpectedError{
131+
Prop: "spec.objectives[0].countMetrics",
132+
Code: validation.ErrorCodeMutuallyExclusive,
133+
})
134+
})
135+
t.Run("invalid query", func(t *testing.T) {
136+
tests := map[string]struct {
137+
Query string
138+
ExpectedCode string
139+
}{
140+
"missing n9time": {
141+
Query: `
142+
| mstats avg("spl.intr.resource_usage.IOWait.data.avg_cpu_pct") as n9good WHERE index="_metrics" span=15s
143+
| join type=left _time [
144+
| mstats avg("spl.intr.resource_usage.IOWait.data.max_cpus_pct") as n9total WHERE index="_metrics" span=15s
145+
]
146+
| fields _time n9good n9total`,
147+
ExpectedCode: validation.ErrorCodeStringContains,
148+
},
149+
"missing n9good": {
150+
Query: `
151+
| mstats avg("spl.intr.resource_usage.IOWait.data.avg_cpu_pct") as good WHERE index="_metrics" span=15s
152+
| join type=left _time [
153+
| mstats avg("spl.intr.resource_usage.IOWait.data.max_cpus_pct") as n9total WHERE index="_metrics" span=15s
154+
]
155+
| rename _time as n9time
156+
| fields n9time good n9total`,
157+
ExpectedCode: validation.ErrorCodeStringContains,
158+
},
159+
"missing n9total": {
160+
Query: `
161+
| mstats avg("spl.intr.resource_usage.IOWait.data.avg_cpu_pct") as n9good WHERE index="_metrics" span=15s
162+
| join type=left _time [
163+
| mstats avg("spl.intr.resource_usage.IOWait.data.max_cpus_pct") as total WHERE index="_metrics" span=15s
164+
]
165+
| rename _time as n9time
166+
| fields n9time n9good total`,
167+
ExpectedCode: validation.ErrorCodeStringContains,
168+
},
169+
"missing index": {
170+
Query: `
171+
| mstats avg("spl.intr.resource_usage.IOWait.data.avg_cpu_pct") as n9good span=15s
172+
| join type=left _time [
173+
| mstats avg("spl.intr.resource_usage.IOWait.data.max_cpus_pct") as n9total span=15s
174+
]
175+
| rename _time as n9time
176+
| fields n9time n9good n9total`,
177+
ExpectedCode: validation.ErrorCodeStringMatchRegexp,
178+
},
179+
}
180+
for name, test := range tests {
181+
t.Run(name, func(t *testing.T) {
182+
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
183+
slo.Spec.Objectives[0].CountMetrics.GoodTotalMetric.Splunk.Query = ptr(test.Query)
184+
err := validate(slo)
185+
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
186+
Prop: "spec.objectives[0].countMetrics.goodTotal.splunk.query",
187+
Code: test.ExpectedCode,
188+
})
189+
})
190+
}
191+
})
192+
}

manifest/v1alpha/slo/metrics_test.go

+28
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
package slo
22

33
import (
4+
"slices"
45
"testing"
56

7+
"github.com/nobl9/nobl9-go/internal/testutils"
8+
"github.com/nobl9/nobl9-go/internal/validation"
9+
610
"github.com/stretchr/testify/assert"
711

812
"github.com/nobl9/nobl9-go/manifest/v1alpha"
@@ -21,3 +25,27 @@ func TestQuery(t *testing.T) {
2125
assert.NotEmpty(t, spec)
2226
}
2327
}
28+
29+
func Test_SingleQueryDisabled(t *testing.T) {
30+
skippedDataSources := []v1alpha.DataSourceType{
31+
v1alpha.ThousandEyes, // query is forbidden for this plugin
32+
}
33+
for _, src := range v1alpha.DataSourceTypeValues() {
34+
if slices.Contains(singleQueryGoodOverTotalEnabledSources, src) {
35+
continue
36+
}
37+
if slices.Contains(skippedDataSources, src) {
38+
continue
39+
}
40+
slo := validCountMetricSLO(src)
41+
slo.Spec.Objectives[0].CountMetrics = &CountMetricsSpec{
42+
Incremental: ptr(false),
43+
GoodTotalMetric: validMetricSpec(src),
44+
}
45+
err := validate(slo)
46+
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
47+
Prop: "spec.objectives[0].countMetrics.goodTotal",
48+
Code: joinErrorCodes(errCodeSingleQueryGoodOverTotalDisabled, validation.ErrorCodeOneOf),
49+
})
50+
}
51+
}

manifest/v1alpha/slo/metrics_validation.go

+37-7
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@ import (
1111
)
1212

1313
const (
14-
errCodeExactlyOneMetricType = "exactly_one_metric_type"
15-
errCodeBadOverTotalDisabled = "bad_over_total_disabled"
16-
errCodeExactlyOneMetricSpecType = "exactly_one_metric_spec_type"
17-
errCodeEitherBadOrGoodCountMetric = "either_bad_or_good_count_metric"
18-
errCodeTimeSliceTarget = "time_slice_target"
14+
errCodeExactlyOneMetricType = "exactly_one_metric_type"
15+
errCodeBadOverTotalDisabled = "bad_over_total_disabled"
16+
errCodeSingleQueryGoodOverTotalDisabled = "single_query_good_over_total_disabled"
17+
errCodeExactlyOneMetricSpecType = "exactly_one_metric_spec_type"
18+
errCodeEitherBadOrGoodCountMetric = "either_bad_or_good_count_metric"
19+
errCodeTimeSliceTarget = "time_slice_target"
1920
)
2021

2122
var specMetricsValidation = validation.New[Spec](
@@ -61,13 +62,13 @@ var countMetricsSpecValidation = validation.New[CountMetricsSpec](
6162
sumoLogicCountMetricsLevelValidation,
6263
instanaCountMetricsLevelValidation,
6364
redshiftCountMetricsLevelValidation,
64-
bigQueryCountMetricsLevelValidation),
65+
bigQueryCountMetricsLevelValidation,
66+
splunkCountMetricsLevelValidation),
6567
validation.ForPointer(func(c CountMetricsSpec) *bool { return c.Incremental }).
6668
WithName("incremental").
6769
Required(),
6870
validation.ForPointer(func(c CountMetricsSpec) *MetricSpec { return c.TotalMetric }).
6971
WithName("total").
70-
Required().
7172
Include(
7273
metricSpecValidation,
7374
countMetricsValidation,
@@ -84,6 +85,12 @@ var countMetricsSpecValidation = validation.New[CountMetricsSpec](
8485
Include(
8586
countMetricsValidation,
8687
metricSpecValidation),
88+
validation.ForPointer(func(c CountMetricsSpec) *MetricSpec { return c.GoodTotalMetric }).
89+
WithName("goodTotal").
90+
Rules(oneOfSingleQueryGoodOverTotalValidationRule).
91+
Include(
92+
countMetricsValidation,
93+
singleQueryMetricSpecValidation),
8794
)
8895

8996
var rawMetricsValidation = validation.New[RawMetricSpec](
@@ -106,6 +113,12 @@ var countMetricsValidation = validation.New[MetricSpec](
106113
instanaCountMetricsValidation),
107114
)
108115

116+
var singleQueryMetricSpecValidation = validation.New[MetricSpec](
117+
validation.ForPointer(func(m MetricSpec) *SplunkMetric { return m.Splunk }).
118+
WithName("splunk").
119+
Include(splunkSingleQueryValidation),
120+
)
121+
109122
var metricSpecValidation = validation.New[MetricSpec](
110123
validation.ForPointer(func(m MetricSpec) *AppDynamicsMetric { return m.AppDynamics }).
111124
WithName("appDynamics").
@@ -200,6 +213,17 @@ var oneOfBadOverTotalValidationRule = validation.NewSingleRule(func(v MetricSpec
200213
return validation.OneOf(badOverTotalEnabledSources...).Validate(v.DataSourceType())
201214
}).WithErrorCode(errCodeBadOverTotalDisabled)
202215

216+
var singleQueryGoodOverTotalEnabledSources = []v1alpha.DataSourceType{
217+
v1alpha.Splunk,
218+
}
219+
220+
// Support for single query good/total metrics is experimental.
221+
// Splunk is the only datasource integration to have this feature
222+
// - extend the list while adding support for next integrations.
223+
var oneOfSingleQueryGoodOverTotalValidationRule = validation.NewSingleRule(func(v MetricSpec) error {
224+
return validation.OneOf(singleQueryGoodOverTotalEnabledSources...).Validate(v.DataSourceType())
225+
}).WithErrorCode(errCodeSingleQueryGoodOverTotalDisabled)
226+
203227
var exactlyOneMetricSpecTypeValidationRule = validation.NewSingleRule(func(v Spec) error {
204228
if v.Indicator == nil {
205229
return nil
@@ -401,6 +425,12 @@ var timeSliceTargetsValidationRule = validation.NewSingleRule[Spec](func(s Spec)
401425
// the count metrics is of the given type.
402426
func whenCountMetricsIs(typ v1alpha.DataSourceType) func(c CountMetricsSpec) bool {
403427
return func(c CountMetricsSpec) bool {
428+
if slices.Contains(singleQueryGoodOverTotalEnabledSources, typ) {
429+
if c.GoodTotalMetric != nil && typ != c.GoodTotalMetric.DataSourceType() {
430+
return false
431+
}
432+
return c.GoodMetric != nil || c.BadMetric != nil || c.TotalMetric != nil
433+
}
404434
if c.TotalMetric == nil {
405435
return false
406436
}

0 commit comments

Comments
 (0)