From aa52c87670eeb932094194a50fcb40e3285404b6 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 20 Nov 2024 16:37:07 +0700 Subject: [PATCH 1/7] set `metrics_flush_interval` -> `60s` --- charts/logzio-telemetry/values.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/charts/logzio-telemetry/values.yaml b/charts/logzio-telemetry/values.yaml index d19046e8..208d3b09 100644 --- a/charts/logzio-telemetry/values.yaml +++ b/charts/logzio-telemetry/values.yaml @@ -700,6 +700,7 @@ serviceGraph: store: ttl: 5s max_items: 100000 + metrics_flush_interval: 60s service: pipelines: traces: From 86d6930c0fb1483f18309b9c4e6bb0fb124fc5fa Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 20 Nov 2024 16:37:28 +0700 Subject: [PATCH 2/7] add test for `servicegraph` metrics --- .github/workflows/logzio-telemetry-test.yaml | 1 + tests/metrics_e2e_test.go | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/logzio-telemetry-test.yaml b/.github/workflows/logzio-telemetry-test.yaml index 8285bfb0..a8de79ee 100644 --- a/.github/workflows/logzio-telemetry-test.yaml +++ b/.github/workflows/logzio-telemetry-test.yaml @@ -56,6 +56,7 @@ jobs: helm upgrade --install \ --set traces.enabled=true \ --set spm.enabled=true \ + --set serviceGraph.enabled=true \ --set metrics.enabled=true \ --set secrets.TracesToken=${{ secrets.LOGZIO_TRACES_TOKEN }} \ --set secrets.SpmToken=${{ secrets.LOGZIO_METRICS_TOKEN }} \ diff --git a/tests/metrics_e2e_test.go b/tests/metrics_e2e_test.go index ac38b9a3..46f9cdc0 100644 --- a/tests/metrics_e2e_test.go +++ b/tests/metrics_e2e_test.go @@ -3,13 +3,14 @@ package tests import ( "encoding/json" "fmt" - "go.uber.org/zap" "io" "net/http" "net/url" "os" "strings" "testing" + + "go.uber.org/zap" ) // MetricResponse represents the structure of the API response @@ -38,6 +39,20 @@ func TestContainerMetrics(t *testing.T) { testMetrics(t, requiredMetrics, escapedQuery) } +func TestServiceGraphMetrics(t 
*testing.T) { + requiredMetrics := map[string][]string{ + "traces_service_graph_request_total": {"client", "server", "connection_type"}, + "traces_service_graph_request_failed_total": {"client", "server", "connection_type"}, + "traces_service_graph_request_server_seconds": {"client", "server", "connection_type"}, + "traces_service_graph_request_client_seconds": {"client", "server", "connection_type"}, + "traces_service_graph_unpaired_spans_total": {"client", "server", "connection_type"}, + "traces_service_graph_dropped_spans_total": {"client", "server", "connection_type"}, + } + envId := os.Getenv("ENV_ID") + query := fmt.Sprintf(`{env_id='%s'}`, envId) + testMetrics(t, requiredMetrics, query) +} + func TestInfrastructureMetrics(t *testing.T) { if os.Getenv("KUBERNETES_ENV") == "eks-fargate" { t.Skip("Skipping infrastructure metrics test") From 450d1d263585acfd64d9e6c254f40f1fbbcd2583 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 20 Nov 2024 16:40:06 +0700 Subject: [PATCH 3/7] Update Chart.yaml --- charts/logzio-telemetry/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/logzio-telemetry/Chart.yaml b/charts/logzio-telemetry/Chart.yaml index aea9c0e6..e16a9622 100644 --- a/charts/logzio-telemetry/Chart.yaml +++ b/charts/logzio-telemetry/Chart.yaml @@ -25,7 +25,7 @@ dependencies: # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 4.2.9 +version: 4.3.0 # This is the version number of the application being deployed. 
This version number should be From 25274c00cbbf94b01658fe7bbcfa275ffc432e76 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 20 Nov 2024 16:40:23 +0700 Subject: [PATCH 4/7] changelog --- charts/logzio-telemetry/README.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/charts/logzio-telemetry/README.md b/charts/logzio-telemetry/README.md index 4e903848..7ce9f2d4 100644 --- a/charts/logzio-telemetry/README.md +++ b/charts/logzio-telemetry/README.md @@ -412,6 +412,8 @@ If you don't want the sub charts to installed add the relevant flag per sub char ## Change log +* 4.3.0 + - Set the `servicegraph` connector's `metrics_flush_interval` setting to `60s` to reduce outgoing connections. * 4.2.9 - Add batch processor to the SPM pipeline, to reduce stress and increase efficiency. * 4.2.8 @@ -467,6 +469,12 @@ If you don't want the sub charts to installed add the relevant flag per sub char - `kubeStateMetrics.enabled` - `pushGateway.enabled` - `nodeExporter.enabled` + + + +
+ Expand to check old versions + * 2.2.0 - Upgraded SPM collector image to version `0.80.0`. - Added service graph connector metrics. @@ -479,11 +487,6 @@ If you don't want the sub charts to installed add the relevant flag per sub char - Add `unified_status_code` dimension - Takes value of `rpc_grpc_status_code` / `http_status_code` - Add `containerSecurityContext` configuration option for container based policies. - - -
- Expand to check old versions - * 2.0.0 - Upgrade sub charts to their latest versions. - `kube-state-metrics` to `4.24.0` From c9e343a321533128bf89040ab74d40d66126e633 Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 20 Nov 2024 18:15:04 +0700 Subject: [PATCH 5/7] run otel demo in tests --- .github/workflows/logzio-monitoring-test.yaml | 6 ++++++ .github/workflows/logzio-telemetry-test.yaml | 8 +++++++- tests/resources/otel-demo-monitoring.yaml | 18 ++++++++++++++++++ tests/resources/otel-demo.yaml | 18 ++++++++++++++++++ 4 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 tests/resources/otel-demo-monitoring.yaml create mode 100644 tests/resources/otel-demo.yaml diff --git a/.github/workflows/logzio-monitoring-test.yaml b/.github/workflows/logzio-monitoring-test.yaml index d8226566..dd555d0a 100644 --- a/.github/workflows/logzio-monitoring-test.yaml +++ b/.github/workflows/logzio-monitoring-test.yaml @@ -148,6 +148,12 @@ jobs: kubectl apply -f tests/resources/tracegen-monitoring.yaml kubectl rollout status deployment/trace-gen --timeout=300s + - name: Run otel demo + run: | + helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts + helm repo update + helm install otel-demo -f tests/resources/otel-demo-monitoring.yaml open-telemetry/opentelemetry-demo --version 0.32.5 + kubectl rollout status deployment/otel-demo-loadgenerator --timeout=300s - name: Sleep run: sleep 180 diff --git a/.github/workflows/logzio-telemetry-test.yaml b/.github/workflows/logzio-telemetry-test.yaml index a8de79ee..8214f564 100644 --- a/.github/workflows/logzio-telemetry-test.yaml +++ b/.github/workflows/logzio-telemetry-test.yaml @@ -73,7 +73,13 @@ jobs: run: | kubectl apply -f tests/resources/tracegen.yaml kubectl rollout status deployment/trace-gen --timeout=300s - + - name: Run otel demo + run: | + helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts + helm repo update + helm install otel-demo 
-f tests/resources/otel-demo.yaml open-telemetry/opentelemetry-demo --version 0.32.5 + kubectl rollout status deployment/otel-demo-loadgenerator --timeout=300s + - name: sleep for 3 minutes run: sleep 180 diff --git a/tests/resources/otel-demo-monitoring.yaml b/tests/resources/otel-demo-monitoring.yaml new file mode 100644 index 00000000..15fa2935 --- /dev/null +++ b/tests/resources/otel-demo-monitoring.yaml @@ -0,0 +1,18 @@ + +default: + envOverrides: + - name: OTEL_COLLECTOR_NAME + value: logzio-monitoring-otel-collector.monitoring.svc.cluster.local + +opentelemetry-collector: + enabled: false + +jaeger: + enabled: false + +prometheus: + enabled: false + +grafana: + enabled: false + diff --git a/tests/resources/otel-demo.yaml b/tests/resources/otel-demo.yaml new file mode 100644 index 00000000..cb433700 --- /dev/null +++ b/tests/resources/otel-demo.yaml @@ -0,0 +1,18 @@ + +default: + envOverrides: + - name: OTEL_COLLECTOR_NAME + value: logzio-k8s-telemetry-otel-collector.default.svc.cluster.local + +opentelemetry-collector: + enabled: false + +jaeger: + enabled: false + +prometheus: + enabled: false + +grafana: + enabled: false + From 9e26c5e58ebfa5064d2ec1b63b60dc91411f26cf Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 20 Nov 2024 18:28:16 +0700 Subject: [PATCH 6/7] Update metrics_e2e_test.go --- tests/metrics_e2e_test.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/metrics_e2e_test.go b/tests/metrics_e2e_test.go index 46f9cdc0..5004785c 100644 --- a/tests/metrics_e2e_test.go +++ b/tests/metrics_e2e_test.go @@ -41,12 +41,14 @@ func TestContainerMetrics(t *testing.T) { func TestServiceGraphMetrics(t *testing.T) { requiredMetrics := map[string][]string{ - "traces_service_graph_request_total": {"client", "server", "connection_type"}, - "traces_service_graph_request_failed_total": {"client", "server", "connection_type"}, - "traces_service_graph_request_server_seconds": {"client", "server", "connection_type"}, - 
"traces_service_graph_request_client_seconds": {"client", "server", "connection_type"}, - "traces_service_graph_unpaired_spans_total": {"client", "server", "connection_type"}, - "traces_service_graph_dropped_spans_total": {"client", "server", "connection_type"}, + "traces_service_graph_request_total": {"client", "server"}, + "traces_service_graph_request_failed_total": {"client", "server"}, + "traces_service_graph_request_server_seconds_bucket": {"client", "server"}, + "traces_service_graph_request_server_seconds_count": {"client", "server"}, + "traces_service_graph_request_server_seconds_sum": {"client", "server"}, + "traces_service_graph_request_client_seconds_bucket": {"client", "server"}, + "traces_service_graph_request_client_seconds_count": {"client", "server"}, + "traces_service_graph_request_client_seconds_sum": {"client", "server"}, } envId := os.Getenv("ENV_ID") query := fmt.Sprintf(`{env_id='%s'}`, envId) From 077de06b4bf4368110783689862c03d72f8b27db Mon Sep 17 00:00:00 2001 From: Yotam loewenbach Date: Wed, 20 Nov 2024 18:29:19 +0700 Subject: [PATCH 7/7] Update metrics_e2e_test.go --- tests/metrics_e2e_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metrics_e2e_test.go b/tests/metrics_e2e_test.go index 5004785c..169c0c2c 100644 --- a/tests/metrics_e2e_test.go +++ b/tests/metrics_e2e_test.go @@ -51,7 +51,7 @@ func TestServiceGraphMetrics(t *testing.T) { "traces_service_graph_request_client_seconds_sum": {"client", "server"}, } envId := os.Getenv("ENV_ID") - query := fmt.Sprintf(`{env_id='%s'}`, envId) + query := fmt.Sprintf(`{client_env_id='%s'}`, envId) testMetrics(t, requiredMetrics, query) }