Add functional test for product telemetry

Problem: Ensure the product telemetry feature is covered by a functional test. Solution: - Add a functional test. - Because it requires a custom-built NGF image, it needs to run with the telemetry label. Testing: Ran successfully: - make test TAG=$(whoami) GINKGO_LABEL=telemetry - make test TAG=$(whoami) # as expected, the telemetry test didn't run; the functional test succeeded CLOSES - #1640
nginxinc · Mar 11, 2024 · fd959f3 · fd959f3
1 parent 15bab00
commit fd959f3
Show file tree

Hide file tree

Showing 6 changed files with 340 additions and 26 deletions.
diff --git a/tests/Makefile b/tests/Makefile
@@ -13,6 +13,8 @@ GINKGO_LABEL=
 GINKGO_FLAGS=
 NGF_VERSION=
 CI=false
+TELEMETRY_ENDPOINT=
+TELEMETRY_ENDPOINT_INSECURE=
 
 ifneq ($(GINKGO_LABEL),)
     override GINKGO_FLAGS += -ginkgo.label-filter "$(GINKGO_LABEL)"
@@ -38,11 +40,11 @@ delete-kind-cluster: ## Delete kind cluster
 
 .PHONY: build-images
 build-images: ## Build NGF and NGINX images
-	cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) build-images
+	cd .. && $(MAKE) PREFIX=$(PREFIX) TAG=$(TAG) TELEMETRY_ENDPOINT=$(TELEMETRY_ENDPOINT) TELEMETRY_ENDPOINT_INSECURE=$(TELEMETRY_ENDPOINT_INSECURE) build-images
 
 .PHONY: build-images-with-plus
 build-images-with-plus: ## Build NGF and NGINX Plus images
-	cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) build-images-with-plus
+	cd .. && $(MAKE) PREFIX=$(PREFIX) TAG=$(TAG) TELEMETRY_ENDPOINT=$(TELEMETRY_ENDPOINT) TELEMETRY_ENDPOINT_INSECURE=$(TELEMETRY_ENDPOINT_INSECURE) build-images-with-plus
 
 .PHONY: load-images
 load-images: ## Load NGF and NGINX images on configured kind cluster

diff --git a/tests/README.md b/tests/README.md
@@ -67,20 +67,23 @@ test                           Runs the functional tests on your default k8s clu
 
 **Note:** The following variables are configurable when running the below `make` commands:
 
-| Variable            | Default                         | Description                                                    |
-| ------------------- | ------------------------------- | -------------------------------------------------------------- |
-| TAG                 | edge                            | tag for the locally built NGF images                           |
-| PREFIX              | nginx-gateway-fabric            | prefix for the locally built NGF image                         |
-| NGINX_PREFIX        | nginx-gateway-fabric/nginx      | prefix for the locally built NGINX image                       |
-| NGINX_PLUS_PREFIX   | nginx-gateway-fabric/nginx-plus | prefix for the locally built NGINX Plus image                  |
-| PLUS_ENABLED        | false                           | Flag to indicate if NGINX Plus should be enabled               |
-| PULL_POLICY         | Never                           | NGF image pull policy                                          |
-| GW_API_VERSION      | 1.0.0                           | version of Gateway API resources to install                    |
-| K8S_VERSION         | latest                          | version of k8s that the tests are run on                       |
-| GW_SERVICE_TYPE     | NodePort                        | type of Service that should be created                         |
-| GW_SVC_GKE_INTERNAL | false                           | specifies if the LoadBalancer should be a GKE internal service |
-| GINKGO_LABEL        | ""                              | name of the ginkgo label that will filter the tests to run     |
-| GINKGO_FLAGS        | ""                              | other ginkgo flags to pass to the go test command              |
+| Variable                     | Default                         | Description                                                         |
+|------------------------------|---------------------------------|---------------------------------------------------------------------|
+| TAG                          | edge                            | tag for the locally built NGF images                                |
+| PREFIX                       | nginx-gateway-fabric            | prefix for the locally built NGF image                              |
+| NGINX_PREFIX                 | nginx-gateway-fabric/nginx      | prefix for the locally built NGINX image                            |
+| NGINX_PLUS_PREFIX            | nginx-gateway-fabric/nginx-plus | prefix for the locally built NGINX Plus image                       |
+| PLUS_ENABLED                 | false                           | Flag to indicate if NGINX Plus should be enabled                    |
+| PULL_POLICY                  | Never                           | NGF image pull policy                                               |
+| GW_API_VERSION               | 1.0.0                           | version of Gateway API resources to install                         |
+| K8S_VERSION                  | latest                          | version of k8s that the tests are run on                            |
+| GW_SERVICE_TYPE              | NodePort                        | type of Service that should be created                              |
+| GW_SVC_GKE_INTERNAL          | false                           | specifies if the LoadBalancer should be a GKE internal service      |
+| GINKGO_LABEL                 | ""                              | name of the ginkgo label that will filter the tests to run          |
+| GINKGO_FLAGS                 | ""                              | other ginkgo flags to pass to the go test command                   |
+| TELEMETRY_ENDPOINT           | Set in the main Makefile        | The endpoint to which telemetry reports are sent                    |
+| TELEMETRY_ENDPOINT_INSECURE  | Set in the main Makefile        | Controls whether TLS should be used when sending telemetry reports. |
+
 
 ## Step 1 - Create a Kubernetes cluster
 
@@ -136,6 +139,12 @@ Or, to build NGF with NGINX Plus enabled (NGINX Plus cert and key must exist in
 make build-images-with-plus load-images-with-plus TAG=$(whoami)
 ```
 
+For the telemetry test, which requires an OTel collector, build an image with the following variables set:
+
+```makefile
+TELEMETRY_ENDPOINT=otel-collector-opentelemetry-collector.collector.svc.cluster.local:4317 TELEMETRY_ENDPOINT_INSECURE=true
+```
+
 ## Step 3 - Run the tests
 
 ### 3a - Run the functional tests locally
@@ -150,6 +159,15 @@ Or, to run the tests with NGINX Plus enabled:
 make test TAG=$(whoami) PLUS_ENABLED=true
 ```
 
+> The command above doesn't run the telemetry functional test, which requires a dedicated invocation because it uses a
+> specially built image (see above) and it needs to deploy NGF differently from the rest of the functional tests.
+
+To run the telemetry test:
+
+```makefile
+make test TAG=$(whoami) GINKGO_LABEL=telemetry
+```
+
 ### 3b - Run the tests on a GKE cluster from a GCP VM
 
 This step only applies if you are running the NFR tests, or would like to run the functional tests on a GKE cluster from a GCP based VM.

diff --git a/tests/framework/resourcemanager.go b/tests/framework/resourcemanager.go
@@ -30,27 +30,32 @@ import (
 	"strings"
 	"time"
 
+	apps "k8s.io/api/apps/v1"
 	core "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/apimachinery/pkg/util/yaml"
+	"k8s.io/client-go/kubernetes"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	v1 "sigs.k8s.io/gateway-api/apis/v1"
 )
 
 // ResourceManager handles creating/updating/deleting Kubernetes resources.
 type ResourceManager struct {
-	K8sClient     client.Client
-	FS            embed.FS
-	TimeoutConfig TimeoutConfig
+	K8sClient      client.Client
+	ClientGoClient kubernetes.Interface // used when k8sClient is not enough
+	FS             embed.FS
+	TimeoutConfig  TimeoutConfig
 }
 
 // ClusterInfo holds the cluster metadata
 type ClusterInfo struct {
-	K8sVersion      string
+	K8sVersion string
+	// ID is the UID of kube-system namespace
+	ID              string
 	MemoryPerNode   string
 	GkeInstanceType string
 	GkeZone         string
@@ -406,9 +411,89 @@ func (rm *ResourceManager) GetClusterInfo() (ClusterInfo, error) {
 		ci.GkeZone = node.Labels["topology.kubernetes.io/zone"]
 	}
 
+	var ns core.Namespace
+	key := types.NamespacedName{Name: "kube-system"}
+
+	if err := rm.K8sClient.Get(ctx, key, &ns); err != nil {
+		return *ci, fmt.Errorf("error getting kube-system namespace: %w", err)
+	}
+
+	ci.ID = string(ns.UID)
+
 	return *ci, nil
 }
 
+// GetPodNames lists the Pods in namespace that match labels (bounded by GetTimeout) and returns their names.
+func (rm *ResourceManager) GetPodNames(namespace string, labels client.MatchingLabels) ([]string, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout)
+	defer cancel()
+
+	var podList core.PodList
+	if err := rm.K8sClient.List(
+		ctx,
+		&podList,
+		client.InNamespace(namespace),
+		labels,
+	); err != nil {
+		return nil, fmt.Errorf("error getting list of Pods: %w", err)
+	}
+
+	names := make([]string, 0, len(podList.Items))
+
+	for _, pod := range podList.Items {
+		names = append(names, pod.Name)
+	}
+
+	return names, nil
+}
+
+// GetPodLogs reads the named Pod's logs through ClientGoClient (bounded by GetTimeout) and returns them as a string.
+func (rm *ResourceManager) GetPodLogs(namespace, name string, opts *core.PodLogOptions) (string, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout)
+	defer cancel()
+
+	req := rm.ClientGoClient.CoreV1().Pods(namespace).GetLogs(name, opts)
+
+	logs, err := req.Stream(ctx)
+	if err != nil {
+		return "", fmt.Errorf("error getting logs from Pod: %w", err)
+	}
+	defer logs.Close()
+
+	buf := new(bytes.Buffer)
+	if _, err := buf.ReadFrom(logs); err != nil {
+		return "", fmt.Errorf("error reading logs from Pod: %w", err)
+	}
+
+	return buf.String(), nil
+}
+
+// GetNGFDeployment returns the only Deployment in namespace labeled app.kubernetes.io/instance=releaseName, or an error.
+func (rm *ResourceManager) GetNGFDeployment(namespace, releaseName string) (*apps.Deployment, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout)
+	defer cancel()
+
+	var deployments apps.DeploymentList
+
+	if err := rm.K8sClient.List(
+		ctx,
+		&deployments,
+		client.InNamespace(namespace),
+		client.MatchingLabels{
+			"app.kubernetes.io/instance": releaseName,
+		},
+	); err != nil {
+		return nil, fmt.Errorf("error getting list of Deployments: %w", err)
+	}
+
+	if len(deployments.Items) != 1 {
+		return nil, fmt.Errorf("expected 1 NGF Deployment, got %d", len(deployments.Items))
+	}
+
+	deployment := deployments.Items[0]
+	return &deployment, nil
+}
+
 // GetReadyNGFPodNames returns the name(s) of the NGF Pod(s).
 func GetReadyNGFPodNames(
 	k8sClient client.Client,

diff --git a/tests/suite/manifests/telemetry/collector-values.yaml b/tests/suite/manifests/telemetry/collector-values.yaml
@@ -0,0 +1,31 @@
+mode: deployment
+replicaCount: 1
+config:
+  exporters:
+    debug:
+      verbosity: detailed
+    logging: {}
+  extensions:
+    health_check: {}
+    memory_ballast:
+      size_in_percentage: 40
+  processors:
+    batch: {}
+    memory_limiter:
+      check_interval: 5s
+      limit_percentage: 80
+      spike_limit_percentage: 25
+  receivers:
+    otlp:
+      protocols:
+        grpc:
+          endpoint: 0.0.0.0:4317
+  service:
+    extensions:
+    - health_check
+    pipelines:
+      traces:
+        exporters:
+        - debug
+        receivers:
+        - otlp
diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go
@@ -21,6 +21,7 @@ import (
 	k8sRuntime "k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/kubernetes"
 	ctlr "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/log"
@@ -103,11 +104,15 @@ func setup(cfg setupConfig, extraInstallArgs ...string) {
 	k8sClient, err = client.New(k8sConfig, options)
 	Expect(err).ToNot(HaveOccurred())
 
+	clientGoClient, err := kubernetes.NewForConfig(k8sConfig)
+	Expect(err).ToNot(HaveOccurred())
+
 	timeoutConfig = framework.DefaultTimeoutConfig()
 	resourceManager = framework.ResourceManager{
-		K8sClient:     k8sClient,
-		FS:            manifests,
-		TimeoutConfig: timeoutConfig,
+		K8sClient:      k8sClient,
+		ClientGoClient: clientGoClient,
+		FS:             manifests,
+		TimeoutConfig:  timeoutConfig,
 	}
 
 	clusterInfo, err = resourceManager.GetClusterInfo()
@@ -210,26 +215,33 @@ func teardown(relName string) {
 	)).To(Succeed())
 }
 
-var _ = BeforeSuite(func() {
+func getDefaultSetupCfg() setupConfig {
 	_, file, _, _ := runtime.Caller(0)
 	fileDir := path.Join(path.Dir(file), "../")
 	basepath := filepath.Dir(fileDir)
 	localChartPath = filepath.Join(basepath, "deploy/helm-chart")
 
-	cfg := setupConfig{
+	return setupConfig{
 		releaseName:  releaseName,
 		chartPath:    localChartPath,
 		gwAPIVersion: *gatewayAPIVersion,
 		deploy:       true,
 	}
+}
+
+var _ = BeforeSuite(func() {
+	cfg := getDefaultSetupCfg()
 
 	labelFilter := GinkgoLabelFilter()
 	cfg.nfr = isNFR(labelFilter)
 
 	// Skip deployment if:
 	// - running upgrade test (this test will deploy its own version)
 	// - running longevity teardown (deployment will already exist)
-	if strings.Contains(labelFilter, "upgrade") || strings.Contains(labelFilter, "longevity-teardown") {
+	// - running telemetry test (NGF will be deployed as part of the test)
+	if strings.Contains(labelFilter, "upgrade") ||
+		strings.Contains(labelFilter, "longevity-teardown") ||
+		strings.Contains(labelFilter, "telemetry") {
 		cfg.deploy = false
 	}