Skip to content

Commit

Permalink
Add functional test for product telemetry (#1659)
Browse files Browse the repository at this point in the history
* Add functional test for product telemetry

Problem:
Ensure product telemetry feature is tested with a functional test

Solution:
- Add a functional test.
- Because it requires an NGF image with a custom build, it needs to run with
the telemetry label.
- Enable telemetry test in the pipeline

Testing:
Ran successfully:
- make test TAG=$(whoami) GINKGO_LABEL=telemetry
- make test TAG=$(whoami) # as expected, the telemetry test didn't run and
  the functional tests succeeded
Also, the pipeline runs telemetry tests successfully

CLOSES - #1640

Co-authored-by: Saylor Berman <[email protected]>
  • Loading branch information
pleshakov and sjberman authored Mar 13, 2024
1 parent e1d7691 commit f059cf0
Show file tree
Hide file tree
Showing 10 changed files with 357 additions and 29 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ jobs:
AZURE_STORAGE_KEY: ${{ secrets.AZURE_STORAGE_KEY }}
AZURE_BUCKET_NAME: ${{ secrets.AZURE_BUCKET_NAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_COMMUNITY }}
TELEMETRY_ENDPOINT: "" # disables sending telemetry
TELEMETRY_ENDPOINT_INSECURE: "false"

- name: Cache Artifacts
uses: actions/cache@ab5e6d0c87105b4c9c2047343972218f562e4319 # v4.0.1
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/conformance.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ jobs:
with:
version: latest
args: build --snapshot --clean
env:
TELEMETRY_ENDPOINT: "" # disables sending telemetry
TELEMETRY_ENDPOINT_INSECURE: "false"

- name: Build NGF Docker Image
uses: docker/build-push-action@af5a7ed5ba88268d5278f7203fb52cd833f66d6e # v5.2.0
Expand Down
7 changes: 5 additions & 2 deletions .github/workflows/functional.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ jobs:
with:
version: latest
args: build --snapshot --clean
env:
TELEMETRY_ENDPOINT: otel-collector-opentelemetry-collector.collector.svc.cluster.local:4317
TELEMETRY_ENDPOINT_INSECURE: "true"

- name: Build NGF Docker Image
uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56 # v5.1.0
Expand Down Expand Up @@ -116,9 +119,9 @@ jobs:
make load-images${{ matrix.nginx-image == 'nginx-plus' && '-with-plus' || ''}} PREFIX=${ngf_prefix} TAG=${ngf_tag}
working-directory: ./tests

- name: Run functional tests
- name: Run functional telemetry tests
run: |
ngf_prefix=ghcr.io/nginxinc/nginx-gateway-fabric
ngf_tag=${{ steps.ngf-meta.outputs.version }}
make test${{ matrix.nginx-image == 'nginx-plus' && '-with-plus' || ''}} PREFIX=${ngf_prefix} TAG=${ngf_tag}
make test${{ matrix.nginx-image == 'nginx-plus' && '-with-plus' || ''}} PREFIX=${ngf_prefix} TAG=${ngf_tag} GINKGO_LABEL=telemetry
working-directory: ./tests
8 changes: 7 additions & 1 deletion .goreleaser.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,13 @@ builds:
asmflags:
- all=-trimpath={{.Env.GOPATH}}
ldflags:
- -s -w -X main.version={{.Version}} -X main.commit={{.Commit}} -X main.date={{.Date}} -X main.telemetryReportPeriod=24h -X main.telemetryEndpointInsecure=false
- -s -w
- -X main.version={{.Version}}
- -X main.commit={{.Commit}}
- -X main.date={{.Date}}
- -X main.telemetryReportPeriod=24h
- -X main.telemetryEndpoint={{.Env.TELEMETRY_ENDPOINT}}
- -X main.telemetryEndpointInsecure={{.Env.TELEMETRY_ENDPOINT_INSECURE}}
main: ./cmd/gateway/
binary: gateway

Expand Down
6 changes: 4 additions & 2 deletions tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ GINKGO_LABEL=
GINKGO_FLAGS=
NGF_VERSION=
CI=false
TELEMETRY_ENDPOINT=
TELEMETRY_ENDPOINT_INSECURE=

ifneq ($(GINKGO_LABEL),)
override GINKGO_FLAGS += -ginkgo.label-filter "$(GINKGO_LABEL)"
Expand All @@ -38,11 +40,11 @@ delete-kind-cluster: ## Delete kind cluster

.PHONY: build-images
build-images: ## Build NGF and NGINX images
cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) build-images
cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) TELEMETRY_ENDPOINT=$(TELEMETRY_ENDPOINT) TELEMETRY_ENDPOINT_INSECURE=$(TELEMETRY_ENDPOINT_INSECURE) build-images

.PHONY: build-images-with-plus
build-images-with-plus: ## Build NGF and NGINX Plus images
cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) build-images-with-plus
cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) TELEMETRY_ENDPOINT=$(TELEMETRY_ENDPOINT) TELEMETRY_ENDPOINT_INSECURE=$(TELEMETRY_ENDPOINT_INSECURE) build-images-with-plus

.PHONY: load-images
load-images: ## Load NGF and NGINX images on configured kind cluster
Expand Down
46 changes: 32 additions & 14 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,20 +68,23 @@ test-with-plus Runs the functional tests for NGF with NGINX Plus

**Note:** The following variables are configurable when running the below `make` commands:

| Variable | Default | Description |
| ------------------- | ------------------------------- | -------------------------------------------------------------- |
| TAG | edge | tag for the locally built NGF images |
| PREFIX | nginx-gateway-fabric | prefix for the locally built NGF image |
| NGINX_PREFIX | nginx-gateway-fabric/nginx | prefix for the locally built NGINX image |
| NGINX_PLUS_PREFIX | nginx-gateway-fabric/nginx-plus | prefix for the locally built NGINX Plus image |
| PLUS_ENABLED | false | Flag to indicate if NGINX Plus should be enabled |
| PULL_POLICY | Never | NGF image pull policy |
| GW_API_VERSION | 1.0.0 | version of Gateway API resources to install |
| K8S_VERSION | latest | version of k8s that the tests are run on |
| GW_SERVICE_TYPE | NodePort | type of Service that should be created |
| GW_SVC_GKE_INTERNAL | false | specifies if the LoadBalancer should be a GKE internal service |
| GINKGO_LABEL | "" | name of the ginkgo label that will filter the tests to run |
| GINKGO_FLAGS | "" | other ginkgo flags to pass to the go test command |
| Variable | Default | Description |
|------------------------------|---------------------------------|---------------------------------------------------------------------|
| TAG | edge | tag for the locally built NGF images |
| PREFIX | nginx-gateway-fabric | prefix for the locally built NGF image |
| NGINX_PREFIX | nginx-gateway-fabric/nginx | prefix for the locally built NGINX image |
| NGINX_PLUS_PREFIX | nginx-gateway-fabric/nginx-plus | prefix for the locally built NGINX Plus image |
| PLUS_ENABLED | false | Flag to indicate if NGINX Plus should be enabled |
| PULL_POLICY | Never | NGF image pull policy |
| GW_API_VERSION | 1.0.0 | version of Gateway API resources to install |
| K8S_VERSION | latest | version of k8s that the tests are run on |
| GW_SERVICE_TYPE | NodePort | type of Service that should be created |
| GW_SVC_GKE_INTERNAL | false | specifies if the LoadBalancer should be a GKE internal service |
| GINKGO_LABEL | "" | name of the ginkgo label that will filter the tests to run |
| GINKGO_FLAGS | "" | other ginkgo flags to pass to the go test command |
| TELEMETRY_ENDPOINT | Set in the main Makefile | The endpoint to which telemetry reports are sent |
| TELEMETRY_ENDPOINT_INSECURE | Set in the main Makefile | Controls whether TLS should be used when sending telemetry reports. |


## Step 1 - Create a Kubernetes cluster

Expand Down Expand Up @@ -137,6 +140,12 @@ Or, to build NGF with NGINX Plus enabled (NGINX Plus cert and key must exist in
make build-images-with-plus load-images-with-plus TAG=$(whoami)
```

For the telemetry test, which requires an OTel collector, build an image with the following variables set:

```makefile
TELEMETRY_ENDPOINT=otel-collector-opentelemetry-collector.collector.svc.cluster.local:4317 TELEMETRY_ENDPOINT_INSECURE=true
```

## Step 3 - Run the tests

### 3a - Run the functional tests locally
Expand All @@ -151,6 +160,15 @@ Or, to run the tests with NGINX Plus enabled:
make test TAG=$(whoami) PLUS_ENABLED=true
```

> The command above doesn't run the telemetry functional test, which requires a dedicated invocation because it uses a
> specially built image (see above) and it needs to deploy NGF differently from the rest of the functional tests.

To run the telemetry test:

```makefile
make test TAG=$(whoami) GINKGO_LABEL=telemetry
```

### 3b - Run the tests on a GKE cluster from a GCP VM

This step only applies if you are running the NFR tests, or would like to run the functional tests on a GKE cluster from a GCP based VM.
Expand Down
93 changes: 89 additions & 4 deletions tests/framework/resourcemanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,27 +30,32 @@ import (
"strings"
"time"

apps "k8s.io/api/apps/v1"
core "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/client-go/kubernetes"
"sigs.k8s.io/controller-runtime/pkg/client"
v1 "sigs.k8s.io/gateway-api/apis/v1"
)

// ResourceManager handles creating/updating/deleting Kubernetes resources.
type ResourceManager struct {
K8sClient client.Client
FS embed.FS
TimeoutConfig TimeoutConfig
K8sClient client.Client
ClientGoClient kubernetes.Interface // used when k8sClient is not enough
FS embed.FS
TimeoutConfig TimeoutConfig
}

// ClusterInfo holds the cluster metadata
type ClusterInfo struct {
K8sVersion string
K8sVersion string
// ID is the UID of kube-system namespace
ID string
MemoryPerNode string
GkeInstanceType string
GkeZone string
Expand Down Expand Up @@ -406,9 +411,89 @@ func (rm *ResourceManager) GetClusterInfo() (ClusterInfo, error) {
ci.GkeZone = node.Labels["topology.kubernetes.io/zone"]
}

var ns core.Namespace
key := types.NamespacedName{Name: "kube-system"}

if err := rm.K8sClient.Get(ctx, key, &ns); err != nil {
return *ci, fmt.Errorf("error getting kube-system namespace: %w", err)
}

ci.ID = string(ns.UID)

return *ci, nil
}

// GetPodNames lists the Pods in the given namespace that match the supplied labels
// and returns their names. The returned slice is empty (not nil) when nothing matches.
// The List call is bounded by TimeoutConfig.GetTimeout.
func (rm *ResourceManager) GetPodNames(namespace string, labels client.MatchingLabels) ([]string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout)
	defer cancel()

	var pods core.PodList

	err := rm.K8sClient.List(ctx, &pods, client.InNamespace(namespace), labels)
	if err != nil {
		return nil, fmt.Errorf("error getting list of Pods: %w", err)
	}

	// One name per listed Pod, in the order the API returned them.
	names := make([]string, len(pods.Items))
	for i := range pods.Items {
		names[i] = pods.Items[i].Name
	}

	return names, nil
}

// GetPodLogs fetches the logs of the Pod identified by namespace and name and
// returns them as a single string. opts is passed unchanged to the client-go
// GetLogs call. The request is bounded by TimeoutConfig.GetTimeout.
func (rm *ResourceManager) GetPodLogs(namespace, name string, opts *core.PodLogOptions) (string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout)
	defer cancel()

	stream, err := rm.ClientGoClient.CoreV1().Pods(namespace).GetLogs(name, opts).Stream(ctx)
	if err != nil {
		return "", fmt.Errorf("error getting logs from Pod: %w", err)
	}
	defer stream.Close()

	// Drain the whole stream into memory before converting it to a string.
	var contents bytes.Buffer
	if _, err = contents.ReadFrom(stream); err != nil {
		return "", fmt.Errorf("error reading logs from Pod: %w", err)
	}

	return contents.String(), nil
}

// GetNGFDeployment looks up the NGF Deployment in the given namespace by listing
// Deployments labeled "app.kubernetes.io/instance" = releaseName. It returns an
// error unless exactly one Deployment matches.
func (rm *ResourceManager) GetNGFDeployment(namespace, releaseName string) (*apps.Deployment, error) {
	ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout)
	defer cancel()

	selector := client.MatchingLabels{"app.kubernetes.io/instance": releaseName}

	var found apps.DeploymentList

	err := rm.K8sClient.List(ctx, &found, client.InNamespace(namespace), selector)
	if err != nil {
		return nil, fmt.Errorf("error getting list of Deployments: %w", err)
	}

	if count := len(found.Items); count != 1 {
		return nil, fmt.Errorf("expected 1 NGF Deployment, got %d", count)
	}

	// Return a pointer to a copy so the caller does not alias the list's backing array.
	ngf := found.Items[0]

	return &ngf, nil
}

// GetReadyNGFPodNames returns the name(s) of the NGF Pod(s).
func GetReadyNGFPodNames(
k8sClient client.Client,
Expand Down
31 changes: 31 additions & 0 deletions tests/suite/manifests/telemetry/collector-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
mode: deployment
replicaCount: 1
config:
exporters:
debug:
verbosity: detailed
logging: {}
extensions:
health_check: {}
memory_ballast:
size_in_percentage: 40
processors:
batch: {}
memory_limiter:
check_interval: 5s
limit_percentage: 80
spike_limit_percentage: 25
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
service:
extensions:
- health_check
pipelines:
traces:
exporters:
- debug
receivers:
- otlp
24 changes: 18 additions & 6 deletions tests/suite/system_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
k8sRuntime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
ctlr "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
Expand Down Expand Up @@ -103,11 +104,15 @@ func setup(cfg setupConfig, extraInstallArgs ...string) {
k8sClient, err = client.New(k8sConfig, options)
Expect(err).ToNot(HaveOccurred())

clientGoClient, err := kubernetes.NewForConfig(k8sConfig)
Expect(err).ToNot(HaveOccurred())

timeoutConfig = framework.DefaultTimeoutConfig()
resourceManager = framework.ResourceManager{
K8sClient: k8sClient,
FS: manifests,
TimeoutConfig: timeoutConfig,
K8sClient: k8sClient,
ClientGoClient: clientGoClient,
FS: manifests,
TimeoutConfig: timeoutConfig,
}

clusterInfo, err = resourceManager.GetClusterInfo()
Expand Down Expand Up @@ -210,26 +215,33 @@ func teardown(relName string) {
)).To(Succeed())
}

var _ = BeforeSuite(func() {
// getDefaultSetupCfg builds the setupConfig used by default for the suite.
// It derives the chart location from this source file's path, stores it in the
// package-level localChartPath variable, and returns a config with deploy enabled.
func getDefaultSetupCfg() setupConfig {
// runtime.Caller(0) reports the path of this source file; the ok result is ignored.
_, file, _, _ := runtime.Caller(0)
// Parent of the directory containing this file.
fileDir := path.Join(path.Dir(file), "../")
// One more level up; the Helm chart path is resolved relative to this directory.
basepath := filepath.Dir(fileDir)
// Assigns (does not declare) localChartPath, which is defined outside this function.
localChartPath = filepath.Join(basepath, "deploy/helm-chart")

// NOTE(review): the next line looks like a removed line left over from the diff view
// (superseded by the return statement below) — confirm against the actual file.
cfg := setupConfig{
return setupConfig{
releaseName: releaseName,
chartPath: localChartPath,
gwAPIVersion: *gatewayAPIVersion,
deploy: true,
}
}

var _ = BeforeSuite(func() {
cfg := getDefaultSetupCfg()

labelFilter := GinkgoLabelFilter()
cfg.nfr = isNFR(labelFilter)

// Skip deployment if:
// - running upgrade test (this test will deploy its own version)
// - running longevity teardown (deployment will already exist)
if strings.Contains(labelFilter, "upgrade") || strings.Contains(labelFilter, "longevity-teardown") {
// - running telemetry test (NGF will be deployed as part of the test)
if strings.Contains(labelFilter, "upgrade") ||
strings.Contains(labelFilter, "longevity-teardown") ||
strings.Contains(labelFilter, "telemetry") {
cfg.deploy = false
}

Expand Down
Loading

0 comments on commit f059cf0

Please sign in to comment.