From 935813634e2a94067b0f3f560df7f0cb261aa9fa Mon Sep 17 00:00:00 2001
From: nkinkade <kinkade@measurementlab.net>
Date: Thu, 24 Oct 2024 13:40:08 -0600
Subject: [PATCH] Allow multi-networkpolicy to deploy to mlab-oti with canary
 nodeSelector (#913)

* Increases memory request and limit for multi-networkpolicy

I noticed that some multi-networkpolicy containers were getting OOM-killed.
Looking at typical memory usage of the container across all staging nodes, the
usage seems to hover right around 125MB for most of them, with usage sometimes
spiking up to the limit of 150MB and getting OOM-killed. This commit increases
the request to 150MB and sets the limit to 250MB. Based on monitoring data,
these should be more reasonable settings.

Additionally, I changed the "name" label on the pod to "workload", which
is more consistent with how we label other pods. I removed the "app" label,
since we don't use that convention and it was, I believe, unused.

* Add canary nodeSelector for multi-networkpolicy in mlab-oti

For sandbox and staging it's fine to deploy multi-networkpolicy everywhere, but
for production we want to do some canaries before deploying widely to be sure
that it works as intended and that it doesn't impact performance in any way.

Also, increase the memory limit to 500Mi. I have noticed that on physical
machines in sandbox multi-networkpolicy only uses around 30 or 40MB, but in
staging it uses more like 120MB. The only way I can account for this is that
staging has more nodes and perhaps the service somehow keeps track of other
nodes for some reason.

* Allows multi-networkpolicy DS to deploy to mlab-oti

For now it has a canary nodeSelector, so it will only deploy to nodes that
we manually label with that nodeSelector.

* Uses versioned image for multi-networkpolicy DaemonSet
---
 k8s/daemonsets/core/multi-networkpolicy.jsonnet | 13 ++++++-------
 system.jsonnet                                  |  4 +---
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/k8s/daemonsets/core/multi-networkpolicy.jsonnet b/k8s/daemonsets/core/multi-networkpolicy.jsonnet
index 8be08074..e98898ed 100644
--- a/k8s/daemonsets/core/multi-networkpolicy.jsonnet
+++ b/k8s/daemonsets/core/multi-networkpolicy.jsonnet
@@ -6,14 +6,13 @@
     namespace: 'kube-system',
     labels: {
       tier: 'node',
-      app: 'multi-networkpolicy',
       name: 'multi-networkpolicy',
     },
   },
   spec: {
     selector: {
       matchLabels: {
-        name: 'multi-networkpolicy',
+        workload: 'multi-networkpolicy',
       },
     },
     updateStrategy: {
@@ -23,14 +22,14 @@
       metadata: {
         labels: {
           tier: 'node',
-          app: 'multi-networkpolicy',
-          name: 'multi-networkpolicy',
+          workload: 'multi-networkpolicy'
         },
       },
       spec: {
         hostNetwork: true,
         nodeSelector: {
           'kubernetes.io/arch': 'amd64',
+          [if std.extVar('PROJECT_ID') == 'mlab-oti' then 'mlab/run']: 'multi-networkpolicy-canary',
         },
         tolerations: [
           {
@@ -42,7 +41,7 @@
         containers: [
           {
             name: 'multi-networkpolicy',
-            image: 'measurementlab/multi-networkpolicy-iptables:latest',
+            image: 'measurementlab/multi-networkpolicy-iptables:v1.0.0',
             imagePullPolicy: 'Always',
             command: [
               '/usr/bin/multi-networkpolicy-iptables',
@@ -66,11 +65,11 @@
             resources: {
               requests: {
                 cpu: '100m',
-                memory: '80Mi',
+                memory: '150Mi',
               },
               limits: {
                 cpu: '100m',
-                memory: '150Mi',
+                memory: '500Mi',
               },
             },
             securityContext: {
diff --git a/system.jsonnet b/system.jsonnet
index 15d580a7..ae7bcc2f 100644
--- a/system.jsonnet
+++ b/system.jsonnet
@@ -17,6 +17,7 @@
     import 'k8s/daemonsets/core/flannel.jsonnet',
     import 'k8s/daemonsets/core/host.jsonnet',
     import 'k8s/daemonsets/core/node-exporter.jsonnet',
+    import 'k8s/daemonsets/core/multi-networkpolicy.jsonnet',
   ] + std.flattenArrays([
     import 'k8s/daemonsets/experiments/msak.jsonnet',
     import 'k8s/daemonsets/experiments/ndt.jsonnet',
@@ -40,9 +41,6 @@
       // A internal Google service we are experimenting with only in sandbox
       // and staging.
       import 'k8s/daemonsets/core/flooefi.jsonnet',
-      // Keep this back from production until we can do more extensive testing
-      // in staging.
-      import 'k8s/daemonsets/core/multi-networkpolicy.jsonnet',
     ] else []
   ) + [
     // Deployments