From f9c8773e15163c6e772bf9672041ff6a995f50e0 Mon Sep 17 00:00:00 2001 From: Shiva Krishna Merla Date: Thu, 24 Oct 2024 06:43:57 -0700 Subject: [PATCH] Allow users to specify runtimeclass name for NIM deployments (#194) * Allow users to specify runtimeclass name for NIM deployments Signed-off-by: Shiva Krishna, Merla * Avoid runtimeclass name for caching as model can be downloaded on a non-gpu node Signed-off-by: Shiva Krishna, Merla --------- Signed-off-by: Shiva Krishna, Merla --- api/apps/v1alpha1/nemo_guardrails_types.go | 18 ++++++++++++--- api/apps/v1alpha1/nimservice_types.go | 22 ++++++++++++++----- .../apps.nvidia.com_nemoguardrails.yaml | 2 ++ .../apps.nvidia.com_nimpipelines.yaml | 2 ++ .../apps.nvidia.com_nimservices.yaml | 2 ++ .../bases/apps.nvidia.com_nemoguardrails.yaml | 2 ++ .../bases/apps.nvidia.com_nimpipelines.yaml | 2 ++ .../bases/apps.nvidia.com_nimservices.yaml | 2 ++ .../crds/apps.nvidia.com_nemoguardrails.yaml | 2 ++ .../crds/apps.nvidia.com_nimpipelines.yaml | 2 ++ .../crds/apps.nvidia.com_nimservices.yaml | 2 ++ .../platform/standalone/nimservice_test.go | 2 ++ internal/render/types/types.go | 2 ++ manifests/deployment.yaml | 1 + manifests/statefulset.yaml | 1 + 15 files changed, 56 insertions(+), 8 deletions(-) diff --git a/api/apps/v1alpha1/nemo_guardrails_types.go b/api/apps/v1alpha1/nemo_guardrails_types.go index fd513e33..c99383c8 100644 --- a/api/apps/v1alpha1/nemo_guardrails_types.go +++ b/api/apps/v1alpha1/nemo_guardrails_types.go @@ -75,9 +75,10 @@ type NemoGuardrailSpec struct { Metrics Metrics `json:"metrics,omitempty"` // +kubebuilder:validation:Minimum=1 // +kubebuilder:default:=1 - Replicas int `json:"replicas,omitempty"` - UserID *int64 `json:"userID,omitempty"` - GroupID *int64 `json:"groupID,omitempty"` + Replicas int `json:"replicas,omitempty"` + UserID *int64 `json:"userID,omitempty"` + GroupID *int64 `json:"groupID,omitempty"` + RuntimeClass string `json:"runtimeClass,omitempty"` } type GuardrailConfig struct { @@ 
-414,6 +415,11 @@ func (n *NemoGuardrail) GetServiceAccountName() string { return n.Name } +// GetRuntimeClass returns the runtime class name for the NemoGuardrail deployment +func (n *NemoGuardrail) GetRuntimeClass() string { + return n.Spec.RuntimeClass +} + // GetHPA returns the HPA spec for the NemoGuardrail deployment func (n *NemoGuardrail) GetHPA() HorizontalPodAutoscalerSpec { return n.Spec.Scale.HPA @@ -537,6 +543,9 @@ func (n *NemoGuardrail) GetDeploymentParams() *rendertypes.DeploymentParams { // Set service account params.ServiceAccountName = n.GetServiceAccountName() + + // Set runtime class + params.RuntimeClassName = n.GetRuntimeClass() return params } @@ -580,6 +589,9 @@ func (n *NemoGuardrail) GetStatefulSetParams() *rendertypes.StatefulSetParams { // Set service account params.ServiceAccountName = n.GetServiceAccountName() + + // Set runtime class + params.RuntimeClassName = n.GetRuntimeClass() return params } diff --git a/api/apps/v1alpha1/nimservice_types.go b/api/apps/v1alpha1/nimservice_types.go index 5a64aa8a..c7f65d66 100644 --- a/api/apps/v1alpha1/nimservice_types.go +++ b/api/apps/v1alpha1/nimservice_types.go @@ -75,9 +75,10 @@ type NIMServiceSpec struct { Metrics Metrics `json:"metrics,omitempty"` // +kubebuilder:validation:Minimum=1 // +kubebuilder:default:=1 - Replicas int `json:"replicas,omitempty"` - UserID *int64 `json:"userID,omitempty"` - GroupID *int64 `json:"groupID,omitempty"` + Replicas int `json:"replicas,omitempty"` + UserID *int64 `json:"userID,omitempty"` + GroupID *int64 `json:"groupID,omitempty"` + RuntimeClass string `json:"runtimeClass,omitempty"` } // NIMCacheVolSpec defines the spec to use NIMCache volume @@ -438,6 +439,11 @@ func (n *NIMService) GetServiceAccountName() string { return n.Name } +// GetRuntimeClass returns the runtime class name for the NIMService deployment +func (n *NIMService) GetRuntimeClass() string { + return n.Spec.RuntimeClass +} + // GetNIMCacheName returns the NIMCache name to use for the 
NIMService deployment func (n *NIMService) GetNIMCacheName() string { return n.Spec.Storage.NIMCache.Name @@ -510,10 +516,9 @@ func (n *NIMService) GetUserID() *int64 { // GetGroupID returns the group ID for the NIMService deployment func (n *NIMService) GetGroupID() *int64 { return n.Spec.GroupID - } -// GetGroupID returns the group ID for the NIMService deployment +// GetStorageReadOnly returns true if the volume has to be mounted as read-only for the NIMService deployment func (n *NIMService) GetStorageReadOnly() bool { if n.Spec.Storage.ReadOnly == nil { return false @@ -579,6 +584,10 @@ func (n *NIMService) GetDeploymentParams() *rendertypes.DeploymentParams { // Set service account params.ServiceAccountName = n.GetServiceAccountName() + + // Set runtime class + params.RuntimeClassName = n.GetRuntimeClass() + return params } @@ -622,6 +631,9 @@ func (n *NIMService) GetStatefulSetParams() *rendertypes.StatefulSetParams { // Set service account params.ServiceAccountName = n.GetServiceAccountName() + + // Set runtime class + params.RuntimeClassName = n.GetRuntimeClass() return params } diff --git a/bundle/manifests/apps.nvidia.com_nemoguardrails.yaml b/bundle/manifests/apps.nvidia.com_nemoguardrails.yaml index 5516e1f9..085924cd 100644 --- a/bundle/manifests/apps.nvidia.com_nemoguardrails.yaml +++ b/bundle/manifests/apps.nvidia.com_nemoguardrails.yaml @@ -1318,6 +1318,8 @@ spec: More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ type: object type: object + runtimeClass: + type: string scale: description: Autoscaling defines attributes to automatically scale the service based on metrics diff --git a/bundle/manifests/apps.nvidia.com_nimpipelines.yaml b/bundle/manifests/apps.nvidia.com_nimpipelines.yaml index 76fb5b49..da567ad1 100644 --- a/bundle/manifests/apps.nvidia.com_nimpipelines.yaml +++ b/bundle/manifests/apps.nvidia.com_nimpipelines.yaml @@ -1348,6 +1348,8 @@ spec: More info: 
https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ type: object type: object + runtimeClass: + type: string scale: description: Autoscaling defines attributes to automatically scale the service based on metrics diff --git a/bundle/manifests/apps.nvidia.com_nimservices.yaml b/bundle/manifests/apps.nvidia.com_nimservices.yaml index 78643707..a5d7eb0c 100644 --- a/bundle/manifests/apps.nvidia.com_nimservices.yaml +++ b/bundle/manifests/apps.nvidia.com_nimservices.yaml @@ -1286,6 +1286,8 @@ spec: More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ type: object type: object + runtimeClass: + type: string scale: description: Autoscaling defines attributes to automatically scale the service based on metrics diff --git a/config/crd/bases/apps.nvidia.com_nemoguardrails.yaml b/config/crd/bases/apps.nvidia.com_nemoguardrails.yaml index 5516e1f9..085924cd 100644 --- a/config/crd/bases/apps.nvidia.com_nemoguardrails.yaml +++ b/config/crd/bases/apps.nvidia.com_nemoguardrails.yaml @@ -1318,6 +1318,8 @@ spec: More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ type: object type: object + runtimeClass: + type: string scale: description: Autoscaling defines attributes to automatically scale the service based on metrics diff --git a/config/crd/bases/apps.nvidia.com_nimpipelines.yaml b/config/crd/bases/apps.nvidia.com_nimpipelines.yaml index 76fb5b49..da567ad1 100644 --- a/config/crd/bases/apps.nvidia.com_nimpipelines.yaml +++ b/config/crd/bases/apps.nvidia.com_nimpipelines.yaml @@ -1348,6 +1348,8 @@ spec: More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ type: object type: object + runtimeClass: + type: string scale: description: Autoscaling defines attributes to automatically scale the service based on metrics diff --git a/config/crd/bases/apps.nvidia.com_nimservices.yaml b/config/crd/bases/apps.nvidia.com_nimservices.yaml index 
78643707..a5d7eb0c 100644 --- a/config/crd/bases/apps.nvidia.com_nimservices.yaml +++ b/config/crd/bases/apps.nvidia.com_nimservices.yaml @@ -1286,6 +1286,8 @@ spec: More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ type: object type: object + runtimeClass: + type: string scale: description: Autoscaling defines attributes to automatically scale the service based on metrics diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoguardrails.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoguardrails.yaml index 5516e1f9..085924cd 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoguardrails.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoguardrails.yaml @@ -1318,6 +1318,8 @@ spec: More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ type: object type: object + runtimeClass: + type: string scale: description: Autoscaling defines attributes to automatically scale the service based on metrics diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml index 76fb5b49..da567ad1 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml @@ -1348,6 +1348,8 @@ spec: More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ type: object type: object + runtimeClass: + type: string scale: description: Autoscaling defines attributes to automatically scale the service based on metrics diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml index 78643707..a5d7eb0c 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml +++ 
b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml @@ -1286,6 +1286,8 @@ spec: More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ type: object type: object + runtimeClass: + type: string scale: description: Autoscaling defines attributes to automatically scale the service based on metrics diff --git a/internal/controller/platform/standalone/nimservice_test.go b/internal/controller/platform/standalone/nimservice_test.go index 6bcb0364..ff831aae 100644 --- a/internal/controller/platform/standalone/nimservice_test.go +++ b/internal/controller/platform/standalone/nimservice_test.go @@ -281,6 +281,7 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() { }, }, }, + RuntimeClass: "nvidia", }, Status: appsv1alpha1.NIMServiceStatus{ State: conditions.NotReady, @@ -459,6 +460,7 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() { Expect(deployment.Spec.Template.Spec.Containers[0].ReadinessProbe).To(Equal(nimService.Spec.ReadinessProbe.Probe)) Expect(deployment.Spec.Template.Spec.Containers[0].LivenessProbe).To(Equal(nimService.Spec.LivenessProbe.Probe)) Expect(deployment.Spec.Template.Spec.Containers[0].StartupProbe).To(Equal(nimService.Spec.StartupProbe.Probe)) + Expect(*deployment.Spec.Template.Spec.RuntimeClassName).To(Equal(nimService.Spec.RuntimeClass)) sortEnvVars(deployment.Spec.Template.Spec.Containers[0].Env) sortEnvVars(nimService.Spec.Env) diff --git a/internal/render/types/types.go b/internal/render/types/types.go index aa97503b..04e24f89 100644 --- a/internal/render/types/types.go +++ b/internal/render/types/types.go @@ -79,6 +79,7 @@ type DeploymentParams struct { NIMCachePVC string UserID *int64 GroupID *int64 + RuntimeClassName string } // StatefulSetParams holds the parameters for rendering a StatefulSet template @@ -108,6 +109,7 @@ type StatefulSetParams struct { ReadinessProbe *corev1.Probe StartupProbe *corev1.Probe NIMCachePVC string + 
RuntimeClassName string } // ServiceParams holds the parameters for rendering a Service template diff --git a/manifests/deployment.yaml b/manifests/deployment.yaml index 5f107031..47a48690 100644 --- a/manifests/deployment.yaml +++ b/manifests/deployment.yaml @@ -36,6 +36,7 @@ spec: {{- end }} spec: serviceAccountName: {{ .ServiceAccountName }} + runtimeClassName: {{ .RuntimeClassName }} containers: - name: {{ .ContainerName }} image: {{ .Image }} diff --git a/manifests/statefulset.yaml b/manifests/statefulset.yaml index f7538b77..d57dd39e 100644 --- a/manifests/statefulset.yaml +++ b/manifests/statefulset.yaml @@ -23,6 +23,7 @@ spec: labels: app: {{ .Name }} spec: + runtimeClassName: {{ .RuntimeClassName }} containers: - name: {{ .ContainerName }} image: {{ .Image }}