diff --git a/api/apps/v1alpha1/nimcache_types.go b/api/apps/v1alpha1/nimcache_types.go index da9eeb65..654c8f9d 100644 --- a/api/apps/v1alpha1/nimcache_types.go +++ b/api/apps/v1alpha1/nimcache_types.go @@ -128,6 +128,7 @@ type PersistentVolumeClaim struct { Size string `json:"size,omitempty"` // VolumeAccessMode is the volume access mode of the PVC VolumeAccessMode corev1.PersistentVolumeAccessMode `json:"volumeAccessMode,omitempty"` + SubPath string `json:"subPath,omitempty"` } // NIMCacheStatus defines the observed state of NIMCache diff --git a/api/apps/v1alpha1/nimservice_types.go b/api/apps/v1alpha1/nimservice_types.go index 9f44aaec..772fceca 100644 --- a/api/apps/v1alpha1/nimservice_types.go +++ b/api/apps/v1alpha1/nimservice_types.go @@ -376,7 +376,7 @@ func (n *NIMService) GetVolumesMounts() []corev1.Volume { } // GetVolumes returns volumes for the NIMService container -func (n *NIMService) GetVolumes(modelPVC string) []corev1.Volume { +func (n *NIMService) GetVolumes(modelPVC PersistentVolumeClaim) []corev1.Volume { // TODO: Fetch actual PVC name from associated NIMCache obj volumes := []corev1.Volume{ { @@ -391,7 +391,7 @@ func (n *NIMService) GetVolumes(modelPVC string) []corev1.Volume { Name: "model-store", VolumeSource: corev1.VolumeSource{ PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ - ClaimName: modelPVC, + ClaimName: *modelPVC.Name, }, }, }, @@ -401,11 +401,12 @@ func (n *NIMService) GetVolumes(modelPVC string) []corev1.Volume { } // GetVolumeMounts returns volumes for the NIMService container -func (n *NIMService) GetVolumeMounts() []corev1.VolumeMount { +func (n *NIMService) GetVolumeMounts(modelPVC PersistentVolumeClaim) []corev1.VolumeMount { volumeMounts := []corev1.VolumeMount{ { Name: "model-store", MountPath: "/model-store", + SubPath: modelPVC.SubPath, }, { Name: "dshm", @@ -432,8 +433,8 @@ func (n *NIMService) GetNIMCacheProfile() string { } // GetExternalPVC returns the external PVC name to use for the NIMService deployment -func (n *NIMService) GetExternalPVC() *string { - return n.Spec.Storage.PVC.Name +func (n *NIMService) GetExternalPVC() *PersistentVolumeClaim { + return &n.Spec.Storage.PVC } // GetHPASpec returns the HPA spec for the NIMService deployment diff --git a/bundle/manifests/apps.nvidia.com_nimcaches.yaml b/bundle/manifests/apps.nvidia.com_nimcaches.yaml index 08cf8e68..31d5a49d 100644 --- a/bundle/manifests/apps.nvidia.com_nimcaches.yaml +++ b/bundle/manifests/apps.nvidia.com_nimcaches.yaml @@ -220,6 +220,8 @@ spec: description: StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created. type: string + subPath: + type: string volumeAccessMode: description: VolumeAccessMode is the volume access mode of the PVC diff --git a/bundle/manifests/apps.nvidia.com_nimpipelines.yaml b/bundle/manifests/apps.nvidia.com_nimpipelines.yaml index 6a4eb6c4..f9b9be3d 100644 --- a/bundle/manifests/apps.nvidia.com_nimpipelines.yaml +++ b/bundle/manifests/apps.nvidia.com_nimpipelines.yaml @@ -2138,6 +2138,8 @@ spec: description: StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created. type: string + subPath: + type: string volumeAccessMode: description: VolumeAccessMode is the volume access mode of the PVC diff --git a/bundle/manifests/apps.nvidia.com_nimservices.yaml b/bundle/manifests/apps.nvidia.com_nimservices.yaml index c47385ea..f2d57df9 100644 --- a/bundle/manifests/apps.nvidia.com_nimservices.yaml +++ b/bundle/manifests/apps.nvidia.com_nimservices.yaml @@ -2084,6 +2084,8 @@ spec: description: StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created. type: string + subPath: + type: string volumeAccessMode: description: VolumeAccessMode is the volume access mode of the PVC diff --git a/config/crd/bases/apps.nvidia.com_nimcaches.yaml b/config/crd/bases/apps.nvidia.com_nimcaches.yaml index 08cf8e68..31d5a49d 100644 --- a/config/crd/bases/apps.nvidia.com_nimcaches.yaml +++ b/config/crd/bases/apps.nvidia.com_nimcaches.yaml @@ -220,6 +220,8 @@ spec: description: StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created. type: string + subPath: + type: string volumeAccessMode: description: VolumeAccessMode is the volume access mode of the PVC diff --git a/config/crd/bases/apps.nvidia.com_nimpipelines.yaml b/config/crd/bases/apps.nvidia.com_nimpipelines.yaml index 6a4eb6c4..f9b9be3d 100644 --- a/config/crd/bases/apps.nvidia.com_nimpipelines.yaml +++ b/config/crd/bases/apps.nvidia.com_nimpipelines.yaml @@ -2138,6 +2138,8 @@ spec: description: StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created. type: string + subPath: + type: string volumeAccessMode: description: VolumeAccessMode is the volume access mode of the PVC diff --git a/config/crd/bases/apps.nvidia.com_nimservices.yaml b/config/crd/bases/apps.nvidia.com_nimservices.yaml index c47385ea..f2d57df9 100644 --- a/config/crd/bases/apps.nvidia.com_nimservices.yaml +++ b/config/crd/bases/apps.nvidia.com_nimservices.yaml @@ -2084,6 +2084,8 @@ spec: description: StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created. type: string + subPath: + type: string volumeAccessMode: description: VolumeAccessMode is the volume access mode of the PVC diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml index e8e2e9b5..31d5a49d 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml @@ -220,6 +220,8 @@ spec: description: StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created. type: string + subPath: + type: string volumeAccessMode: description: VolumeAccessMode is the volume access mode of the PVC @@ -266,7 +268,6 @@ spec: type: object type: array required: - - resources - source - storage type: object diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml index 6a4eb6c4..f9b9be3d 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimpipelines.yaml @@ -2138,6 +2138,8 @@ spec: description: StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created. type: string + subPath: + type: string volumeAccessMode: description: VolumeAccessMode is the volume access mode of the PVC diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml index c47385ea..f2d57df9 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimservices.yaml @@ -2084,6 +2084,8 @@ spec: description: StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created. type: string + subPath: + type: string volumeAccessMode: description: VolumeAccessMode is the volume access mode of the PVC diff --git a/internal/controller/nimcache_controller.go b/internal/controller/nimcache_controller.go index 099b9763..6c01da7f 100644 --- a/internal/controller/nimcache_controller.go +++ b/internal/controller/nimcache_controller.go @@ -735,6 +735,7 @@ func constructJob(nimCache *appsv1alpha1.NIMCache) (*batchv1.Job, error) { { Name: "nim-cache-volume", MountPath: "/output", + SubPath: nimCache.Spec.Storage.PVC.SubPath, }, }, SecurityContext: &corev1.SecurityContext{ @@ -779,6 +780,7 @@ func constructJob(nimCache *appsv1alpha1.NIMCache) (*batchv1.Job, error) { { Name: "nim-cache-volume", MountPath: "/model-store", + SubPath: nimCache.Spec.Storage.PVC.SubPath, }, }, Resources: corev1.ResourceRequirements{ diff --git a/internal/controller/platform/standalone/nimservice.go b/internal/controller/platform/standalone/nimservice.go index 5595ed24..79704d1a 100644 --- a/internal/controller/platform/standalone/nimservice.go +++ b/internal/controller/platform/standalone/nimservice.go @@ -133,7 +133,7 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi } deploymentParams := nimService.GetDeploymentParams() - modelPVC := "" + var modelPVC *appsv1alpha1.PersistentVolumeClaim modelProfile := "" // If external PVC is provided, use that as model-store @@ -152,7 +152,7 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi modelProfile = profile } } else if externalPVC := nimService.GetExternalPVC(); externalPVC != nil { - modelPVC = *externalPVC + modelPVC = externalPVC } else if nimService.Spec.Storage.PVC.Create != nil && *nimService.Spec.Storage.PVC.Create { modelPVC, err = r.reconcilePVC(ctx, nimService) if err != nil { @@ -164,10 +164,9 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi logger.Error(err, "failed to determine PVC for model-store") return ctrl.Result{}, err } - // Setup volume mounts with model store - deploymentParams.Volumes = nimService.GetVolumes(modelPVC) - deploymentParams.VolumeMounts = nimService.GetVolumeMounts() + deploymentParams.Volumes = nimService.GetVolumes(*modelPVC) + deploymentParams.VolumeMounts = nimService.GetVolumeMounts(*modelPVC) // Setup env for explicit override profile is specified if modelProfile != "" { @@ -313,42 +312,43 @@ func (r *NIMServiceReconciler) syncResource(ctx context.Context, obj client.Obje } // getNIMCachePVC returns PVC backing the NIM cache instance -func (r *NIMServiceReconciler) getNIMCachePVC(ctx context.Context, nimService *appsv1alpha1.NIMService) (string, error) { +func (r *NIMServiceReconciler) getNIMCachePVC(ctx context.Context, nimService *appsv1alpha1.NIMService) (*appsv1alpha1.PersistentVolumeClaim, error) { logger := log.FromContext(ctx) if nimService.GetNIMCacheName() == "" { // NIM cache PVC is not used - return "", nil + return nil, nil } - // Lookup NIMCache instance in the same namespace as the NIMService instance nimCache := &appsv1alpha1.NIMCache{} if err := r.Get(ctx, types.NamespacedName{Name: nimService.GetNIMCacheName(), Namespace: nimService.Namespace}, nimCache); err != nil { logger.Error(err, "unable to fetch nimcache", "nimcache", nimService.GetNIMCacheName(), "nimservice", nimService.Name) - return "", err + return nil, err } - // Get the status of NIMCache if nimCache.Status.State != appsv1alpha1.NimCacheStatusReady { - return "", fmt.Errorf("nimcache %s is not ready, nimservice %s", nimCache.GetName(), nimService.GetName()) + return nil, fmt.Errorf("nimcache %s is not ready, nimservice %s", nimCache.GetName(), nimService.GetName()) } if nimCache.Status.PVC == "" { - return "", fmt.Errorf("missing PVC for the nimcache instance %s, nimservice %s", nimCache.GetName(), nimService.GetName()) + return nil, fmt.Errorf("missing PVC for the nimcache instance %s, nimservice %s", nimCache.GetName(), nimService.GetName()) } + if nimCache.Spec.Storage.PVC.Name == nil { + nimCache.Spec.Storage.PVC.Name = &nimCache.Status.PVC + } // Get the underlying PVC for the NIMCache instance - return nimCache.Status.PVC, nil + return &nimCache.Spec.Storage.PVC, nil } -func (r *NIMServiceReconciler) reconcilePVC(ctx context.Context, nimService *appsv1alpha1.NIMService) (string, error) { +func (r *NIMServiceReconciler) reconcilePVC(ctx context.Context, nimService *appsv1alpha1.NIMService) (*appsv1alpha1.PersistentVolumeClaim, error) { logger := r.GetLogger() pvcName := nimService.GetPVCName(nimService.Spec.Storage.PVC) pvcNamespacedName := types.NamespacedName{Name: pvcName, Namespace: nimService.GetNamespace()} pvc := &corev1.PersistentVolumeClaim{} err := r.Get(ctx, pvcNamespacedName, pvc) if err != nil && client.IgnoreNotFound(err) != nil { - return "", err + return nil, err } // If PVC does not exist, create a new one if creation flag is enabled @@ -357,15 +357,15 @@ func (r *NIMServiceReconciler) reconcilePVC(ctx context.Context, nimService *app pvc, err = shared.ConstructPVC(nimService.Spec.Storage.PVC, metav1.ObjectMeta{Name: pvcName, Namespace: nimService.GetNamespace()}) if err != nil { logger.Error(err, "Failed to construct pvc", "name", pvc.Name) - return "", err + return nil, err } if err := controllerutil.SetControllerReference(nimService, pvc, r.GetScheme()); err != nil { - return "", err + return nil, err } err = r.Create(ctx, pvc) if err != nil { logger.Error(err, "Failed to create pvc", "name", pvc.Name) - return "", err + return nil, err } logger.Info("Created PVC for NIM Service", "pvc", pvcName) @@ -373,12 +373,12 @@ func (r *NIMServiceReconciler) reconcilePVC(ctx context.Context, nimService *app nimService.Status.State = appsv1alpha1.NimCacheStatusPVCCreated if err := r.Status().Update(ctx, nimService); err != nil { logger.Error(err, "Failed to update status", "NIMService", nimService.Name) - return "", err + return nil, err } } else { logger.Error(err, "PVC doesn't exist and auto-creation is not enabled", "name", pvcNamespacedName) - return "", err + return nil, err } } - return pvcName, nil + return &nimService.Spec.Storage.PVC, nil } diff --git a/internal/controller/platform/standalone/nimservice_test.go b/internal/controller/platform/standalone/nimservice_test.go index f55f376f..1796156f 100644 --- a/internal/controller/platform/standalone/nimservice_test.go +++ b/internal/controller/platform/standalone/nimservice_test.go @@ -17,7 +17,9 @@ limitations under the License. package standalone import ( + "bytes" "context" + "log" "path" "sort" "strings" @@ -111,6 +113,9 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() { Name: &pvcName, }, }, + NIMCache: appsv1alpha1.NIMCacheVolSpec{ + Name: "test-nimcache", + }, Env: []corev1.EnvVar{ { Name: "custom-env", @@ -230,12 +235,42 @@ var _ = Describe("NIMServiceReconciler for a standalone platform", func() { { Name: "model-store", MountPath: "/model-store", + SubPath: "subPath", }, { Name: "dshm", MountPath: "/dev/shm", }, } + NIMCache := &appsv1alpha1.NIMCache{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-nimcache", + Namespace: "default", + }, + Spec: appsv1alpha1.NIMCacheSpec{ + Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "test-container", PullSecret: "my-secret"}}, + Storage: appsv1alpha1.Storage{PVC: appsv1alpha1.PersistentVolumeClaim{Create: ptr.To[bool](true), StorageClass: "standard", Size: "1Gi", SubPath: "subPath"}}, + }, + Status: appsv1alpha1.NIMCacheStatus{ + State: appsv1alpha1.NimCacheStatusReady, + PVC: pvcName, + }, + } + _ = client.Create(context.TODO(), NIMCache) + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: "default", + }, + } + _ = client.Create(context.TODO(), pvc) + + var buf bytes.Buffer + log.SetOutput(&buf) + defer func() { + log.SetOutput(os.Stderr) + }() + }) AfterEach(func() { diff --git a/internal/render/render_test.go b/internal/render/render_test.go index c14fa073..b40d62d3 100644 --- a/internal/render/render_test.go +++ b/internal/render/render_test.go @@ -140,6 +140,7 @@ var _ = Describe("K8s Resources Rendering", func() { { Name: "test-volume", MountPath: "/data", + SubPath: "subPath", }, }, Env: []corev1.EnvVar{ @@ -179,6 +180,9 @@ var _ = Describe("K8s Resources Rendering", func() { Expect(*deployment.Spec.Replicas).To(Equal(int32(3))) Expect(deployment.Spec.Template.Spec.Containers[0].Name).To(Equal("test-container")) Expect(deployment.Spec.Template.Spec.Containers[0].Image).To(Equal("nim-llm:latest")) + Expect(deployment.Spec.Template.Spec.Containers[0].VolumeMounts[0].Name).To(Equal("test-volume")) + Expect(deployment.Spec.Template.Spec.Containers[0].VolumeMounts[0].MountPath).To(Equal("/data")) + Expect(deployment.Spec.Template.Spec.Containers[0].VolumeMounts[0].SubPath).To(Equal("subPath")) }) It("should render StatefulSet template correctly", func() { diff --git a/manifests/deployment.yaml b/manifests/deployment.yaml index 0dafa9c7..c27b69aa 100644 --- a/manifests/deployment.yaml +++ b/manifests/deployment.yaml @@ -36,6 +36,7 @@ spec: {{- range .VolumeMounts }} - name: {{ .Name }} mountPath: {{ .MountPath }} + subPath: {{ .SubPath }} {{- end }} env: {{- range .Env }}