|
4 | 4 | package gpusharing |
5 | 5 |
|
6 | 6 | import ( |
| 7 | + "context" |
7 | 8 | "testing" |
8 | 9 |
|
| 10 | + "github.com/stretchr/testify/assert" |
9 | 11 | v1 "k8s.io/api/core/v1" |
10 | 12 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
| 13 | + "k8s.io/apimachinery/pkg/runtime" |
| 14 | + "k8s.io/apimachinery/pkg/types" |
| 15 | + "sigs.k8s.io/controller-runtime/pkg/client" |
| 16 | + "sigs.k8s.io/controller-runtime/pkg/client/fake" |
11 | 17 |
|
| 18 | + "github.com/NVIDIA/KAI-scheduler/pkg/apis/scheduling/v1alpha2" |
12 | 19 | "github.com/NVIDIA/KAI-scheduler/pkg/binder/common" |
13 | 20 | "github.com/NVIDIA/KAI-scheduler/pkg/binder/common/gpusharingconfigmap" |
14 | 21 | "github.com/NVIDIA/KAI-scheduler/pkg/common/constants" |
@@ -249,3 +256,261 @@ func TestGetFractionContainerRef(t *testing.T) { |
249 | 256 | }) |
250 | 257 | } |
251 | 258 | } |
| 259 | + |
| 260 | +func TestGPUSharingRollback(t *testing.T) { |
| 261 | + tests := []struct { |
| 262 | + name string |
| 263 | + pod *v1.Pod |
| 264 | + bindRequest *v1alpha2.BindRequest |
| 265 | + existingConfigMaps []*v1.ConfigMap |
| 266 | + expectError bool |
| 267 | + expectConfigMapsDeleted bool |
| 268 | + expectedRemainingCMCount int |
| 269 | + }{ |
| 270 | + { |
| 271 | + name: "rollback skipped for non-shared GPU allocation", |
| 272 | + pod: &v1.Pod{ |
| 273 | + ObjectMeta: metav1.ObjectMeta{ |
| 274 | + Name: "test-pod", |
| 275 | + Namespace: "test-ns", |
| 276 | + }, |
| 277 | + Spec: v1.PodSpec{ |
| 278 | + Containers: []v1.Container{{Name: "container-0"}}, |
| 279 | + }, |
| 280 | + }, |
| 281 | + bindRequest: &v1alpha2.BindRequest{ |
| 282 | + Spec: v1alpha2.BindRequestSpec{ |
| 283 | + ReceivedResourceType: "regular", // Not a fraction |
| 284 | + }, |
| 285 | + }, |
| 286 | + existingConfigMaps: nil, |
| 287 | + expectError: false, |
| 288 | + expectConfigMapsDeleted: false, |
| 289 | + expectedRemainingCMCount: 0, |
| 290 | + }, |
| 291 | + { |
| 292 | + name: "rollback skipped when configmap annotation not set", |
| 293 | + pod: &v1.Pod{ |
| 294 | + ObjectMeta: metav1.ObjectMeta{ |
| 295 | + Name: "test-pod", |
| 296 | + Namespace: "test-ns", |
| 297 | + // No runai/shared-gpu-configmap annotation |
| 298 | + }, |
| 299 | + Spec: v1.PodSpec{ |
| 300 | + Containers: []v1.Container{{Name: "container-0"}}, |
| 301 | + }, |
| 302 | + }, |
| 303 | + bindRequest: &v1alpha2.BindRequest{ |
| 304 | + Spec: v1alpha2.BindRequestSpec{ |
| 305 | + ReceivedResourceType: common.ReceivedTypeFraction, |
| 306 | + }, |
| 307 | + }, |
| 308 | + existingConfigMaps: nil, |
| 309 | + expectError: false, |
| 310 | + expectConfigMapsDeleted: false, |
| 311 | + expectedRemainingCMCount: 0, |
| 312 | + }, |
| 313 | + { |
| 314 | + name: "rollback successfully deletes both configmaps", |
| 315 | + pod: &v1.Pod{ |
| 316 | + ObjectMeta: metav1.ObjectMeta{ |
| 317 | + Name: "test-pod", |
| 318 | + Namespace: "test-ns", |
| 319 | + UID: "test-pod-uid", |
| 320 | + Annotations: map[string]string{ |
| 321 | + "runai/shared-gpu-configmap": "test-pod-abc1234-shared-gpu", |
| 322 | + }, |
| 323 | + }, |
| 324 | + Spec: v1.PodSpec{ |
| 325 | + Containers: []v1.Container{{Name: "container-0"}}, |
| 326 | + }, |
| 327 | + }, |
| 328 | + bindRequest: &v1alpha2.BindRequest{ |
| 329 | + Spec: v1alpha2.BindRequestSpec{ |
| 330 | + ReceivedResourceType: common.ReceivedTypeFraction, |
| 331 | + }, |
| 332 | + }, |
| 333 | + existingConfigMaps: []*v1.ConfigMap{ |
| 334 | + { |
| 335 | + ObjectMeta: metav1.ObjectMeta{ |
| 336 | + Name: "test-pod-abc1234-shared-gpu-0", |
| 337 | + Namespace: "test-ns", |
| 338 | + }, |
| 339 | + }, |
| 340 | + { |
| 341 | + ObjectMeta: metav1.ObjectMeta{ |
| 342 | + Name: "test-pod-abc1234-shared-gpu-0-evar", |
| 343 | + Namespace: "test-ns", |
| 344 | + }, |
| 345 | + }, |
| 346 | + }, |
| 347 | + expectError: false, |
| 348 | + expectConfigMapsDeleted: true, |
| 349 | + expectedRemainingCMCount: 0, |
| 350 | + }, |
| 351 | + { |
| 352 | + name: "rollback succeeds when configmaps already deleted (idempotent)", |
| 353 | + pod: &v1.Pod{ |
| 354 | + ObjectMeta: metav1.ObjectMeta{ |
| 355 | + Name: "test-pod", |
| 356 | + Namespace: "test-ns", |
| 357 | + UID: "test-pod-uid", |
| 358 | + Annotations: map[string]string{ |
| 359 | + "runai/shared-gpu-configmap": "test-pod-abc1234-shared-gpu", |
| 360 | + }, |
| 361 | + }, |
| 362 | + Spec: v1.PodSpec{ |
| 363 | + Containers: []v1.Container{{Name: "container-0"}}, |
| 364 | + }, |
| 365 | + }, |
| 366 | + bindRequest: &v1alpha2.BindRequest{ |
| 367 | + Spec: v1alpha2.BindRequestSpec{ |
| 368 | + ReceivedResourceType: common.ReceivedTypeFraction, |
| 369 | + }, |
| 370 | + }, |
| 371 | + existingConfigMaps: nil, // ConfigMaps don't exist |
| 372 | + expectError: false, |
| 373 | + expectConfigMapsDeleted: true, |
| 374 | + expectedRemainingCMCount: 0, |
| 375 | + }, |
| 376 | + { |
| 377 | + name: "rollback for init container", |
| 378 | + pod: &v1.Pod{ |
| 379 | + ObjectMeta: metav1.ObjectMeta{ |
| 380 | + Name: "test-pod", |
| 381 | + Namespace: "test-ns", |
| 382 | + UID: "test-pod-uid", |
| 383 | + Annotations: map[string]string{ |
| 384 | + "runai/shared-gpu-configmap": "test-pod-abc1234-shared-gpu", |
| 385 | + constants.GpuFractionContainerName: "init-container", |
| 386 | + }, |
| 387 | + }, |
| 388 | + Spec: v1.PodSpec{ |
| 389 | + InitContainers: []v1.Container{{Name: "init-container"}}, |
| 390 | + Containers: []v1.Container{{Name: "container-0"}}, |
| 391 | + }, |
| 392 | + }, |
| 393 | + bindRequest: &v1alpha2.BindRequest{ |
| 394 | + Spec: v1alpha2.BindRequestSpec{ |
| 395 | + ReceivedResourceType: common.ReceivedTypeFraction, |
| 396 | + }, |
| 397 | + }, |
| 398 | + existingConfigMaps: []*v1.ConfigMap{ |
| 399 | + { |
| 400 | + ObjectMeta: metav1.ObjectMeta{ |
| 401 | + Name: "test-pod-abc1234-shared-gpu-i0", |
| 402 | + Namespace: "test-ns", |
| 403 | + }, |
| 404 | + }, |
| 405 | + { |
| 406 | + ObjectMeta: metav1.ObjectMeta{ |
| 407 | + Name: "test-pod-abc1234-shared-gpu-i0-evar", |
| 408 | + Namespace: "test-ns", |
| 409 | + }, |
| 410 | + }, |
| 411 | + }, |
| 412 | + expectError: false, |
| 413 | + expectConfigMapsDeleted: true, |
| 414 | + expectedRemainingCMCount: 0, |
| 415 | + }, |
| 416 | + } |
| 417 | + |
| 418 | + for _, tt := range tests { |
| 419 | + t.Run(tt.name, func(t *testing.T) { |
| 420 | + // Setup fake client |
| 421 | + scheme := runtime.NewScheme() |
| 422 | + _ = v1.AddToScheme(scheme) |
| 423 | + |
| 424 | + clientBuilder := fake.NewClientBuilder().WithScheme(scheme) |
| 425 | + if tt.pod != nil { |
| 426 | + clientBuilder.WithObjects(tt.pod) |
| 427 | + } |
| 428 | + for _, cm := range tt.existingConfigMaps { |
| 429 | + clientBuilder.WithObjects(cm) |
| 430 | + } |
| 431 | + kubeClient := clientBuilder.Build() |
| 432 | + |
| 433 | + // Create GPUSharing plugin |
| 434 | + plugin := New(kubeClient, false) |
| 435 | + |
| 436 | + // Execute rollback |
| 437 | + err := plugin.Rollback(context.Background(), tt.pod, nil, tt.bindRequest, nil) |
| 438 | + |
| 439 | + // Verify error expectation |
| 440 | + if tt.expectError { |
| 441 | + assert.Error(t, err) |
| 442 | + } else { |
| 443 | + assert.NoError(t, err) |
| 444 | + } |
| 445 | + |
| 446 | + // Verify configmaps were deleted |
| 447 | + if tt.expectConfigMapsDeleted { |
| 448 | + cmList := &v1.ConfigMapList{} |
| 449 | + err := kubeClient.List(context.Background(), cmList, client.InNamespace(tt.pod.Namespace)) |
| 450 | + assert.NoError(t, err) |
| 451 | + assert.Equal(t, tt.expectedRemainingCMCount, len(cmList.Items), |
| 452 | + "Expected %d configmaps remaining, got %d", tt.expectedRemainingCMCount, len(cmList.Items)) |
| 453 | + } |
| 454 | + }) |
| 455 | + } |
| 456 | +} |
| 457 | + |
| 458 | +func TestGPUSharingRollbackDeleteConfigMap(t *testing.T) { |
| 459 | + tests := []struct { |
| 460 | + name string |
| 461 | + namespace string |
| 462 | + cmName string |
| 463 | + existingCM *v1.ConfigMap |
| 464 | + expectError bool |
| 465 | + }{ |
| 466 | + { |
| 467 | + name: "successfully deletes existing configmap", |
| 468 | + namespace: "test-ns", |
| 469 | + cmName: "test-cm", |
| 470 | + existingCM: &v1.ConfigMap{ |
| 471 | + ObjectMeta: metav1.ObjectMeta{ |
| 472 | + Name: "test-cm", |
| 473 | + Namespace: "test-ns", |
| 474 | + }, |
| 475 | + }, |
| 476 | + expectError: false, |
| 477 | + }, |
| 478 | + { |
| 479 | + name: "succeeds when configmap does not exist (IgnoreNotFound)", |
| 480 | + namespace: "test-ns", |
| 481 | + cmName: "non-existent-cm", |
| 482 | + existingCM: nil, |
| 483 | + expectError: false, |
| 484 | + }, |
| 485 | + } |
| 486 | + |
| 487 | + for _, tt := range tests { |
| 488 | + t.Run(tt.name, func(t *testing.T) { |
| 489 | + scheme := runtime.NewScheme() |
| 490 | + _ = v1.AddToScheme(scheme) |
| 491 | + |
| 492 | + clientBuilder := fake.NewClientBuilder().WithScheme(scheme) |
| 493 | + if tt.existingCM != nil { |
| 494 | + clientBuilder.WithObjects(tt.existingCM) |
| 495 | + } |
| 496 | + kubeClient := clientBuilder.Build() |
| 497 | + |
| 498 | + plugin := New(kubeClient, false) |
| 499 | + err := plugin.deleteConfigMap(context.Background(), tt.namespace, tt.cmName) |
| 500 | + |
| 501 | + if tt.expectError { |
| 502 | + assert.Error(t, err) |
| 503 | + } else { |
| 504 | + assert.NoError(t, err) |
| 505 | + } |
| 506 | + |
| 507 | + // Verify configmap is gone |
| 508 | + cm := &v1.ConfigMap{} |
| 509 | + err = kubeClient.Get(context.Background(), types.NamespacedName{ |
| 510 | + Namespace: tt.namespace, |
| 511 | + Name: tt.cmName, |
| 512 | + }, cm) |
| 513 | + assert.True(t, client.IgnoreNotFound(err) == nil, "ConfigMap should not exist after deletion") |
| 514 | + }) |
| 515 | + } |
| 516 | +} |
0 commit comments