From fd427045ea0204d359d47724c215a0c953db9066 Mon Sep 17 00:00:00 2001 From: Luca Burgazzoli Date: Tue, 14 Jan 2025 15:33:55 +0100 Subject: [PATCH] Simplify DataScienceCluster conditions This commit aims to simplify the DataScienceCluster status conditions by removing transient conditions (Reconciling, Deploying, etc.) and replacing them with two conditions: - Available, which reflects the work done by the controller: an Available condition with status False signals that the controller ran into problems while reconciling the state of the cluster - Ready, which reflects the status of the components managed by the controller. It is perfectly possible to be in a state where Available=False and Ready=True: this can happen when the controller fails to perform some reconciliation logic because of e.g. a transient Kubernetes error while, at the same time, all the components are running. The object would then look something like: apiVersion: datasciencecluster.opendatahub.io/v1 kind: DataScienceCluster metadata: name: default-dsc spec: {} status: conditions: - lastHeartbeatTime: "2025-01-15T12:54:20Z" lastTransitionTime: "2025-01-15T12:54:20Z" message: DataScienceCluster resource reconciled successfully reason: Available status: "True" type: Available - lastHeartbeatTime: "2025-01-15T12:54:20Z" lastTransitionTime: "2025-01-15T12:54:20Z" message: Ready reason: Ready status: "True" type: Ready It is possible to wait for the DataScienceCluster to become ready by using the kubectl wait command: kubectl wait --for=condition=ready datascienceclusters.datasciencecluster.opendatahub.io default-dsc --- .../modelcontroller/modelcontroller.go | 4 +- .../modelmeshserving/modelmeshserving.go | 4 +- .../datasciencecluster_controller.go | 138 +---- .../datasciencecluster_controller_support.go | 189 +++++++ ...asciencecluster_controller_support_test.go | 472 ++++++++++++++++++ controllers/status/status.go | 6 + go.mod | 1 + pkg/utils/test/fakeclient/fakeclient.go | 4 + pkg/utils/test/matchers/jq/jq_support.go | 10 + pkg/utils/test/matchers/jq/jq_transform.go | 11 +- 10 files changed, 703 insertions(+), 136 deletions(-) create mode 100644 controllers/datasciencecluster/datasciencecluster_controller_support.go create mode 100644 controllers/datasciencecluster/datasciencecluster_controller_support_test.go diff --git a/controllers/components/modelcontroller/modelcontroller.go b/controllers/components/modelcontroller/modelcontroller.go index 234281939ef..5a08ef3d6a0 100644 --- a/controllers/components/modelcontroller/modelcontroller.go +++ b/controllers/components/modelcontroller/modelcontroller.go @@ -16,7 +16,7 @@ import ( dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" "github.com/opendatahub-io/opendatahub-operator/v2/controllers/status" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" - "github.com/opendatahub-io/opendatahub-operator/v2/pkg/componentsregistry" + cr "github.com/opendatahub-io/opendatahub-operator/v2/pkg/componentsregistry" odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/annotations" ) @@ -24,7 +24,7 @@ import ( type componentHandler struct{} func init() { //nolint:gochecknoinits - componentsregistry.Add(&componentHandler{}) + cr.Add(&componentHandler{}) } func (s *componentHandler) GetName() string { diff --git a/controllers/components/modelmeshserving/modelmeshserving.go
b/controllers/components/modelmeshserving/modelmeshserving.go index 22bbb6aceea..f024efaf40d 100644 --- a/controllers/components/modelmeshserving/modelmeshserving.go +++ b/controllers/components/modelmeshserving/modelmeshserving.go @@ -16,7 +16,7 @@ import ( dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" "github.com/opendatahub-io/opendatahub-operator/v2/controllers/status" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" - "github.com/opendatahub-io/opendatahub-operator/v2/pkg/componentsregistry" + cr "github.com/opendatahub-io/opendatahub-operator/v2/pkg/componentsregistry" odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/annotations" ) @@ -24,7 +24,7 @@ import ( type componentHandler struct{} func init() { //nolint:gochecknoinits - componentsregistry.Add(&componentHandler{}) + cr.Add(&componentHandler{}) } func (s *componentHandler) GetName() string { diff --git a/controllers/datasciencecluster/datasciencecluster_controller.go b/controllers/datasciencecluster/datasciencecluster_controller.go index d306ccb9a05..44843c66e70 100644 --- a/controllers/datasciencecluster/datasciencecluster_controller.go +++ b/controllers/datasciencecluster/datasciencecluster_controller.go @@ -23,12 +23,9 @@ import ( "slices" "strings" - operatorv1 "github.com/openshift/api/operator/v1" conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1" corev1 "k8s.io/api/core/v1" k8serr "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" @@ -39,12 +36,10 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" - "github.com/opendatahub-io/opendatahub-operator/v2/apis/common" componentApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1alpha1" dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" "github.com/opendatahub-io/opendatahub-operator/v2/controllers/status" - "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" cr "github.com/opendatahub-io/opendatahub-operator/v2/pkg/componentsregistry" odhClient "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/client" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/handlers" @@ -97,14 +92,18 @@ func (r *DataScienceClusterReconciler) Reconcile(ctx context.Context, req ctrl.R // validate pre-requisites if err := r.validate(ctx, instance); err != nil { log.Info(err.Error()) - status.SetCondition(&instance.Status.Conditions, "Degraded", status.ReconcileFailed, err.Error(), corev1.ConditionTrue) + + status.SetCondition( + &instance.Status.Conditions, + string(conditionsv1.ConditionAvailable), + status.DegradedReason, + err.Error(), + corev1.ConditionFalse, + ) } // deploy components - if err := r.reconcileComponents(ctx, instance, cr.DefaultRegistry()); err != nil { - log.Info(err.Error()) - status.SetCondition(&instance.Status.Conditions, "Degraded", status.ReconcileFailed, err.Error(), corev1.ConditionTrue) - } + reconcileComponents(ctx, r.Client, instance, cr.DefaultRegistry()) // keep conditions sorted slices.SortFunc(instance.Status.Conditions, func(a, b conditionsv1.Condition) int { @@ -150,125 +149,6 @@ func (r 
*DataScienceClusterReconciler) validate(ctx context.Context, _ *dscv1.Da return nil } -func (r *DataScienceClusterReconciler) reconcileComponents( - ctx context.Context, - instance *dscv1.DataScienceCluster, - reg *cr.Registry) error { - log := logf.FromContext(ctx).WithName("DataScienceCluster") - - notReadyComponents := make([]string, 0) - - // all DSC defined components - componentErrors := reg.ForEach(func(component cr.ComponentHandler) error { - ci, err := r.reconcileComponent(ctx, instance, component) - if err != nil { - return err - } - - if !cr.IsManaged(component, instance) { - return nil - } - - if !meta.IsStatusConditionTrue(ci.GetStatus().Conditions, status.ConditionTypeReady) { - notReadyComponents = append(notReadyComponents, component.GetName()) - } - - return nil - }) - - // Process errors for components - if componentErrors != nil { - log.Info("DataScienceCluster Deployment Incomplete.") - - status.SetCompleteCondition( - &instance.Status.Conditions, - status.ReconcileCompletedWithComponentErrors, - fmt.Sprintf("DataScienceCluster resource reconciled with component errors: %v", componentErrors), - ) - - r.Recorder.Eventf(instance, corev1.EventTypeNormal, - "DataScienceClusterComponentFailures", - "DataScienceCluster instance %s created, but have some failures in component %v", instance.Name, componentErrors) - } else { - log.Info("DataScienceCluster Deployment Completed.") - - // finalize reconciliation - status.SetCompleteCondition( - &instance.Status.Conditions, - status.ReconcileCompleted, - "DataScienceCluster resource reconciled successfully", - ) - } - - if len(notReadyComponents) != 0 { - instance.Status.Phase = status.PhaseNotReady - - conditionsv1.SetStatusCondition(&instance.Status.Conditions, conditionsv1.Condition{ - Type: conditionsv1.ConditionType(status.ConditionTypeReady), - Status: corev1.ConditionFalse, - Reason: "NotReady", - Message: fmt.Sprintf("Some components are not ready: %s", strings.Join(notReadyComponents, ",")), - }) - } else { - instance.Status.Phase = status.PhaseReady - - conditionsv1.SetStatusCondition(&instance.Status.Conditions, conditionsv1.Condition{ - Type: conditionsv1.ConditionType(status.ConditionTypeReady), - Status: corev1.ConditionTrue, - Reason: "Ready", - Message: "Ready", - }) - } - - instance.Status.Release = cluster.GetRelease() - instance.Status.ObservedGeneration = instance.Generation - - if componentErrors != nil { - return componentErrors - } - - return nil -} - -func (r *DataScienceClusterReconciler) reconcileComponent( - ctx context.Context, - instance *dscv1.DataScienceCluster, - component cr.ComponentHandler, -) (common.PlatformObject, error) { - ms := component.GetManagementState(instance) - componentCR := component.NewCRObject(instance) - - switch ms { - case operatorv1.Managed: - err := ctrl.SetControllerReference(instance, componentCR, r.Scheme) - if err != nil { - return nil, err - } - err = r.Client.Apply(ctx, componentCR, client.FieldOwner(fieldOwner), client.ForceOwnership) - if err != nil { - return nil, err - } - case operatorv1.Removed: - err := r.Client.Delete(ctx, componentCR, client.PropagationPolicy(metav1.DeletePropagationForeground)) - if err != nil && !k8serr.IsNotFound(err) { - return nil, err - } - default: - return nil, fmt.Errorf("unsupported management state: %s", ms) - } - - if instance.Status.InstalledComponents == nil { - instance.Status.InstalledComponents = make(map[string]bool) - } - - err := component.UpdateDSCStatus(instance, componentCR) - if err != nil { - return nil, 
fmt.Errorf("failed to update status of DataScienceCluster component %s: %w", component.GetName(), err) - } - - return componentCR, nil -} - func (r *DataScienceClusterReconciler) reportError(ctx context.Context, err error, instance *dscv1.DataScienceCluster, message string) { logf.FromContext(ctx).Error(err, message, "instance.Name", instance.Name) r.Recorder.Eventf(instance, corev1.EventTypeWarning, "DataScienceClusterReconcileError", diff --git a/controllers/datasciencecluster/datasciencecluster_controller_support.go b/controllers/datasciencecluster/datasciencecluster_controller_support.go new file mode 100644 index 00000000000..840fb9ad60a --- /dev/null +++ b/controllers/datasciencecluster/datasciencecluster_controller_support.go @@ -0,0 +1,189 @@ +package datasciencecluster + +import ( + "context" + "fmt" + "slices" + "strings" + + "github.com/hashicorp/go-multierror" + operatorv1 "github.com/openshift/api/operator/v1" + conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1" + corev1 "k8s.io/api/core/v1" + k8serr "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/controllers/status" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" + cr "github.com/opendatahub-io/opendatahub-operator/v2/pkg/componentsregistry" + odhClient "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/client" +) + +// reconcileComponents reconciles the components of a DataScienceCluster instance. +// +// This function iterates over all components defined in the components registry and performs necessary reconciliation +// actions such as creating, updating, or deleting resources. It tracks errors during the reconciliation process and +// updates the status of the DSC instance accordingly. +// +// Parameters: +// - ctx: The context for the reconciliation request. +// - instance: The DataScienceCluster instance being reconciled. +func reconcileComponents( + ctx context.Context, + cli *odhClient.Client, + instance *dscv1.DataScienceCluster, + registry *cr.Registry, +) { + if instance.Status.InstalledComponents == nil { + instance.Status.InstalledComponents = make(map[string]bool) + } + + var reconcileErrors error + var nonReadyComponents []string + + // ignore the aggregate error returned by the ForEach function since we + // need to aggregate other errors + _ = registry.ForEach(func(component cr.ComponentHandler) error { + err := reconcileComponent(ctx, cli, instance, component) + if err != nil { + reconcileErrors = multierror.Append(reconcileErrors, err) + } + + ci := component.NewCRObject(instance) + + // read the component instance to get tha actual status + err = cli.Get(ctx, client.ObjectKeyFromObject(ci), ci) + if err != nil && !k8serr.IsNotFound(err) { + reconcileErrors = multierror.Append(reconcileErrors, err) + return nil + } + + if err := component.UpdateDSCStatus(instance, ci); err != nil { + reconcileErrors = multierror.Append(err) + } + + if !cr.IsManaged(component, instance) { + return nil + } + + if !meta.IsStatusConditionTrue(ci.GetStatus().Conditions, status.ConditionTypeReady) { + nonReadyComponents = append(nonReadyComponents, component.GetName()) + } + + return nil + }) + + // Process reconciliation failures and update the Available condition. 
+ available := setAvailability(instance, reconcileErrors) + // Process component status and update the Ready condition and Phase. + ready := setReadiness(instance, nonReadyComponents) + + instance.Status.Release = cluster.GetRelease() + instance.Status.ObservedGeneration = instance.Generation + instance.Status.Phase = status.PhaseReady + + if !available || !ready { + instance.Status.Phase = status.PhaseNotReady + } +} + +// reconcileComponent reconciles a specific component within a DataScienceCluster (DSC). +// +// It handles the component based on its management state, applying or deleting the component as needed using +// the provided client. +func reconcileComponent( + ctx context.Context, + cli *odhClient.Client, + instance *dscv1.DataScienceCluster, + component cr.ComponentHandler, +) error { + ms := component.GetManagementState(instance) + ci := component.NewCRObject(instance) + + switch ms { + case operatorv1.Managed: + err := ctrl.SetControllerReference(instance, ci, cli.Scheme()) + if err != nil { + return err + } + err = cli.Apply(ctx, ci, client.FieldOwner(fieldOwner), client.ForceOwnership) + if err != nil { + return client.IgnoreNotFound(err) + } + case operatorv1.Removed: + err := cli.Delete(ctx, ci, client.PropagationPolicy(metav1.DeletePropagationForeground)) + if err != nil { + return client.IgnoreNotFound(err) + } + default: + return fmt.Errorf("unsupported management state: %s", ms) + } + + return nil +} + +// setAvailability updates the "Available" condition of the DataScienceCluster instance based on the provided error. +// If the error is nil, the condition is set to True with a success message. +// If an error is present, the condition is set to False with the error message. +// +// Parameters: +// - instance: The DataScienceCluster instance whose status conditions are being updated. +// - err: The error encountered during reconciliation, if any. +// +// Returns: +// - A boolean indicating whether the "Available" condition is True. +func setAvailability(instance *dscv1.DataScienceCluster, err error) bool { + condition := conditionsv1.Condition{ + Type: conditionsv1.ConditionAvailable, + Status: corev1.ConditionTrue, + Reason: status.AvailableReason, + Message: "DataScienceCluster resource reconciled successfully", + } + + if err != nil { + condition.Status = corev1.ConditionFalse + condition.Reason = status.DegradedReason + condition.Message = fmt.Sprintf("DataScienceCluster resource reconciled with errors: %v", err) + } + + conditionsv1.SetStatusCondition(&instance.Status.Conditions, condition) + + return condition.Status == corev1.ConditionTrue +} + +// setReadiness updates the "Ready" condition of the DataScienceCluster instance based on the list of +// non-ready components. +// +// If all components are ready, the condition is set to True with a success message. +// If any components are not ready, the condition is set to False with a message listing the +// non-ready components. +// +// Parameters: +// - instance: The DataScienceCluster instance whose status conditions are being updated. +// - nonReadyComponents: A slice of strings representing the names of components that are not ready. +// +// Returns: +// - A boolean indicating whether the "Ready" condition is True.
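+// +// Note: the nonReadyComponents slice is sorted before the message is built so that the +// condition message remains stable across reconciliations.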
+func setReadiness(instance *dscv1.DataScienceCluster, nonReadyComponents []string) bool { + condition := conditionsv1.Condition{ + Type: conditionsv1.ConditionType(status.ConditionTypeReady), + Status: corev1.ConditionTrue, + Reason: status.ReadyReason, + Message: status.ReadyReason, + } + + slices.Sort(nonReadyComponents) + + if len(nonReadyComponents) != 0 { + condition.Status = corev1.ConditionFalse + condition.Reason = status.NotReadyReason + condition.Message = fmt.Sprintf("Some components are not ready: %s", strings.Join(nonReadyComponents, ",")) + } + + conditionsv1.SetStatusCondition(&instance.Status.Conditions, condition) + + return condition.Status == corev1.ConditionTrue +} diff --git a/controllers/datasciencecluster/datasciencecluster_controller_support_test.go b/controllers/datasciencecluster/datasciencecluster_controller_support_test.go new file mode 100644 index 00000000000..7d6efaaabc3 --- /dev/null +++ b/controllers/datasciencecluster/datasciencecluster_controller_support_test.go @@ -0,0 +1,472 @@ +//nolint:testpackage +package datasciencecluster + +import ( + "context" + "errors" + "fmt" + "path/filepath" + "testing" + + operatorv1 "github.com/openshift/api/operator/v1" + conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1" + "github.com/stretchr/testify/mock" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + controllerruntime "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + + "github.com/opendatahub-io/opendatahub-operator/v2/apis/common" + componentApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1alpha1" + dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" + dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/controllers/status" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster/gvk" + cr "github.com/opendatahub-io/opendatahub-operator/v2/pkg/componentsregistry" + odhClient "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/client" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/resources" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/utils/test/matchers/jq" + "github.com/opendatahub-io/opendatahub-operator/v2/tests/envtestutil" + + . "github.com/onsi/gomega" + . 
"github.com/onsi/gomega/gstruct" +) + +func TestSetAvailability(t *testing.T) { + g := NewGomegaWithT(t) + + instance := &dscv1.DataScienceCluster{} + + // Case 1: When there is no error (err == nil) + t.Run("No error sets ConditionTrue", func(t *testing.T) { + result := setAvailability(instance, nil) + + g.Expect(result).To(BeTrue()) + g.Expect(instance.Status.Conditions).To(HaveLen(1)) + + g.Expect(instance.Status.Conditions[0]).To(MatchFields(IgnoreExtras, Fields{ + "Type": Equal(conditionsv1.ConditionAvailable), + "Status": Equal(corev1.ConditionTrue), + "Reason": Equal(status.AvailableReason), + "Message": Equal("DataScienceCluster resource reconciled successfully"), + })) + }) + + // Case 2: When there is an error (err != nil) + t.Run("Error sets ConditionFalse", func(t *testing.T) { + err := errors.New("some error occurred") + result := setAvailability(instance, err) + + g.Expect(result).To(BeFalse()) + g.Expect(instance.Status.Conditions).To(HaveLen(1)) + + g.Expect(instance.Status.Conditions[0]).To(MatchFields(IgnoreExtras, Fields{ + "Type": Equal(conditionsv1.ConditionAvailable), + "Status": Equal(corev1.ConditionFalse), + "Reason": Equal(status.DegradedReason), + "Message": Equal(fmt.Sprintf("DataScienceCluster resource reconciled with errors: %v", err)), + })) + }) +} + +func createEnvTest(s *runtime.Scheme) (*envtest.Environment, error) { + utilruntime.Must(corev1.AddToScheme(s)) + utilruntime.Must(appsv1.AddToScheme(s)) + utilruntime.Must(apiextensionsv1.AddToScheme(s)) + utilruntime.Must(componentApi.AddToScheme(s)) + utilruntime.Must(dscv1.AddToScheme(s)) + utilruntime.Must(dsciv1.AddToScheme(s)) + + projectDir, err := envtestutil.FindProjectRoot() + if err != nil { + return nil, err + } + + envTest := envtest.Environment{ + CRDInstallOptions: envtest.CRDInstallOptions{ + Scheme: s, + Paths: []string{ + filepath.Join(projectDir, "config", "crd", "bases"), + }, + ErrorIfPathMissing: true, + CleanUpAfterUse: false, + }, + } + + return &envTest, nil +} + +type MockComponentHandler struct { + mock.Mock +} + +func (m *MockComponentHandler) Init(platform cluster.Platform) error { + args := m.Called(platform) + return args.Error(0) +} + +func (m *MockComponentHandler) GetName() string { + args := m.Called() + return args.String(0) +} + +func (m *MockComponentHandler) GetManagementState(instance *dscv1.DataScienceCluster) operatorv1.ManagementState { + args := m.Called(instance) + + //nolint:errcheck,forcetypeassert + return args.Get(0).(operatorv1.ManagementState) +} + +func (m *MockComponentHandler) NewCRObject(instance *dscv1.DataScienceCluster) common.PlatformObject { + args := m.Called(instance) + + //nolint:errcheck,forcetypeassert + return args.Get(0).(common.PlatformObject) +} + +func (m *MockComponentHandler) NewComponentReconciler(ctx context.Context, mgr controllerruntime.Manager) error { + args := m.Called(ctx, mgr) + return args.Error(0) +} + +func (m *MockComponentHandler) UpdateDSCStatus(dsc *dscv1.DataScienceCluster, obj client.Object) error { + args := m.Called(dsc, obj) + return args.Error(0) +} + +func TestReconcileComponent(t *testing.T) { + ctx := context.Background() + + g := NewWithT(t) + s := runtime.NewScheme() + + envTest, err := createEnvTest(s) + g.Expect(err).NotTo(HaveOccurred()) + + t.Cleanup(func() { + _ = envTest.Stop() + }) + + cfg, err := envTest.Start() + g.Expect(err).NotTo(HaveOccurred()) + + envTestClient, err := client.New(cfg, client.Options{Scheme: s}) + g.Expect(err).NotTo(HaveOccurred()) + + cli, err := odhClient.NewFromConfig(cfg, 
envTestClient) + g.Expect(err).NotTo(HaveOccurred()) + + // Create a DataScienceCluster instance + instance := &dscv1.DataScienceCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + }, + } + + err = cli.Create(ctx, instance) + g.Expect(err).ShouldNot(HaveOccurred()) + + component := &componentApi.Dashboard{ + ObjectMeta: metav1.ObjectMeta{ + Name: componentApi.DashboardInstanceName, + }, + } + + err = resources.EnsureGroupVersionKind(cli.Scheme(), instance) + g.Expect(err).ShouldNot(HaveOccurred()) + + err = resources.EnsureGroupVersionKind(cli.Scheme(), component) + g.Expect(err).ShouldNot(HaveOccurred()) + + t.Run(string(operatorv1.Managed), func(t *testing.T) { + g := NewWithT(t) + + mockHandler := new(MockComponentHandler) + mockHandler.On("GetManagementState", mock.Anything).Return(operatorv1.Managed) + mockHandler.On("NewCRObject", mock.Anything).Return(component.DeepCopy()) + + err = reconcileComponent(ctx, cli, instance, mockHandler) + g.Expect(err).ShouldNot(HaveOccurred()) + + mockHandler.AssertExpectations(t) + + err = cli.Get(ctx, client.ObjectKeyFromObject(component), component) + g.Expect(err).ShouldNot(HaveOccurred()) + g.Expect(component).Should(And( + jq.Match(`.metadata.ownerReferences[0].kind == "%s"`, gvk.DataScienceCluster.Kind), + )) + }) + + t.Run(string(operatorv1.Removed), func(t *testing.T) { + g := NewWithT(t) + + mockHandler := new(MockComponentHandler) + mockHandler.On("GetManagementState", mock.Anything).Return(operatorv1.Removed) + mockHandler.On("NewCRObject", mock.Anything).Return(component.DeepCopy()) + + err = reconcileComponent(ctx, cli, instance, mockHandler) + g.Expect(err).ShouldNot(HaveOccurred()) + + mockHandler.AssertExpectations(t) + + err = cli.Get(ctx, client.ObjectKeyFromObject(component), component) + g.Expect(err).ShouldNot(HaveOccurred()) + + g.Expect(component).Should( + // when using envtest, there is no controller running, so the foreground + // propagation policy does not take effect; to check whether the object has + // been marked for deletion, we can rely on the deletionTimestamp being set + jq.Match(`.metadata.deletionTimestamp != null`), + ) + }) + + t.Run(string(operatorv1.Unmanaged), func(t *testing.T) { + g := NewWithT(t) + + mockHandler := new(MockComponentHandler) + mockHandler.On("GetManagementState", mock.Anything).Return(operatorv1.Unmanaged) + mockHandler.On("NewCRObject", mock.Anything).Return(component.DeepCopy()) + + err = reconcileComponent(ctx, cli, instance, mockHandler) + g.Expect(err).Should( + MatchError(ContainSubstring("unsupported management state: " + string(operatorv1.Unmanaged))), + ) + + mockHandler.AssertExpectations(t) + }) +} + +func TestReconcileComponents(t *testing.T) { + ctx := context.Background() + + g := NewWithT(t) + s := runtime.NewScheme() + + envTest, err := createEnvTest(s) + g.Expect(err).NotTo(HaveOccurred()) + + t.Cleanup(func() { + _ = envTest.Stop() + }) + + cfg, err := envTest.Start() + g.Expect(err).NotTo(HaveOccurred()) + + envTestClient, err := client.New(cfg, client.Options{Scheme: s}) + g.Expect(err).NotTo(HaveOccurred()) + + cli, err := odhClient.NewFromConfig(cfg, envTestClient) + g.Expect(err).NotTo(HaveOccurred()) + + // Create a DataScienceCluster instance + instance := &dscv1.DataScienceCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + }, + } + + err = cli.Create(ctx, instance) + g.Expect(err).ShouldNot(HaveOccurred()) + + component := &componentApi.Dashboard{ + ObjectMeta: metav1.ObjectMeta{ + Name: componentApi.DashboardInstanceName, + }, + } + + err = resources.EnsureGroupVersionKind(cli.Scheme(), instance) + g.Expect(err).ShouldNot(HaveOccurred()) + + err =
resources.EnsureGroupVersionKind(cli.Scheme(), component) + g.Expect(err).ShouldNot(HaveOccurred()) + + t.Run("reconcileComponent succeed", func(t *testing.T) { + g := NewWithT(t) + + t.Cleanup(func() { + err := cli.Delete(ctx, component) + g.Expect(client.IgnoreNotFound(err)).ShouldNot(HaveOccurred()) + }) + + mockHandler := new(MockComponentHandler) + mockHandler.On("GetManagementState", mock.Anything).Return(operatorv1.Managed) + mockHandler.On("NewCRObject", mock.Anything).Return(component.DeepCopy()) + mockHandler.On("UpdateDSCStatus", mock.Anything, mock.Anything).Return(nil) + + r := cr.Registry{} + r.Add(mockHandler) + + c := component.DeepCopy() + + err := cli.Create(ctx, c) + g.Expect(err).ShouldNot(HaveOccurred()) + + meta.SetStatusCondition(&c.Status.Conditions, metav1.Condition{ + Type: status.ConditionTypeReady, + Status: metav1.ConditionTrue, + Reason: status.ReadyReason, + Message: status.ReadyReason, + }) + + err = cli.Status().Update(ctx, c) + g.Expect(err).ShouldNot(HaveOccurred()) + + reconcileComponents(ctx, cli, instance, &r) + + mockHandler.AssertExpectations(t) + g.Expect(instance).Should(And( + WithTransform( + jq.Extract(`.status.conditions[] | select(.type == "%s")`, conditionsv1.ConditionAvailable), And( + jq.Match(`.status == "%s"`, metav1.ConditionTrue), + jq.Match(`.reason == "%s"`, status.AvailableReason), + jq.Match(`.message | contains("reconciled successfully")`), + )), + WithTransform( + jq.Extract(`.status.conditions[] | select(.type == "%s")`, status.ConditionTypeReady), And( + jq.Match(`.status == "%s"`, metav1.ConditionTrue), + jq.Match(`.reason == "%s"`, status.ReadyReason), + jq.Match(`.message == "%s"`, status.ReadyReason), + )), + )) + }) + + t.Run("reconcileComponent component not ready", func(t *testing.T) { + g := NewWithT(t) + + t.Cleanup(func() { + err := cli.Delete(ctx, component) + g.Expect(client.IgnoreNotFound(err)).ShouldNot(HaveOccurred()) + }) + + mockHandler := new(MockComponentHandler) + mockHandler.On("GetName").Return(componentApi.DashboardComponentName) + mockHandler.On("GetManagementState", mock.Anything).Return(operatorv1.Managed) + mockHandler.On("NewCRObject", mock.Anything).Return(component.DeepCopy()) + mockHandler.On("UpdateDSCStatus", mock.Anything, mock.Anything).Return(nil) + + r := cr.Registry{} + r.Add(mockHandler) + + c := component.DeepCopy() + + err := cli.Create(ctx, c) + g.Expect(err).ShouldNot(HaveOccurred()) + + meta.SetStatusCondition(&c.Status.Conditions, metav1.Condition{ + Type: status.ConditionTypeReady, + Status: metav1.ConditionFalse, + Reason: status.ReadyReason, + Message: status.ReadyReason, + }) + + err = cli.Status().Update(ctx, c) + g.Expect(err).ShouldNot(HaveOccurred()) + + reconcileComponents(ctx, cli, instance, &r) + + mockHandler.AssertExpectations(t) + g.Expect(instance).Should(And( + WithTransform( + jq.Extract(`.status.conditions[] | select(.type == "%s")`, conditionsv1.ConditionAvailable), And( + jq.Match(`.status == "%s"`, metav1.ConditionTrue), + jq.Match(`.reason == "%s"`, status.AvailableReason), + jq.Match(`.message | contains("reconciled successfully")`), + )), + WithTransform( + jq.Extract(`.status.conditions[] | select(.type == "%s")`, status.ConditionTypeReady), And( + jq.Match(`.status == "%s"`, metav1.ConditionFalse), + jq.Match(`.reason == "%s"`, status.NotReadyReason), + jq.Match(`.message | contains("dashboard")`), + )), + )) + }) + + t.Run("reconcileComponent reconcile failure", func(t *testing.T) { + g := NewWithT(t) + + t.Cleanup(func() { + err := cli.Delete(ctx, 
component) + g.Expect(client.IgnoreNotFound(err)).ShouldNot(HaveOccurred()) + }) + + mockHandler := new(MockComponentHandler) + mockHandler.On("GetManagementState", mock.Anything).Return(operatorv1.Unmanaged) + mockHandler.On("NewCRObject", mock.Anything).Return(component.DeepCopy()) + mockHandler.On("UpdateDSCStatus", mock.Anything, mock.Anything).Return(nil) + + r := cr.Registry{} + r.Add(mockHandler) + + reconcileComponents(ctx, cli, instance, &r) + + mockHandler.AssertExpectations(t) + g.Expect(instance).Should(And( + WithTransform( + jq.Extract(`.status.conditions[] | select(.type == "%s")`, conditionsv1.ConditionAvailable), And( + jq.Match(`.status == "%s"`, metav1.ConditionFalse), + jq.Match(`.reason == "%s"`, status.DegradedReason), + jq.Match(`.message | contains("unsupported management state")`), + )), + WithTransform( + jq.Extract(`.status.conditions[] | select(.type == "%s")`, status.ConditionTypeReady), And( + jq.Match(`.status == "%s"`, metav1.ConditionTrue), + jq.Match(`.reason == "%s"`, status.ReadyReason), + jq.Match(`.message == "%s"`, status.ReadyReason), + )), + )) + }) + + t.Run("reconcileComponent update status failure", func(t *testing.T) { + g := NewWithT(t) + + t.Cleanup(func() { + err := cli.Delete(ctx, component) + g.Expect(client.IgnoreNotFound(err)).ShouldNot(HaveOccurred()) + }) + + mockHandler := new(MockComponentHandler) + mockHandler.On("GetManagementState", mock.Anything).Return(operatorv1.Managed) + mockHandler.On("NewCRObject", mock.Anything).Return(component.DeepCopy()) + mockHandler.On("UpdateDSCStatus", mock.Anything, mock.Anything).Return(errors.New("failure")) + + r := cr.Registry{} + r.Add(mockHandler) + + c := component.DeepCopy() + + err := cli.Create(ctx, c) + g.Expect(err).ShouldNot(HaveOccurred()) + + meta.SetStatusCondition(&c.Status.Conditions, metav1.Condition{ + Type: status.ConditionTypeReady, + Status: metav1.ConditionTrue, + Reason: status.ReadyReason, + Message: status.ReadyReason, + }) + + err = cli.Status().Update(ctx, c) + g.Expect(err).ShouldNot(HaveOccurred()) + + reconcileComponents(ctx, cli, instance, &r) + + mockHandler.AssertExpectations(t) + g.Expect(instance).Should(And( + WithTransform( + jq.Extract(`.status.conditions[] | select(.type == "%s")`, conditionsv1.ConditionAvailable), And( + jq.Match(`.status == "%s"`, metav1.ConditionFalse), + jq.Match(`.reason == "%s"`, status.DegradedReason), + jq.Match(`.message | contains("failure")`), + )), + WithTransform( + jq.Extract(`.status.conditions[] | select(.type == "%s")`, status.ConditionTypeReady), And( + jq.Match(`.status == "%s"`, metav1.ConditionTrue), + jq.Match(`.reason == "%s"`, status.ReadyReason), + jq.Match(`.message == "%s"`, status.ReadyReason), + )), + )) + }) +} diff --git a/controllers/status/status.go b/controllers/status/status.go index 664c8bbfcff..0027f60088a 100644 --- a/controllers/status/status.go +++ b/controllers/status/status.go @@ -83,6 +83,12 @@ const ( RemovedReason string = "Removed" CapabilityFailed string = "CapabilityFailed" ArgoWorkflowExist string = "ArgoWorkflowExist" + + DegradedReason = "Degraded" + AvailableReason = "Available" + UnknownReason = "Unknown" + NotReadyReason = "NotReady" + ReadyReason = "Ready" ) const ( diff --git a/go.mod b/go.mod index b0de9b689cf..a2024dc7d33 100644 --- a/go.mod +++ b/go.mod @@ -80,6 +80,7 @@ require ( github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring v0.61.1-rhobs1 // indirect github.com/sirupsen/logrus v1.9.2 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/stretchr/objx 
v0.5.2 // indirect github.com/xlab/treeprint v1.2.0 // indirect go.starlark.net v0.0.0-20200306205701-8dd3e2ee1dd5 // indirect go.uber.org/multierr v1.11.0 // indirect diff --git a/pkg/utils/test/fakeclient/fakeclient.go b/pkg/utils/test/fakeclient/fakeclient.go index 4b94a7c5f14..3b6668598e3 100644 --- a/pkg/utils/test/fakeclient/fakeclient.go +++ b/pkg/utils/test/fakeclient/fakeclient.go @@ -13,6 +13,8 @@ import ( clientFake "sigs.k8s.io/controller-runtime/pkg/client/fake" componentApi "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1alpha1" + dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" + dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/client" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/resources" ) @@ -23,6 +25,8 @@ func New(objs ...ctrlClient.Object) (*client.Client, error) { utilruntime.Must(appsv1.AddToScheme(scheme)) utilruntime.Must(rbacv1.AddToScheme(scheme)) utilruntime.Must(componentApi.AddToScheme(scheme)) + utilruntime.Must(dscv1.AddToScheme(scheme)) + utilruntime.Must(dsciv1.AddToScheme(scheme)) fakeMapper := meta.NewDefaultRESTMapper(scheme.PreferredVersionAllGroups()) for gvk := range scheme.AllKnownTypes() { diff --git a/pkg/utils/test/matchers/jq/jq_support.go b/pkg/utils/test/matchers/jq/jq_support.go index 3916cb9bac5..1807584d3fc 100644 --- a/pkg/utils/test/matchers/jq/jq_support.go +++ b/pkg/utils/test/matchers/jq/jq_support.go @@ -11,6 +11,9 @@ import ( "github.com/onsi/gomega/format" "github.com/onsi/gomega/gbytes" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/resources" ) func formattedMessage(comparisonMessage string, failurePath []interface{}) string { @@ -91,6 +94,13 @@ func toType(in any) (any, error) { return v.Object, nil case *unstructured.Unstructured: return v.Object, nil + case client.Object: + u, err := resources.ToUnstructured(v) + if err != nil { + return nil, err + } + + return u.Object, nil } switch reflect.TypeOf(in).Kind() { diff --git a/pkg/utils/test/matchers/jq/jq_transform.go b/pkg/utils/test/matchers/jq/jq_transform.go index 341855c919c..8e01f8dc398 100644 --- a/pkg/utils/test/matchers/jq/jq_transform.go +++ b/pkg/utils/test/matchers/jq/jq_transform.go @@ -6,13 +6,18 @@ import ( "github.com/itchyny/gojq" ) -func Extract(expression string) func(in any) (any, error) { +func Extract(format string, args ...any) func(in any) (any, error) { return func(in any) (any, error) { - return ExtractValue[any](in, expression) + return ExtractValue[any](in, format, args...) } } -func ExtractValue[T any](in any, expression string) (T, error) { +func ExtractValue[T any](in any, format string, args ...any) (T, error) { + expression := format + if len(args) > 0 { + expression = fmt.Sprintf(format, args...) + } + var result T var ok bool