Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🌱 Implement Machine Deleting condition #11291

Merged
merged 5 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions api/v1beta1/machine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ const (
// MachineNodeConditionNotYetReportedV1Beta2Reason surfaces when a Machine's Node doesn't have a condition reported yet.
MachineNodeConditionNotYetReportedV1Beta2Reason = "NodeConditionNotYetReported"

// MachineNodeInternalErrorV1Beta2Reason surfaces unexpected failures when reading a Node object.
MachineNodeInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason

// MachineNodeDoesNotExistV1Beta2Reason surfaces when the node hosted on the machine does not exist.
// Note: this could happen when creating the machine. However, this state should be treated as an error if it lasts indefinitely.
MachineNodeDoesNotExistV1Beta2Reason = ObjectDoesNotExistV1Beta2Reason
Expand All @@ -204,6 +207,11 @@ const (
// Note: controllers can't identify if the Node was deleted by the controller itself, e.g.
// during the deletion workflow, or by a users.
MachineNodeDeletedV1Beta2Reason = ObjectDeletedV1Beta2Reason

// MachineNodeRemoteConnectionFailedV1Beta2Reason surfaces that the remote connection failed.
// If the remote connection probe failed for longer than remote conditions grace period,
// this reason is used when setting NodeHealthy and NodeReady conditions to `Unknown`.
MachineNodeRemoteConnectionFailedV1Beta2Reason = RemoteConnectionFailedV1Beta2Reason
)

// Machine's HealthCheckSucceeded condition and corresponding reasons that will be used in v1Beta2 API version.
Expand Down Expand Up @@ -271,6 +279,57 @@ const (
const (
// MachineDeletingV1Beta2Condition surfaces details about progress in the machine deletion workflow.
MachineDeletingV1Beta2Condition = DeletingV1Beta2Condition

// MachineDeletingV1Beta2Reason surfaces when the Machine is deleting.
// This reason is only used for the MachineDeletingV1Beta2Condition when calculating the
// Ready condition when the deletionTimestamp on a Machine is set.
MachineDeletingV1Beta2Reason = "Deleting"

// MachineDeletingDeletionTimestampNotSetV1Beta2Reason surfaces when the Machine is not deleting because the
// DeletionTimestamp is not set.
MachineDeletingDeletionTimestampNotSetV1Beta2Reason = DeletionTimestampNotSetV1Beta2Reason

// MachineDeletingDeletionTimestampSetV1Beta2Reason surfaces when the Machine is deleting because the
// DeletionTimestamp is set. This reason is used if none of the more specific reasons apply.
MachineDeletingDeletionTimestampSetV1Beta2Reason = DeletionTimestampSetV1Beta2Reason

// MachineDeletingInternalErrorV1Beta2Reason surfaces unexpected failures when deleting a Machine.
MachineDeletingInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason

// MachineDeletingWaitingForPreDrainHookV1Beta2Reason surfaces when the Machine deletion
// waits for pre-drain hooks to complete. I.e. it waits until there are no annotations
// with the `pre-drain.delete.hook.machine.cluster.x-k8s.io` prefix on the Machine anymore.
MachineDeletingWaitingForPreDrainHookV1Beta2Reason = "WaitingForPreDrainHook"

// MachineDeletingDrainingNodeV1Beta2Reason surfaces when the Machine deletion is draining the Node.
MachineDeletingDrainingNodeV1Beta2Reason = "DrainingNode"

// MachineDeletingWaitingForVolumeDetachV1Beta2Reason surfaces when the Machine deletion is
// waiting for volumes to detach from the Node.
MachineDeletingWaitingForVolumeDetachV1Beta2Reason = "WaitingForVolumeDetach"

// MachineDeletingWaitingForPreTerminateHookV1Beta2Reason surfaces when the Machine deletion
// waits for pre-terminate hooks to complete. I.e. it waits until there are no annotations
// with the `pre-terminate.delete.hook.machine.cluster.x-k8s.io` prefix on the Machine anymore.
MachineDeletingWaitingForPreTerminateHookV1Beta2Reason = "WaitingForPreTerminateHook"

// MachineDeletingWaitingForInfrastructureDeletionV1Beta2Reason surfaces when the Machine deletion
// waits for InfraMachine deletion to complete.
MachineDeletingWaitingForInfrastructureDeletionV1Beta2Reason = "WaitingForInfrastructureDeletion"

// MachineDeletingWaitingForBootstrapDeletionV1Beta2Reason surfaces when the Machine deletion
// waits for BootstrapConfig deletion to complete.
MachineDeletingWaitingForBootstrapDeletionV1Beta2Reason = "WaitingForBootstrapDeletion"

// MachineDeletingDeletingNodeV1Beta2Reason surfaces when the Machine deletion is
// deleting the Node.
MachineDeletingDeletingNodeV1Beta2Reason = "DeletingNode"

// MachineDeletingDeletionCompletedV1Beta2Reason surfaces when the Machine deletion has been completed.
// This reason is set right after the `machine.cluster.x-k8s.io` finalizer is removed.
// This means that the object will go away (i.e. be removed from etcd), except if there are other
// finalizers on the Machine object.
MachineDeletingDeletionCompletedV1Beta2Reason = DeletionCompletedV1Beta2Reason
)

// ANCHOR: MachineSpec
Expand Down
19 changes: 19 additions & 0 deletions api/v1beta1/v1beta2_condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,25 @@ const (

// PausedV1Beta2Reason surfaces when an object is paused.
PausedV1Beta2Reason = "Paused"

// RemoteConnectionFailedV1Beta2Reason surfaces that the remote connection failed.
// This is typically used when setting remote conditions (e.g. `NodeHealthy`) to `Unknown`
// after the remote connection probe didn't succeed for remote conditions grace period.
RemoteConnectionFailedV1Beta2Reason = "RemoteConnectionFailed"

// DeletionTimestampNotSetV1Beta2Reason surfaces when an object is not deleting because the
// DeletionTimestamp is not set.
DeletionTimestampNotSetV1Beta2Reason = "DeletionTimestampNotSet"

// DeletionTimestampSetV1Beta2Reason surfaces when an object is deleting because the
// DeletionTimestamp is set. This reason is used if none of the more specific reasons apply.
DeletionTimestampSetV1Beta2Reason = "DeletionTimestampSet"

// DeletionCompletedV1Beta2Reason surfaces when the deletion process has been completed.
// This reason is set right after the corresponding finalizer is removed.
// This means that the object will go away (i.e. be removed from etcd), except if there are other
// finalizers on the object.
DeletionCompletedV1Beta2Reason = "DeletionCompleted"
)

// Conditions that will be used for the MachineSet object in v1Beta2 API version.
Expand Down
11 changes: 7 additions & 4 deletions controllers/alias.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,17 @@ type MachineReconciler struct {

// WatchFilterValue is the label value used to filter events prior to reconciliation.
WatchFilterValue string

RemoteConditionsGracePeriod time.Duration
}

func (r *MachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
return (&machinecontroller.Reconciler{
Client: r.Client,
APIReader: r.APIReader,
ClusterCache: r.ClusterCache,
WatchFilterValue: r.WatchFilterValue,
Client: r.Client,
APIReader: r.APIReader,
ClusterCache: r.ClusterCache,
WatchFilterValue: r.WatchFilterValue,
RemoteConditionsGracePeriod: r.RemoteConditionsGracePeriod,
}).SetupWithManager(ctx, mgr, options)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
predicateLog := ctrl.LoggerFrom(ctx).WithValues("controller", "clusterclass")
err := ctrl.NewControllerManagedBy(mgr).
For(&clusterv1.ClusterClass{}).
Named("clusterclass").
sbueringer marked this conversation as resolved.
Show resolved Hide resolved
WithOptions(options).
Watches(
&runtimev1.ExtensionConfig{},
Expand Down
97 changes: 0 additions & 97 deletions internal/controllers/machine/drain/cache.go

This file was deleted.

4 changes: 2 additions & 2 deletions internal/controllers/machine/drain/drain.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,12 +301,12 @@ func (r EvictionResult) DrainCompleted() bool {
}

// ConditionMessage returns a condition message for the case where a drain is not completed.
func (r EvictionResult) ConditionMessage() string {
func (r EvictionResult) ConditionMessage(nodeDrainStartTime *metav1.Time) string {
if r.DrainCompleted() {
return ""
}

conditionMessage := "Drain not completed yet:"
conditionMessage := fmt.Sprintf("Drain not completed yet (started at %s):", nodeDrainStartTime.Format(time.RFC3339))
fabriziopandini marked this conversation as resolved.
Show resolved Hide resolved
if len(r.PodsDeletionTimestampSet) > 0 {
conditionMessage = fmt.Sprintf("%s\n* Pods with deletionTimestamp that still exist: %s",
conditionMessage, PodListToString(r.PodsDeletionTimestampSet, 5))
Expand Down
13 changes: 9 additions & 4 deletions internal/controllers/machine/drain/drain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,8 @@ func TestEvictPods(t *testing.T) {
}

func TestEvictionResult_ConditionMessage(t *testing.T) {
g := NewWithT(t)

tests := []struct {
name string
evictionResult EvictionResult
Expand Down Expand Up @@ -819,7 +821,7 @@ func TestEvictionResult_ConditionMessage(t *testing.T) {
},
},
},
wantConditionMessage: `Drain not completed yet:
wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
* Pods with deletionTimestamp that still exist: pod-2-deletionTimestamp-set-1, pod-3-to-trigger-eviction-successfully-1
* Pods with eviction failed:
* Cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently: pod-5-to-trigger-eviction-pdb-violated-1
Expand Down Expand Up @@ -949,7 +951,7 @@ func TestEvictionResult_ConditionMessage(t *testing.T) {
},
},
},
wantConditionMessage: `Drain not completed yet:
wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
* Pods with deletionTimestamp that still exist: pod-2-deletionTimestamp-set-1, pod-2-deletionTimestamp-set-2, pod-2-deletionTimestamp-set-3, pod-3-to-trigger-eviction-successfully-1, pod-3-to-trigger-eviction-successfully-2, ... (2 more)
* Pods with eviction failed:
* Cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently: pod-5-to-trigger-eviction-pdb-violated-1, pod-5-to-trigger-eviction-pdb-violated-2, pod-5-to-trigger-eviction-pdb-violated-3, ... (3 more)
Expand Down Expand Up @@ -1024,7 +1026,7 @@ func TestEvictionResult_ConditionMessage(t *testing.T) {
},
},
},
wantConditionMessage: `Drain not completed yet:
wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
* Pods with eviction failed:
* some other error 1: pod-1-to-trigger-eviction-some-other-error
* some other error 2: pod-2-to-trigger-eviction-some-other-error
Expand All @@ -1035,11 +1037,14 @@ func TestEvictionResult_ConditionMessage(t *testing.T) {
},
}

nodeDrainStartTime, err := time.Parse(time.RFC3339, "2024-10-09T16:13:59Z")
g.Expect(err).ToNot(HaveOccurred())

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

g.Expect(tt.evictionResult.ConditionMessage()).To(Equal(tt.wantConditionMessage))
g.Expect(tt.evictionResult.ConditionMessage(&metav1.Time{Time: nodeDrainStartTime})).To(Equal(tt.wantConditionMessage))
})
}
}
Expand Down
Loading
Loading