Skip to content

Commit

Permalink
Implement Machine Deleting condition
Browse files Browse the repository at this point in the history
  • Loading branch information
sbueringer committed Oct 16, 2024
1 parent ff13a64 commit 8bd926d
Show file tree
Hide file tree
Showing 20 changed files with 1,314 additions and 322 deletions.
56 changes: 56 additions & 0 deletions api/v1beta1/machine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ const (
// MachineNodeConditionNotYetReportedV1Beta2Reason surfaces when a Machine's Node doesn't have a condition reported yet.
MachineNodeConditionNotYetReportedV1Beta2Reason = "NodeConditionNotYetReported"

// MachineNodeInternalErrorV1Beta2Reason surfaces unexpected failures when reading a Node object.
MachineNodeInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason

// MachineNodeDoesNotExistV1Beta2Reason surfaces when the node hosted on the machine does not exist.
// Note: this could happen when creating the machine. However, this state should be treated as an error if it lasts indefinitely.
MachineNodeDoesNotExistV1Beta2Reason = ObjectDoesNotExistV1Beta2Reason
Expand All @@ -204,6 +207,11 @@ const (
// Note: controllers can't identify if the Node was deleted by the controller itself, e.g.
// during the deletion workflow, or by a users.
MachineNodeDeletedV1Beta2Reason = ObjectDeletedV1Beta2Reason

// MachineNodeRemoteConnectionFailedV1Beta2Reason surfaces that the remote connection failed.
// If the remote connection probe failed for longer than remote conditions grace period,
// this reason is used when setting NodeHealthy and NodeReady conditions to `Unknown`.
MachineNodeRemoteConnectionFailedV1Beta2Reason = RemoteConnectionFailedV1Beta2Reason
)

// Machine's HealthCheckSucceeded condition and corresponding reasons that will be used in v1Beta2 API version.
Expand Down Expand Up @@ -271,6 +279,54 @@ const (
const (
// MachineDeletingV1Beta2Condition surfaces details about progress in the machine deletion workflow.
MachineDeletingV1Beta2Condition = DeletingV1Beta2Condition

// MachineDeletingDeletingV1Beta2Reason surfaces when the Machine is deleting.
// This reason is only used for the MachineDeletingV1Beta2Condition when calculating the
// Ready condition when the deletionTimestamp on a Machine is set.
MachineDeletingDeletingV1Beta2Reason = DeletingV1Beta2Condition

// MachineDeletingDeletionTimestampNotSetV1Beta2Reason surfaces when the Machine is not deleting because the
// DeletionTimestamp is not set.
MachineDeletingDeletionTimestampNotSetV1Beta2Reason = DeletionTimestampNotSetV1Beta2Reason

// MachineDeletingDeletionTimestampSetV1Beta2Reason surfaces when the Machine is deleting because the
// DeletionTimestamp is set. This reason is used if none of the more specific reasons apply.
MachineDeletingDeletionTimestampSetV1Beta2Reason = DeletionTimestampSetV1Beta2Reason

// MachineDeletingWaitingForPreDrainHookV1Beta2Reason surfaces when the Machine deletion
// waits for pre-drain hooks to complete. I.e. it waits until there are no annotations
// with the `pre-drain.delete.hook.machine.cluster.x-k8s.io` prefix on the Machine anymore.
MachineDeletingWaitingForPreDrainHookV1Beta2Reason = "WaitingForPreDrainHook"

// MachineDeletingDrainingNodeV1Beta2Reason surfaces when the Machine deletion is draining the Node.
MachineDeletingDrainingNodeV1Beta2Reason = "DrainingNode"

// MachineDeletingWaitingForVolumeDetachV1Beta2Reason surfaces when the Machine deletion is
// waiting for volumes to detach from the Node.
MachineDeletingWaitingForVolumeDetachV1Beta2Reason = "WaitingForVolumeDetach"

// MachineDeletingWaitingForPreTerminateHookV1Beta2Reason surfaces when the Machine deletion
// waits for pre-terminate hooks to complete. I.e. it waits until there are no annotations
// with the `pre-terminate.delete.hook.machine.cluster.x-k8s.io` prefix on the Machine anymore.
MachineDeletingWaitingForPreTerminateHookV1Beta2Reason = "WaitingForPreTerminateHook"

// MachineDeletingWaitingForInfrastructureDeletionV1Beta2Reason surfaces when the Machine deletion
// waits for InfraMachine deletion to complete.
MachineDeletingWaitingForInfrastructureDeletionV1Beta2Reason = "WaitingForInfrastructureDeletion"

// MachineDeletingWaitingForBootstrapDeletionV1Beta2Reason surfaces when the Machine deletion
// waits for BootstrapConfig deletion to complete.
MachineDeletingWaitingForBootstrapDeletionV1Beta2Reason = "WaitingForBootstrapDeletion"

// MachineDeletingDeletingNodeV1Beta2Reason surfaces when the Machine deletion is
// deleting the Node.
MachineDeletingDeletingNodeV1Beta2Reason = "DeletingNode"

// MachineDeletingDeletionCompletedV1Beta2Reason surfaces when the Machine deletion has been completed.
// This reason is set right after the `machine.cluster.x-k8s.io` finalizer is removed.
// This means that the object will go away (i.e. be removed from etcd), except if there are other
// finalizers on the Machine object.
MachineDeletingDeletionCompletedV1Beta2Reason = DeletionCompletedV1Beta2Reason
)

// Machine's Paused condition and corresponding reasons that will be used in v1Beta2 API version.
Expand Down
19 changes: 19 additions & 0 deletions api/v1beta1/v1beta2_condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,25 @@ const (

// PausedV1Beta2Reason surfaces when an object is paused.
PausedV1Beta2Reason = "Paused"

// RemoteConnectionFailedV1Beta2Reason surfaces that the remote connection failed.
// This is typically used when setting remote conditions (e.g. `NodeHealthy`) to `Unknown`
// after the remote connection probe didn't succeed for remote conditions grace period.
RemoteConnectionFailedV1Beta2Reason = "RemoteConnectionFailed"

// DeletionTimestampNotSetV1Beta2Reason surfaces when an object is not deleting because the
// DeletionTimestamp is not set.
DeletionTimestampNotSetV1Beta2Reason = "DeletionTimestampNotSet"

// DeletionTimestampSetV1Beta2Reason surfaces when an object is deleting because the
// DeletionTimestamp is set. This reason is used if none of the more specific reasons apply.
DeletionTimestampSetV1Beta2Reason = "DeletionTimestampSet"

// DeletionCompletedV1Beta2Reason surfaces when the deletion process has been completed.
// This reason is set right after the corresponding finalizer is removed.
// This means that the object will go away (i.e. be removed from etcd), except if there are other
// finalizers on the object.
DeletionCompletedV1Beta2Reason = "DeletionCompleted"
)

// Conditions that will be used for the MachineSet object in v1Beta2 API version.
Expand Down
11 changes: 7 additions & 4 deletions controllers/alias.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,17 @@ type MachineReconciler struct {

// WatchFilterValue is the label value used to filter events prior to reconciliation.
WatchFilterValue string

RemoteConditionsGracePeriod time.Duration
}

func (r *MachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
return (&machinecontroller.Reconciler{
Client: r.Client,
APIReader: r.APIReader,
ClusterCache: r.ClusterCache,
WatchFilterValue: r.WatchFilterValue,
Client: r.Client,
APIReader: r.APIReader,
ClusterCache: r.ClusterCache,
WatchFilterValue: r.WatchFilterValue,
RemoteConditionsGracePeriod: r.RemoteConditionsGracePeriod,
}).SetupWithManager(ctx, mgr, options)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
predicateLog := ctrl.LoggerFrom(ctx).WithValues("controller", "clusterclass")
err := ctrl.NewControllerManagedBy(mgr).
For(&clusterv1.ClusterClass{}).
Named("clusterclass").
WithOptions(options).
Watches(
&runtimev1.ExtensionConfig{},
Expand Down
97 changes: 0 additions & 97 deletions internal/controllers/machine/drain/cache.go

This file was deleted.

4 changes: 2 additions & 2 deletions internal/controllers/machine/drain/drain.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,12 +301,12 @@ func (r EvictionResult) DrainCompleted() bool {
}

// ConditionMessage returns a condition message for the case where a drain is not completed.
func (r EvictionResult) ConditionMessage() string {
func (r EvictionResult) ConditionMessage(nodeDrainStartTime *metav1.Time) string {
if r.DrainCompleted() {
return ""
}

conditionMessage := "Drain not completed yet:"
conditionMessage := fmt.Sprintf("Drain not completed yet (started at %s):", nodeDrainStartTime.Format(time.RFC3339))
if len(r.PodsDeletionTimestampSet) > 0 {
conditionMessage = fmt.Sprintf("%s\n* Pods with deletionTimestamp that still exist: %s",
conditionMessage, PodListToString(r.PodsDeletionTimestampSet, 5))
Expand Down
13 changes: 9 additions & 4 deletions internal/controllers/machine/drain/drain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,8 @@ func TestEvictPods(t *testing.T) {
}

func TestEvictionResult_ConditionMessage(t *testing.T) {
g := NewWithT(t)

tests := []struct {
name string
evictionResult EvictionResult
Expand Down Expand Up @@ -819,7 +821,7 @@ func TestEvictionResult_ConditionMessage(t *testing.T) {
},
},
},
wantConditionMessage: `Drain not completed yet:
wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
* Pods with deletionTimestamp that still exist: pod-2-deletionTimestamp-set-1, pod-3-to-trigger-eviction-successfully-1
* Pods with eviction failed:
* Cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently: pod-5-to-trigger-eviction-pdb-violated-1
Expand Down Expand Up @@ -949,7 +951,7 @@ func TestEvictionResult_ConditionMessage(t *testing.T) {
},
},
},
wantConditionMessage: `Drain not completed yet:
wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
* Pods with deletionTimestamp that still exist: pod-2-deletionTimestamp-set-1, pod-2-deletionTimestamp-set-2, pod-2-deletionTimestamp-set-3, pod-3-to-trigger-eviction-successfully-1, pod-3-to-trigger-eviction-successfully-2, ... (2 more)
* Pods with eviction failed:
* Cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently: pod-5-to-trigger-eviction-pdb-violated-1, pod-5-to-trigger-eviction-pdb-violated-2, pod-5-to-trigger-eviction-pdb-violated-3, ... (3 more)
Expand Down Expand Up @@ -1024,7 +1026,7 @@ func TestEvictionResult_ConditionMessage(t *testing.T) {
},
},
},
wantConditionMessage: `Drain not completed yet:
wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
* Pods with eviction failed:
* some other error 1: pod-1-to-trigger-eviction-some-other-error
* some other error 2: pod-2-to-trigger-eviction-some-other-error
Expand All @@ -1035,11 +1037,14 @@ func TestEvictionResult_ConditionMessage(t *testing.T) {
},
}

nodeDrainStartTime, err := time.Parse(time.RFC3339, "2024-10-09T16:13:59Z")
g.Expect(err).ToNot(HaveOccurred())

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

g.Expect(tt.evictionResult.ConditionMessage()).To(Equal(tt.wantConditionMessage))
g.Expect(tt.evictionResult.ConditionMessage(&metav1.Time{Time: nodeDrainStartTime})).To(Equal(tt.wantConditionMessage))
})
}
}
Expand Down
Loading

0 comments on commit 8bd926d

Please sign in to comment.