Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🌱 Refine v1beta2 ScalingUp conditions #11432

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 30 additions & 27 deletions controlplane/kubeadm/internal/controllers/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ func setScalingUpCondition(_ context.Context, kcp *controlplanev1.KubeadmControl
if currentReplicas >= desiredReplicas {
var message string
if missingReferencesMessage != "" {
message = fmt.Sprintf("Scaling up would be blocked %s", missingReferencesMessage)
message = fmt.Sprintf("Scaling up would be blocked because %s", missingReferencesMessage)
}
v1beta2conditions.Set(kcp, metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
Expand All @@ -242,28 +242,21 @@ func setScalingUpCondition(_ context.Context, kcp *controlplanev1.KubeadmControl
}

message := fmt.Sprintf("Scaling up from %d to %d replicas", currentReplicas, desiredReplicas)
if missingReferencesMessage != "" {
message = fmt.Sprintf("%s is blocked %s", message, missingReferencesMessage)
}
messages := []string{message}

if preflightChecks.HasDeletingMachine {
messages = append(messages, "waiting for Machine being deleted")
}

if preflightChecks.ControlPlaneComponentsNotHealthy {
messages = append(messages, "waiting for control plane components to be healthy")
additionalMessages := getPreflightMessages(preflightChecks)
if missingReferencesMessage != "" {
additionalMessages = append(additionalMessages, fmt.Sprintf("* %s", missingReferencesMessage))
}

if preflightChecks.EtcdClusterNotHealthy {
messages = append(messages, "waiting for etcd cluster to be healthy")
if len(additionalMessages) > 0 {
message += fmt.Sprintf(" is blocked because:\n%s", strings.Join(additionalMessages, "\n"))
}

v1beta2conditions.Set(kcp, metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
Message: strings.Join(messages, "; "),
Message: message,
})
}

Expand Down Expand Up @@ -292,28 +285,22 @@ func setScalingDownCondition(_ context.Context, kcp *controlplanev1.KubeadmContr
return
}

messages := []string{fmt.Sprintf("Scaling down from %d to %d replicas", currentReplicas, desiredReplicas)}
if preflightChecks.HasDeletingMachine {
messages = append(messages, "waiting for Machine being deleted")
}
message := fmt.Sprintf("Scaling down from %d to %d replicas", currentReplicas, desiredReplicas)

additionalMessages := getPreflightMessages(preflightChecks)
if staleMessage := aggregateStaleMachines(machines); staleMessage != "" {
messages = append(messages, staleMessage)
}

if preflightChecks.ControlPlaneComponentsNotHealthy {
messages = append(messages, "waiting for control plane components to be healthy")
additionalMessages = append(additionalMessages, fmt.Sprintf("* %s", staleMessage))
}

if preflightChecks.EtcdClusterNotHealthy {
messages = append(messages, "waiting for etcd cluster to be healthy")
if len(additionalMessages) > 0 {
message += fmt.Sprintf(" is blocked because:\n%s", strings.Join(additionalMessages, "\n"))
}

v1beta2conditions.Set(kcp, metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: strings.Join(messages, "; "),
Message: message,
})
}

Expand Down Expand Up @@ -399,7 +386,7 @@ func setMachinesUpToDateCondition(ctx context.Context, kcp *controlplanev1.Kubea

// calculateMissingReferencesMessage returns a short message fragment naming the
// missing infrastructure machine template reference (e.g. "AWSTemplate does not
// exist"), or "" when the reference resolves. The fragment is intended to be
// embedded into a larger condition message by the callers (e.g. prefixed with
// "Scaling up would be blocked because ..." or rendered as a "* " bullet),
// which is why it intentionally carries no leading "because".
func calculateMissingReferencesMessage(kcp *controlplanev1.KubeadmControlPlane, infraMachineTemplateNotFound bool) string {
	if infraMachineTemplateNotFound {
		return fmt.Sprintf("%s does not exist", kcp.Spec.MachineTemplate.InfrastructureRef.Kind)
	}
	return ""
}
Expand Down Expand Up @@ -684,6 +671,22 @@ func minTime(t1, t2 time.Time) time.Time {
return t1
}

// getPreflightMessages converts the failed preflight checks into a list of
// bullet-point messages ("* ..."), one per failing check. Callers join the
// returned slice with "\n" to build the ScalingUp/ScalingDown condition
// message; an empty result means no preflight check is blocking.
func getPreflightMessages(preflightChecks internal.PreflightCheckResults) []string {
	// Prefer a nil slice over an empty literal: append and len work on nil,
	// and callers only append to / measure the result.
	var additionalMessages []string
	if preflightChecks.HasDeletingMachine {
		additionalMessages = append(additionalMessages, "* waiting for a control plane Machine to complete deletion")
	}

	if preflightChecks.ControlPlaneComponentsNotHealthy {
		additionalMessages = append(additionalMessages, "* waiting for control plane components to be healthy")
	}

	if preflightChecks.EtcdClusterNotHealthy {
		additionalMessages = append(additionalMessages, "* waiting for etcd cluster to be healthy")
	}
	return additionalMessages
}

func aggregateStaleMachines(machines collections.Machines) string {
if len(machines) == 0 {
return ""
Expand Down
49 changes: 29 additions & 20 deletions controlplane/kubeadm/internal/controllers/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,11 @@ func Test_setScalingUpCondition(t *testing.T) {
InfraMachineTemplateIsNotFound: true,
},
expectCondition: metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
Message: "Scaling up from 3 to 5 replicas is blocked because AWSTemplate does not exist",
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
Message: "Scaling up from 3 to 5 replicas is blocked because:\n" +
"* AWSTemplate does not exist",
},
},
{
Expand All @@ -258,10 +259,13 @@ func Test_setScalingUpCondition(t *testing.T) {
},
},
expectCondition: metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
Message: "Scaling up from 3 to 5 replicas; waiting for Machine being deleted; waiting for control plane components to be healthy; waiting for etcd cluster to be healthy",
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
Message: "Scaling up from 3 to 5 replicas is blocked because:\n" +
"* waiting for a control plane Machine to complete deletion\n" +
"* waiting for control plane components to be healthy\n" +
"* waiting for etcd cluster to be healthy",
},
},
}
Expand Down Expand Up @@ -373,10 +377,11 @@ func Test_setScalingDownCondition(t *testing.T) {
),
},
expectCondition: metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: "Scaling down from 3 to 1 replicas; Machine m1 is in deletion since more than 30m",
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: "Scaling down from 3 to 1 replicas is blocked because:\n" +
"* Machine m1 is in deletion since more than 30m",
},
},
{
Expand All @@ -393,10 +398,11 @@ func Test_setScalingDownCondition(t *testing.T) {
),
},
expectCondition: metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: "Scaling down from 3 to 1 replicas; Machines m1, m2 are in deletion since more than 30m",
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: "Scaling down from 3 to 1 replicas is blocked because:\n" +
"* Machines m1, m2 are in deletion since more than 30m",
},
},
{
Expand All @@ -418,10 +424,13 @@ func Test_setScalingDownCondition(t *testing.T) {
},
},
expectCondition: metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: "Scaling down from 3 to 1 replicas; waiting for Machine being deleted; waiting for control plane components to be healthy; waiting for etcd cluster to be healthy",
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: "Scaling down from 3 to 1 replicas is blocked because:\n" +
"* waiting for a control plane Machine to complete deletion\n" +
"* waiting for control plane components to be healthy\n" +
"* waiting for etcd cluster to be healthy",
},
},
}
Expand Down
52 changes: 32 additions & 20 deletions internal/controllers/machineset/machineset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ type scope struct {
infrastructureObjectNotFound bool
getAndAdoptMachinesForMachineSetSucceeded bool
owningMachineDeployment *clusterv1.MachineDeployment
scaleUpPreflightCheckErrMessage string
scaleUpPreflightCheckErrMessages []string
reconciliationTime time.Time
}

Expand Down Expand Up @@ -673,15 +673,19 @@ func (r *Reconciler) syncReplicas(ctx context.Context, s *scope) (ctrl.Result, e
}
}

result, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, cluster, ms, "Scale up")
if err != nil || !result.IsZero() {
preflightCheckErrMessages, err := r.runPreflightChecks(ctx, cluster, ms, "Scale up")
if err != nil || len(preflightCheckErrMessages) > 0 {
if err != nil {
// If the error is not nil use that as the message for the condition.
preflightCheckErrMessage = err.Error()
// If err is not nil use that as the preflightCheckErrMessage
preflightCheckErrMessages = append(preflightCheckErrMessages, err.Error())
}
s.scaleUpPreflightCheckErrMessage = preflightCheckErrMessage
conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.PreflightCheckFailedReason, clusterv1.ConditionSeverityError, preflightCheckErrMessage)
return result, err

s.scaleUpPreflightCheckErrMessages = preflightCheckErrMessages
conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.PreflightCheckFailedReason, clusterv1.ConditionSeverityError, strings.Join(preflightCheckErrMessages, "; "))
if err != nil {
return ctrl.Result{}, err
}
return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, nil
}

var (
Expand Down Expand Up @@ -1418,31 +1422,39 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope) (
}

// Run preflight checks.
preflightChecksResult, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, cluster, ms, "Machine remediation")
if err != nil {
// If err is not nil use that as the preflightCheckErrMessage
preflightCheckErrMessage = err.Error()
}
preflightCheckErrMessages, err := r.runPreflightChecks(ctx, cluster, ms, "Machine remediation")
if err != nil || len(preflightCheckErrMessages) > 0 {
if err != nil {
// If err is not nil use that as the preflightCheckErrMessage
preflightCheckErrMessages = append(preflightCheckErrMessages, err.Error())
}

listMessages := make([]string, len(preflightCheckErrMessages))
for i, msg := range preflightCheckErrMessages {
listMessages[i] = fmt.Sprintf("* %s", msg)
}

preflightChecksFailed := err != nil || !preflightChecksResult.IsZero()
if preflightChecksFailed {
// PreflightChecks did not pass. Update the MachineOwnerRemediated condition on the unhealthy Machines with
// WaitingForRemediationReason reason.
if err := patchMachineConditions(ctx, r.Client, machinesToRemediate, metav1.Condition{
if patchErr := patchMachineConditions(ctx, r.Client, machinesToRemediate, metav1.Condition{
Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.MachineSetMachineRemediationDeferredV1Beta2Reason,
Message: preflightCheckErrMessage,
Message: strings.Join(listMessages, "\n"),
}, &clusterv1.Condition{
Type: clusterv1.MachineOwnerRemediatedCondition,
Status: corev1.ConditionFalse,
Reason: clusterv1.WaitingForRemediationReason,
Severity: clusterv1.ConditionSeverityWarning,
Message: preflightCheckErrMessage,
}); err != nil {
Message: strings.Join(preflightCheckErrMessages, "; "),
sbueringer marked this conversation as resolved.
Show resolved Hide resolved
}); patchErr != nil {
return ctrl.Result{}, kerrors.NewAggregate([]error{err, patchErr})
}

if err != nil {
return ctrl.Result{}, err
}
return preflightChecksResult, nil
return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, nil
}

// PreflightChecks passed, so it is safe to remediate unhealthy machines by deleting them.
Expand Down
19 changes: 11 additions & 8 deletions internal/controllers/machineset/machineset_controller_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package machineset
import (
"context"
"fmt"
"slices"
"sort"
"strings"
"time"
Expand Down Expand Up @@ -48,7 +47,7 @@ func (r *Reconciler) updateStatus(ctx context.Context, s *scope) {
// Conditions

// Update the ScalingUp and ScalingDown condition.
setScalingUpCondition(ctx, s.machineSet, s.machines, s.bootstrapObjectNotFound, s.infrastructureObjectNotFound, s.getAndAdoptMachinesForMachineSetSucceeded, s.scaleUpPreflightCheckErrMessage)
setScalingUpCondition(ctx, s.machineSet, s.machines, s.bootstrapObjectNotFound, s.infrastructureObjectNotFound, s.getAndAdoptMachinesForMachineSetSucceeded, s.scaleUpPreflightCheckErrMessages)
setScalingDownCondition(ctx, s.machineSet, s.machines, s.getAndAdoptMachinesForMachineSetSucceeded)

// MachinesReady condition: aggregate the Machine's Ready condition.
Expand Down Expand Up @@ -93,7 +92,7 @@ func setReplicas(_ context.Context, ms *clusterv1.MachineSet, machines []*cluste
ms.Status.V1Beta2.UpToDateReplicas = ptr.To(upToDateReplicas)
}

func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines []*clusterv1.Machine, bootstrapObjectNotFound, infrastructureObjectNotFound, getAndAdoptMachinesForMachineSetSucceeded bool, scaleUpPreflightCheckErrMessage string) {
func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines []*clusterv1.Machine, bootstrapObjectNotFound, infrastructureObjectNotFound, getAndAdoptMachinesForMachineSetSucceeded bool, scaleUpPreflightCheckErrMessages []string) {
// If we got unexpected errors in listing the machines (this should never happen), surface them.
if !getAndAdoptMachinesForMachineSetSucceeded {
v1beta2conditions.Set(ms, metav1.Condition{
Expand Down Expand Up @@ -140,11 +139,15 @@ func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines

// Scaling up.
message := fmt.Sprintf("Scaling up from %d to %d replicas", currentReplicas, desiredReplicas)
if missingReferencesMessage != "" || scaleUpPreflightCheckErrMessage != "" {
blockMessages := slices.DeleteFunc([]string{missingReferencesMessage, scaleUpPreflightCheckErrMessage}, func(s string) bool {
return s == ""
})
message += fmt.Sprintf(" is blocked because %s", strings.Join(blockMessages, " and "))
if missingReferencesMessage != "" || len(scaleUpPreflightCheckErrMessages) > 0 {
listMessages := make([]string, len(scaleUpPreflightCheckErrMessages))
for i, msg := range scaleUpPreflightCheckErrMessages {
listMessages[i] = fmt.Sprintf("* %s", msg)
}
if missingReferencesMessage != "" {
listMessages = append(listMessages, fmt.Sprintf("* %s", missingReferencesMessage))
}
message += fmt.Sprintf(" is blocked because:\n%s", strings.Join(listMessages, "\n"))
sbueringer marked this conversation as resolved.
Show resolved Hide resolved
}
v1beta2conditions.Set(ms, metav1.Condition{
Type: clusterv1.MachineSetScalingUpV1Beta2Condition,
Expand Down
Loading