Skip to content

Commit

Permalink
Refine v1beta2 ScalingUp conditions
Browse files — browse the repository at this point in the history
  • Loading branch information
fabriziopandini committed Nov 18, 2024
1 parent b12ad96 commit f262b53
Show file tree
Hide file tree
Showing 8 changed files with 201 additions and 150 deletions.
57 changes: 30 additions & 27 deletions controlplane/kubeadm/internal/controllers/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ func setScalingUpCondition(_ context.Context, kcp *controlplanev1.KubeadmControl
if currentReplicas >= desiredReplicas {
var message string
if missingReferencesMessage != "" {
message = fmt.Sprintf("Scaling up would be blocked %s", missingReferencesMessage)
message = fmt.Sprintf("Scaling up would be blocked because %s", missingReferencesMessage)
}
v1beta2conditions.Set(kcp, metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
Expand All @@ -242,28 +242,21 @@ func setScalingUpCondition(_ context.Context, kcp *controlplanev1.KubeadmControl
}

message := fmt.Sprintf("Scaling up from %d to %d replicas", currentReplicas, desiredReplicas)
if missingReferencesMessage != "" {
message = fmt.Sprintf("%s is blocked %s", message, missingReferencesMessage)
}
messages := []string{message}

if preflightChecks.HasDeletingMachine {
messages = append(messages, "waiting for Machine being deleted")
}

if preflightChecks.ControlPlaneComponentsNotHealthy {
messages = append(messages, "waiting for control plane components to be healthy")
additionalMessages := getPreflightMessages(preflightChecks)
if missingReferencesMessage != "" {
additionalMessages = append(additionalMessages, fmt.Sprintf("* %s", missingReferencesMessage))
}

if preflightChecks.EtcdClusterNotHealthy {
messages = append(messages, "waiting for etcd cluster to be healthy")
if len(additionalMessages) > 0 {
message += fmt.Sprintf(" is blocked because:\n%s", strings.Join(additionalMessages, "\n"))
}

v1beta2conditions.Set(kcp, metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
Message: strings.Join(messages, "; "),
Message: message,
})
}

Expand Down Expand Up @@ -292,28 +285,22 @@ func setScalingDownCondition(_ context.Context, kcp *controlplanev1.KubeadmContr
return
}

messages := []string{fmt.Sprintf("Scaling down from %d to %d replicas", currentReplicas, desiredReplicas)}
if preflightChecks.HasDeletingMachine {
messages = append(messages, "waiting for Machine being deleted")
}
message := fmt.Sprintf("Scaling down from %d to %d replicas", currentReplicas, desiredReplicas)

additionalMessages := getPreflightMessages(preflightChecks)
if staleMessage := aggregateStaleMachines(machines); staleMessage != "" {
messages = append(messages, staleMessage)
}

if preflightChecks.ControlPlaneComponentsNotHealthy {
messages = append(messages, "waiting for control plane components to be healthy")
additionalMessages = append(additionalMessages, fmt.Sprintf("* %s", staleMessage))
}

if preflightChecks.EtcdClusterNotHealthy {
messages = append(messages, "waiting for etcd cluster to be healthy")
if len(additionalMessages) > 0 {
message += fmt.Sprintf(" is blocked because:\n%s", strings.Join(additionalMessages, "\n"))
}

v1beta2conditions.Set(kcp, metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: strings.Join(messages, "; "),
Message: message,
})
}

Expand Down Expand Up @@ -399,7 +386,7 @@ func setMachinesUpToDateCondition(ctx context.Context, kcp *controlplanev1.Kubea

// calculateMissingReferencesMessage returns a short message describing the
// reference that is missing for this KubeadmControlPlane, or "" when nothing
// is missing. The message intentionally does not include a "because" prefix;
// callers compose it into their own "is blocked because ..." sentences.
func calculateMissingReferencesMessage(kcp *controlplanev1.KubeadmControlPlane, infraMachineTemplateNotFound bool) string {
	if infraMachineTemplateNotFound {
		return fmt.Sprintf("%s does not exist", kcp.Spec.MachineTemplate.InfrastructureRef.Kind)
	}
	return ""
}
Expand Down Expand Up @@ -684,6 +671,22 @@ func minTime(t1, t2 time.Time) time.Time {
return t1
}

// getPreflightMessages converts failed preflight checks into a list of
// human-readable messages, each prefixed with "* " so they render as bullet
// points when joined into a condition message.
func getPreflightMessages(preflightChecks internal.PreflightCheckResults) []string {
	// Return a nil slice when every preflight check passed; callers only use
	// len() and append, both of which are safe on a nil slice.
	var messages []string
	if preflightChecks.HasDeletingMachine {
		messages = append(messages, "* waiting for a control plane Machine to complete deletion")
	}

	if preflightChecks.ControlPlaneComponentsNotHealthy {
		messages = append(messages, "* waiting for control plane components to be healthy")
	}

	if preflightChecks.EtcdClusterNotHealthy {
		messages = append(messages, "* waiting for etcd cluster to be healthy")
	}
	return messages
}

func aggregateStaleMachines(machines collections.Machines) string {
if len(machines) == 0 {
return ""
Expand Down
49 changes: 29 additions & 20 deletions controlplane/kubeadm/internal/controllers/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,11 @@ func Test_setScalingUpCondition(t *testing.T) {
InfraMachineTemplateIsNotFound: true,
},
expectCondition: metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
Message: "Scaling up from 3 to 5 replicas is blocked because AWSTemplate does not exist",
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
Message: "Scaling up from 3 to 5 replicas is blocked because:\n" +
"* AWSTemplate does not exist",
},
},
{
Expand All @@ -258,10 +259,13 @@ func Test_setScalingUpCondition(t *testing.T) {
},
},
expectCondition: metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
Message: "Scaling up from 3 to 5 replicas; waiting for Machine being deleted; waiting for control plane components to be healthy; waiting for etcd cluster to be healthy",
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
Message: "Scaling up from 3 to 5 replicas is blocked because:\n" +
"* waiting for a control plane Machine to complete deletion\n" +
"* waiting for control plane components to be healthy\n" +
"* waiting for etcd cluster to be healthy",
},
},
}
Expand Down Expand Up @@ -373,10 +377,11 @@ func Test_setScalingDownCondition(t *testing.T) {
),
},
expectCondition: metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: "Scaling down from 3 to 1 replicas; Machine m1 is in deletion since more than 30m",
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: "Scaling down from 3 to 1 replicas is blocked because:\n" +
"* Machine m1 is in deletion since more than 30m",
},
},
{
Expand All @@ -393,10 +398,11 @@ func Test_setScalingDownCondition(t *testing.T) {
),
},
expectCondition: metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: "Scaling down from 3 to 1 replicas; Machines m1, m2 are in deletion since more than 30m",
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: "Scaling down from 3 to 1 replicas is blocked because:\n" +
"* Machines m1, m2 are in deletion since more than 30m",
},
},
{
Expand All @@ -418,10 +424,13 @@ func Test_setScalingDownCondition(t *testing.T) {
},
},
expectCondition: metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: "Scaling down from 3 to 1 replicas; waiting for Machine being deleted; waiting for control plane components to be healthy; waiting for etcd cluster to be healthy",
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
Message: "Scaling down from 3 to 1 replicas is blocked because:\n" +
"* waiting for a control plane Machine to complete deletion\n" +
"* waiting for control plane components to be healthy\n" +
"* waiting for etcd cluster to be healthy",
},
},
}
Expand Down
52 changes: 32 additions & 20 deletions internal/controllers/machineset/machineset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ type scope struct {
infrastructureObjectNotFound bool
getAndAdoptMachinesForMachineSetSucceeded bool
owningMachineDeployment *clusterv1.MachineDeployment
scaleUpPreflightCheckErrMessage string
scaleUpPreflightCheckErrMessages []string
reconciliationTime time.Time
}

Expand Down Expand Up @@ -673,15 +673,19 @@ func (r *Reconciler) syncReplicas(ctx context.Context, s *scope) (ctrl.Result, e
}
}

result, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, cluster, ms, "Scale up")
if err != nil || !result.IsZero() {
preflightCheckErrMessages, err := r.runPreflightChecks(ctx, cluster, ms, "Scale up")
if err != nil || len(preflightCheckErrMessages) > 0 {
if err != nil {
// If the error is not nil use that as the message for the condition.
preflightCheckErrMessage = err.Error()
// If err is not nil use that as the preflightCheckErrMessage
preflightCheckErrMessages = append(preflightCheckErrMessages, err.Error())
}
s.scaleUpPreflightCheckErrMessage = preflightCheckErrMessage
conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.PreflightCheckFailedReason, clusterv1.ConditionSeverityError, preflightCheckErrMessage)
return result, err

s.scaleUpPreflightCheckErrMessages = preflightCheckErrMessages
conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.PreflightCheckFailedReason, clusterv1.ConditionSeverityError, strings.Join(preflightCheckErrMessages, "; "))
if err != nil {
return ctrl.Result{}, err
}
return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, nil
}

var (
Expand Down Expand Up @@ -1418,31 +1422,39 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope) (
}

// Run preflight checks.
preflightChecksResult, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, cluster, ms, "Machine remediation")
if err != nil {
// If err is not nil use that as the preflightCheckErrMessage
preflightCheckErrMessage = err.Error()
}
preflightCheckErrMessages, err := r.runPreflightChecks(ctx, cluster, ms, "Machine remediation")
if err != nil || len(preflightCheckErrMessages) > 0 {
if err != nil {
// If err is not nil use that as the preflightCheckErrMessage
preflightCheckErrMessages = append(preflightCheckErrMessages, err.Error())
}

listMessages := make([]string, len(preflightCheckErrMessages))
for i, msg := range preflightCheckErrMessages {
listMessages[i] = fmt.Sprintf("* %s", msg)
}

preflightChecksFailed := err != nil || !preflightChecksResult.IsZero()
if preflightChecksFailed {
// PreflightChecks did not pass. Update the MachineOwnerRemediated condition on the unhealthy Machines with
// WaitingForRemediationReason reason.
if err := patchMachineConditions(ctx, r.Client, machinesToRemediate, metav1.Condition{
if patchErr := patchMachineConditions(ctx, r.Client, machinesToRemediate, metav1.Condition{
Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.MachineSetMachineRemediationDeferredV1Beta2Reason,
Message: preflightCheckErrMessage,
Message: strings.Join(listMessages, "\n"),
}, &clusterv1.Condition{
Type: clusterv1.MachineOwnerRemediatedCondition,
Status: corev1.ConditionFalse,
Reason: clusterv1.WaitingForRemediationReason,
Severity: clusterv1.ConditionSeverityWarning,
Message: preflightCheckErrMessage,
}); err != nil {
Message: strings.Join(preflightCheckErrMessages, "; "),
}); patchErr != nil {
return ctrl.Result{}, kerrors.NewAggregate([]error{patchErr, err})
}

if err != nil {
return ctrl.Result{}, err
}
return preflightChecksResult, nil
return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, nil
}

// PreflightChecks passed, so it is safe to remediate unhealthy machines by deleting them.
Expand Down
19 changes: 11 additions & 8 deletions internal/controllers/machineset/machineset_controller_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package machineset
import (
"context"
"fmt"
"slices"
"sort"
"strings"
"time"
Expand Down Expand Up @@ -48,7 +47,7 @@ func (r *Reconciler) updateStatus(ctx context.Context, s *scope) {
// Conditions

// Update the ScalingUp and ScalingDown condition.
setScalingUpCondition(ctx, s.machineSet, s.machines, s.bootstrapObjectNotFound, s.infrastructureObjectNotFound, s.getAndAdoptMachinesForMachineSetSucceeded, s.scaleUpPreflightCheckErrMessage)
setScalingUpCondition(ctx, s.machineSet, s.machines, s.bootstrapObjectNotFound, s.infrastructureObjectNotFound, s.getAndAdoptMachinesForMachineSetSucceeded, s.scaleUpPreflightCheckErrMessages)
setScalingDownCondition(ctx, s.machineSet, s.machines, s.getAndAdoptMachinesForMachineSetSucceeded)

// MachinesReady condition: aggregate the Machine's Ready condition.
Expand Down Expand Up @@ -93,7 +92,7 @@ func setReplicas(_ context.Context, ms *clusterv1.MachineSet, machines []*cluste
ms.Status.V1Beta2.UpToDateReplicas = ptr.To(upToDateReplicas)
}

func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines []*clusterv1.Machine, bootstrapObjectNotFound, infrastructureObjectNotFound, getAndAdoptMachinesForMachineSetSucceeded bool, scaleUpPreflightCheckErrMessage string) {
func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines []*clusterv1.Machine, bootstrapObjectNotFound, infrastructureObjectNotFound, getAndAdoptMachinesForMachineSetSucceeded bool, scaleUpPreflightCheckErrMessages []string) {
// If we got unexpected errors in listing the machines (this should never happen), surface them.
if !getAndAdoptMachinesForMachineSetSucceeded {
v1beta2conditions.Set(ms, metav1.Condition{
Expand Down Expand Up @@ -140,11 +139,15 @@ func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines

// Scaling up.
message := fmt.Sprintf("Scaling up from %d to %d replicas", currentReplicas, desiredReplicas)
if missingReferencesMessage != "" || scaleUpPreflightCheckErrMessage != "" {
blockMessages := slices.DeleteFunc([]string{missingReferencesMessage, scaleUpPreflightCheckErrMessage}, func(s string) bool {
return s == ""
})
message += fmt.Sprintf(" is blocked because %s", strings.Join(blockMessages, " and "))
if missingReferencesMessage != "" || len(scaleUpPreflightCheckErrMessages) > 0 {
listMessages := make([]string, len(scaleUpPreflightCheckErrMessages))
for i, msg := range scaleUpPreflightCheckErrMessages {
listMessages[i] = fmt.Sprintf("* %s", msg)
}
if missingReferencesMessage != "" {
listMessages = append(listMessages, fmt.Sprintf("* %s", missingReferencesMessage))
}
message += fmt.Sprintf(" is blocked because:\n%s", strings.Join(listMessages, "\n"))
}
v1beta2conditions.Set(ms, metav1.Condition{
Type: clusterv1.MachineSetScalingUpV1Beta2Condition,
Expand Down
Loading

0 comments on commit f262b53

Please sign in to comment.