Skip to content

Commit

Permalink
cluster-autoscaler/dead-nodes: (temp) emulate TaintBasedEvictions
Browse files Browse the repository at this point in the history
On our k8s v1.10 (configured without the TaintBasedEvictions feature gate),
recent CA versions fail to consider dead nodes (unknown status) as
unschedulable. The likely reason is that newer CA versions rely on the
/unreachable taint (provided by TaintBasedEvictions) for the node to
be considered unschedulable.

Our best medium-term fix will be to enable the TaintBasedEvictions feature
gate on 1.10 clusters. This will require a sufficient testing period, so
until then, this (hopefully) temporary patch should cover the case.
  • Loading branch information
bpineau committed Feb 1, 2021
1 parent 33a161b commit 8d1473a
Showing 1 changed file with 36 additions and 2 deletions.
38 changes: 36 additions & 2 deletions cluster-autoscaler/core/filter_out_schedulable.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/core/utils"
"k8s.io/autoscaler/cluster-autoscaler/metrics"
"k8s.io/autoscaler/cluster-autoscaler/simulator"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"

apiv1 "k8s.io/api/core/v1"
klog "k8s.io/klog/v2"
Expand Down Expand Up @@ -143,7 +144,8 @@ func (p *filterOutSchedulablePodListProcessor) filterOutSchedulableByPacking(
for _, pod := range unschedulableCandidates {
scheduledOnHintedNode := false
if hintedNodeName, hintFound := p.schedulablePodsNodeHints[pod.UID]; hintFound {
if predicateChecker.CheckPredicates(clusterSnapshot, pod, hintedNodeName) == nil {
nodeInfo, _ := clusterSnapshot.NodeInfos().Get(hintedNodeName)
if predicateChecker.CheckPredicates(clusterSnapshot, pod, hintedNodeName) == nil && isLivingNode(nodeInfo) {
// We treat predicate error and missing node error here in the same way
scheduledOnHintedNode = true
podsFilteredUsingHints++
Expand Down Expand Up @@ -184,7 +186,9 @@ func (p *filterOutSchedulablePodListProcessor) filterOutSchedulableByPacking(
unschedulePodsCacheHitCounter++
continue
}
nodeName, err := predicateChecker.FitsAnyNode(clusterSnapshot, pod)
nodeName, err := predicateChecker.FitsAnyNodeMatching(clusterSnapshot, pod, func(nodeInfo *schedulerframework.NodeInfo) bool {
return isLivingNode(nodeInfo)
})
if err == nil {
klog.V(4).Infof("Pod %s.%s marked as unschedulable can be scheduled on node %s. Ignoring"+
" in scale up.", pod.Namespace, pod.Name, nodeName)
Expand All @@ -204,6 +208,36 @@ func (p *filterOutSchedulablePodListProcessor) filterOutSchedulableByPacking(
return unschedulablePods, nil
}

// isLivingNode reports whether the node described by nodeInfo should still be
// treated as alive. A node is considered dead only when its NodeReady
// condition has been in the "Unknown" status for more than 10 minutes; in
// every other case (nil nodeInfo, no node attached, no NodeReady condition,
// Ready/NotReady status, or a recent transition to Unknown) it returns true.
//
// Needed for 1.10 clusters, until we set the TaintBasedEvictions feature gate
// to "true" there (already enabled by default on clusters using k8s v1.14 and
// up): TaintBasedEvictions places a node.kubernetes.io/unreachable taint on
// dead nodes, which helps the CA consider them unschedulable (unless
// explicitly tolerated).
func isLivingNode(nodeInfo *schedulerframework.NodeInfo) bool {
	// How long a node may stay in the "Unknown" ready state before we
	// consider it dead.
	const deadNodeGracePeriod = 10 * time.Minute

	if nodeInfo == nil {
		// We only care about filtering out nodes having "unknown" status.
		return true
	}

	node := nodeInfo.Node()
	if node == nil {
		// No node object to inspect: nothing indicates the node is dead.
		// (Checking node.Status here would dereference a nil pointer.)
		return true
	}

	// Ranging over a nil/empty Conditions slice is a no-op, so no separate
	// emptiness check is needed.
	for _, cond := range node.Status.Conditions {
		if cond.Type != apiv1.NodeReady || cond.Status != apiv1.ConditionUnknown {
			continue
		}
		if time.Since(cond.LastTransitionTime.Time) > deadNodeGracePeriod {
			return false
		}
	}

	return true
}

func moreImportantPod(pod1, pod2 *apiv1.Pod) bool {
// based on schedulers MoreImportantPod but does not compare Pod.Status.StartTime which does not make sense
// for unschedulable pods
Expand Down

0 comments on commit 8d1473a

Please sign in to comment.