Skip to content

Commit

Permalink
Set different hostname label for upcoming nodes
Browse files Browse the repository at this point in the history
Function copying template node to use for upcoming nodes was
not chaning hostname label, meaning that features relying on
this label (ex. pod antiaffinity on hostname topology) would
treat all upcoming nodes as a single node.
This resulted in triggering too many scale-ups for pods
using such features. Analogous function in binpacking didn't
have the same bug (but it didn't set unique UID or pod names).
I extracted the functionality to a util function used in both
places to avoid the two functions getting out of sync again.
  • Loading branch information
MaciekPytel authored and bpineau committed Mar 3, 2021
1 parent 0166a3d commit 68c4689
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 31 deletions.
19 changes: 2 additions & 17 deletions cluster-autoscaler/core/static_autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (

apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/uuid"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
Expand All @@ -41,6 +40,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/utils/backoff"
"k8s.io/autoscaler/cluster-autoscaler/utils/deletetaint"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
scheduler_utils "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
"k8s.io/autoscaler/cluster-autoscaler/utils/tpu"
"k8s.io/autoscaler/cluster-autoscaler/utils/volume"
Expand Down Expand Up @@ -802,21 +802,6 @@ func allPodsAreNew(pods []*apiv1.Pod, currentTime time.Time) bool {
return found && oldest.Add(unschedulablePodWithGpuTimeBuffer).After(currentTime)
}

func deepCopyNodeInfo(nodeTemplate *schedulerframework.NodeInfo, index int) *schedulerframework.NodeInfo {
node := nodeTemplate.Node().DeepCopy()
node.Name = fmt.Sprintf("%s-%d", node.Name, index)
node.UID = uuid.NewUUID()
nodeInfo := schedulerframework.NewNodeInfo()
nodeInfo.SetNode(node)
for _, podInfo := range nodeTemplate.Pods {
pod := podInfo.Pod.DeepCopy()
pod.Name = fmt.Sprintf("%s-%d", podInfo.Pod.Name, index)
pod.UID = uuid.NewUUID()
nodeInfo.AddPod(pod)
}
return nodeInfo
}

func getUpcomingNodeInfos(registry *clusterstate.ClusterStateRegistry, nodeInfos map[string]*schedulerframework.NodeInfo) []*schedulerframework.NodeInfo {
upcomingNodes := make([]*schedulerframework.NodeInfo, 0)
for nodeGroup, numberOfNodes := range registry.GetUpcomingNodes() {
Expand All @@ -835,7 +820,7 @@ func getUpcomingNodeInfos(registry *clusterstate.ClusterStateRegistry, nodeInfos
// Ensure new nodes have different names because nodeName
// will be used as a map key. Also deep copy pods (daemonsets &
// any pods added by cloud provider on template).
upcomingNodes = append(upcomingNodes, deepCopyNodeInfo(nodeTemplate, i))
upcomingNodes = append(upcomingNodes, scheduler_utils.DeepCopyTemplateNode(nodeTemplate, i))
}
}
return upcomingNodes
Expand Down
20 changes: 6 additions & 14 deletions cluster-autoscaler/estimator/binpacking_estimator.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,12 @@ limitations under the License.
package estimator

import (
"fmt"
"sort"
"time"

apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/autoscaler/cluster-autoscaler/simulator"
"k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
klog "k8s.io/klog/v2"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)
Expand Down Expand Up @@ -75,7 +74,6 @@ func (estimator *BinpackingNodeEstimator) Estimate(
}
}()

newNodeNameTimestamp := time.Now()
newNodeNameIndex := 0

for _, podInfo := range podInfos {
Expand All @@ -94,7 +92,7 @@ func (estimator *BinpackingNodeEstimator) Estimate(

if !found {
// Add new node
newNodeName, err := estimator.addNewNodeToSnapshot(nodeTemplate, newNodeNameTimestamp, newNodeNameIndex)
newNodeName, err := estimator.addNewNodeToSnapshot(nodeTemplate, newNodeNameIndex)
if err != nil {
klog.Errorf("Error while adding new node for template to ClusterSnapshot; %v", err)
return 0
Expand All @@ -113,23 +111,17 @@ func (estimator *BinpackingNodeEstimator) Estimate(

func (estimator *BinpackingNodeEstimator) addNewNodeToSnapshot(
template *schedulerframework.NodeInfo,
nameTimestamp time.Time,
nameIndex int) (string, error) {

newNode := template.Node().DeepCopy()
newNode.Name = fmt.Sprintf("%s-%d-%d", newNode.Name, nameTimestamp.Unix(), nameIndex)
if newNode.Labels == nil {
newNode.Labels = make(map[string]string)
}
newNode.Labels["kubernetes.io/hostname"] = newNode.Name
newNodeInfo := scheduler.DeepCopyTemplateNode(template, nameIndex)
var pods []*apiv1.Pod
for _, podInfo := range template.Pods {
for _, podInfo := range newNodeInfo.Pods {
pods = append(pods, podInfo.Pod)
}
if err := estimator.clusterSnapshot.AddNodeWithPods(newNode, pods); err != nil {
if err := estimator.clusterSnapshot.AddNodeWithPods(newNodeInfo.Node(), pods); err != nil {
return "", err
}
return newNode.Name, nil
return newNodeInfo.Node().Name, nil
}

// Calculates score for all pods and returns podInfo structure.
Expand Down
25 changes: 25 additions & 0 deletions cluster-autoscaler/utils/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ limitations under the License.
package scheduler

import (
"fmt"

apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/uuid"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)

Expand Down Expand Up @@ -57,3 +60,25 @@ func CreateNodeNameToInfoMap(pods []*apiv1.Pod, nodes []*apiv1.Node) map[string]

return nodeNameToNodeInfo
}

// DeepCopyTemplateNode copies NodeInfo object used as a template. It changes
// names of UIDs of both node and pods running on it, so that copies can be used
// to represent multiple nodes.
func DeepCopyTemplateNode(nodeTemplate *schedulerframework.NodeInfo, index int) *schedulerframework.NodeInfo {
node := nodeTemplate.Node().DeepCopy()
node.Name = fmt.Sprintf("%s-%d", node.Name, index)
node.UID = uuid.NewUUID()
if node.Labels == nil {
node.Labels = make(map[string]string)
}
node.Labels["kubernetes.io/hostname"] = node.Name
nodeInfo := schedulerframework.NewNodeInfo()
nodeInfo.SetNode(node)
for _, podInfo := range nodeTemplate.Pods {
pod := podInfo.Pod.DeepCopy()
pod.Name = fmt.Sprintf("%s-%d", podInfo.Pod.Name, index)
pod.UID = uuid.NewUUID()
nodeInfo.AddPod(pod)
}
return nodeInfo
}

0 comments on commit 68c4689

Please sign in to comment.