Skip to content

Commit 01a56a8

Browse files
committed
Add GKE-specific NodeGroupSet processor
Also refactor Balancing processor a bit to make it easily extensible.
1 parent 6f5e6aa commit 01a56a8

File tree

7 files changed

+186
-21
lines changed

7 files changed

+186
-21
lines changed

cluster-autoscaler/main.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ import (
3838
"k8s.io/autoscaler/cluster-autoscaler/estimator"
3939
"k8s.io/autoscaler/cluster-autoscaler/expander"
4040
"k8s.io/autoscaler/cluster-autoscaler/metrics"
41+
ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors"
42+
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset"
4143
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
4244
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
4345
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
@@ -256,9 +258,16 @@ func buildAutoscaler() (core.Autoscaler, error) {
256258
// Create basic config from flags.
257259
autoscalingOptions := createAutoscalingOptions()
258260
kubeClient := createKubeClient(getKubeConfig())
261+
processors := ca_processors.DefaultProcessors()
262+
if autoscalingOptions.CloudProviderName == "gke" {
263+
processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{
264+
Comparator: nodegroupset.IsGkeNodeInfoSimilar}
265+
266+
}
259267
opts := core.AutoscalerOptions{
260268
AutoscalingOptions: autoscalingOptions,
261269
KubeClient: kubeClient,
270+
Processors: processors,
262271
}
263272

264273
// This metric should be published only once.

cluster-autoscaler/processors/nodegroupset/balancing_processor.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929

3030
// BalancingNodeGroupSetProcessor tries to keep similar node groups balanced on scale-up.
3131
type BalancingNodeGroupSetProcessor struct {
32+
Comparator NodeInfoComparator
3233
}
3334

3435
// FindSimilarNodeGroups returns a list of NodeGroups similar to the given one.
@@ -55,7 +56,11 @@ func (b *BalancingNodeGroupSetProcessor) FindSimilarNodeGroups(context *context.
5556
glog.Warningf("Failed to find nodeInfo for group %v", ngId)
5657
continue
5758
}
58-
if IsNodeInfoSimilar(nodeInfo, ngNodeInfo) {
59+
comparator := b.Comparator
60+
if comparator == nil {
61+
comparator = IsNodeInfoSimilar
62+
}
63+
if comparator(nodeInfo, ngNodeInfo) {
5964
result = append(result, ng)
6065
}
6166
}

cluster-autoscaler/processors/nodegroupset/balancing_processor_test.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ import (
2828
"github.com/stretchr/testify/assert"
2929
)
3030

31-
func TestFindSimilarNodeGroups(t *testing.T) {
32-
processor := &BalancingNodeGroupSetProcessor{}
31+
func basicSimilarNodeGroupsTest(t *testing.T, processor NodeGroupSetProcessor) {
3332
context := &context.AutoscalingContext{}
3433

3534
n1 := BuildTestNode("n1", 1000, 1000)
@@ -72,6 +71,11 @@ func TestFindSimilarNodeGroups(t *testing.T) {
7271
assert.Equal(t, similar, []cloudprovider.NodeGroup{})
7372
}
7473

74+
func TestFindSimilarNodeGroups(t *testing.T) {
75+
processor := &BalancingNodeGroupSetProcessor{}
76+
basicSimilarNodeGroupsTest(t, processor)
77+
}
78+
7579
func TestBalanceSingleGroup(t *testing.T) {
7680
processor := &BalancingNodeGroupSetProcessor{}
7781
context := &context.AutoscalingContext{}

cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ const (
3434
MaxFreeDifferenceRatio = 0.05
3535
)
3636

37+
// NodeInfoComparator is a function that tells if two nodes are from NodeGroups
38+
// similar enough to be considered a part of a single NodeGroupSet.
39+
type NodeInfoComparator func(n1, n2 *schedulercache.NodeInfo) bool
40+
3741
func compareResourceMapsWithTolerance(resources map[apiv1.ResourceName][]resource.Quantity,
3842
maxDifferenceRatio float64) bool {
3943
for _, qtyList := range resources {

cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -29,22 +29,22 @@ import (
2929
"github.com/stretchr/testify/assert"
3030
)
3131

32-
func checkNodesSimilar(t *testing.T, n1, n2 *apiv1.Node, shouldEqual bool) {
33-
checkNodesSimilarWithPods(t, n1, n2, []*apiv1.Pod{}, []*apiv1.Pod{}, shouldEqual)
32+
func checkNodesSimilar(t *testing.T, n1, n2 *apiv1.Node, comparator NodeInfoComparator, shouldEqual bool) {
33+
checkNodesSimilarWithPods(t, n1, n2, []*apiv1.Pod{}, []*apiv1.Pod{}, comparator, shouldEqual)
3434
}
3535

36-
func checkNodesSimilarWithPods(t *testing.T, n1, n2 *apiv1.Node, pods1, pods2 []*apiv1.Pod, shouldEqual bool) {
36+
func checkNodesSimilarWithPods(t *testing.T, n1, n2 *apiv1.Node, pods1, pods2 []*apiv1.Pod, comparator NodeInfoComparator, shouldEqual bool) {
3737
ni1 := schedulercache.NewNodeInfo(pods1...)
3838
ni1.SetNode(n1)
3939
ni2 := schedulercache.NewNodeInfo(pods2...)
4040
ni2.SetNode(n2)
41-
assert.Equal(t, shouldEqual, IsNodeInfoSimilar(ni1, ni2))
41+
assert.Equal(t, shouldEqual, comparator(ni1, ni2))
4242
}
4343

4444
func TestIdenticalNodesSimilar(t *testing.T) {
4545
n1 := BuildTestNode("node1", 1000, 2000)
4646
n2 := BuildTestNode("node2", 1000, 2000)
47-
checkNodesSimilar(t, n1, n2, true)
47+
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)
4848
}
4949

5050
func TestNodesSimilarVariousRequirements(t *testing.T) {
@@ -53,23 +53,23 @@ func TestNodesSimilarVariousRequirements(t *testing.T) {
5353
// Different CPU capacity
5454
n2 := BuildTestNode("node2", 1000, 2000)
5555
n2.Status.Capacity[apiv1.ResourceCPU] = *resource.NewMilliQuantity(1001, resource.DecimalSI)
56-
checkNodesSimilar(t, n1, n2, false)
56+
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, false)
5757

5858
// Same CPU capacity, but slightly different allocatable
5959
n3 := BuildTestNode("node3", 1000, 2000)
6060
n3.Status.Allocatable[apiv1.ResourceCPU] = *resource.NewMilliQuantity(999, resource.DecimalSI)
61-
checkNodesSimilar(t, n1, n3, true)
61+
checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, true)
6262

6363
// Same CPU capacity, significantly different allocatable
6464
n4 := BuildTestNode("node4", 1000, 2000)
6565
n4.Status.Allocatable[apiv1.ResourceCPU] = *resource.NewMilliQuantity(500, resource.DecimalSI)
66-
checkNodesSimilar(t, n1, n4, false)
66+
checkNodesSimilar(t, n1, n4, IsNodeInfoSimilar, false)
6767

6868
// One with GPU, one without
6969
n5 := BuildTestNode("node5", 1000, 2000)
7070
n5.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(1, resource.DecimalSI)
7171
n5.Status.Allocatable[gpu.ResourceNvidiaGPU] = n5.Status.Capacity[gpu.ResourceNvidiaGPU]
72-
checkNodesSimilar(t, n1, n5, false)
72+
checkNodesSimilar(t, n1, n5, IsNodeInfoSimilar, false)
7373
}
7474

7575
func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) {
@@ -81,20 +81,20 @@ func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) {
8181
n2 := BuildTestNode("node2", 1000, 2000)
8282
n2.Status.Allocatable[apiv1.ResourceCPU] = *resource.NewMilliQuantity(500, resource.DecimalSI)
8383
n2.Status.Allocatable[apiv1.ResourceMemory] = *resource.NewQuantity(1000, resource.DecimalSI)
84-
checkNodesSimilarWithPods(t, n1, n2, []*apiv1.Pod{p1}, []*apiv1.Pod{}, false)
84+
checkNodesSimilarWithPods(t, n1, n2, []*apiv1.Pod{p1}, []*apiv1.Pod{}, IsNodeInfoSimilar, false)
8585

8686
// Same requests of pods
8787
n3 := BuildTestNode("node3", 1000, 2000)
8888
p3 := BuildTestPod("pod3", 500, 1000)
8989
p3.Spec.NodeName = "node3"
90-
checkNodesSimilarWithPods(t, n1, n3, []*apiv1.Pod{p1}, []*apiv1.Pod{p3}, true)
90+
checkNodesSimilarWithPods(t, n1, n3, []*apiv1.Pod{p1}, []*apiv1.Pod{p3}, IsNodeInfoSimilar, true)
9191

9292
// Similar allocatable, similar pods
9393
n4 := BuildTestNode("node4", 1000, 2000)
9494
n4.Status.Allocatable[apiv1.ResourceCPU] = *resource.NewMilliQuantity(999, resource.DecimalSI)
9595
p4 := BuildTestPod("pod4", 501, 1001)
9696
p4.Spec.NodeName = "node4"
97-
checkNodesSimilarWithPods(t, n1, n4, []*apiv1.Pod{p1}, []*apiv1.Pod{p4}, true)
97+
checkNodesSimilarWithPods(t, n1, n4, []*apiv1.Pod{p1}, []*apiv1.Pod{p4}, IsNodeInfoSimilar, true)
9898
}
9999

100100
func TestNodesSimilarVariousLabels(t *testing.T) {
@@ -106,27 +106,27 @@ func TestNodesSimilarVariousLabels(t *testing.T) {
106106
n2.ObjectMeta.Labels["test-label"] = "test-value"
107107

108108
// Missing character label
109-
checkNodesSimilar(t, n1, n2, false)
109+
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, false)
110110

111111
n2.ObjectMeta.Labels["character"] = "winnie the pooh"
112-
checkNodesSimilar(t, n1, n2, true)
112+
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)
113113

114114
// Different hostname labels shouldn't matter
115115
n1.ObjectMeta.Labels[kubeletapis.LabelHostname] = "node1"
116116
n2.ObjectMeta.Labels[kubeletapis.LabelHostname] = "node2"
117-
checkNodesSimilar(t, n1, n2, true)
117+
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)
118118

119119
// Different zone shouldn't matter either
120120
n1.ObjectMeta.Labels[kubeletapis.LabelZoneFailureDomain] = "mars-olympus-mons1-b"
121121
n2.ObjectMeta.Labels[kubeletapis.LabelZoneFailureDomain] = "us-houston1-a"
122-
checkNodesSimilar(t, n1, n2, true)
122+
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)
123123

124124
// Different beta.kubernetes.io/fluentd-ds-ready should not matter
125125
n1.ObjectMeta.Labels["beta.kubernetes.io/fluentd-ds-ready"] = "true"
126126
n2.ObjectMeta.Labels["beta.kubernetes.io/fluentd-ds-ready"] = "false"
127-
checkNodesSimilar(t, n1, n2, true)
127+
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)
128128

129129
n1.ObjectMeta.Labels["beta.kubernetes.io/fluentd-ds-ready"] = "true"
130130
delete(n2.ObjectMeta.Labels, "beta.kubernetes.io/fluentd-ds-ready")
131-
checkNodesSimilar(t, n1, n2, true)
131+
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)
132132
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
Copyright 2018 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package nodegroupset
18+
19+
import (
20+
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
21+
)
22+
23+
// GkeNodepoolLabel is the label specifying the GKE node pool a particular node belongs to.
24+
const GkeNodepoolLabel = "cloud.google.com/gke-nodepool"
25+
26+
func nodesFromSameGkeNodePool(n1, n2 *schedulercache.NodeInfo) bool {
27+
n1GkeNodePool := n1.Node().Labels[GkeNodepoolLabel]
28+
n2GkeNodePool := n2.Node().Labels[GkeNodepoolLabel]
29+
return n1GkeNodePool != "" && n1GkeNodePool == n2GkeNodePool
30+
}
31+
32+
// IsGkeNodeInfoSimilar compares if two nodes should be considered part of the
33+
// same NodeGroupSet. This is true if they either belong to the same GKE nodepool
34+
// or match usual conditions checked by IsNodeInfoSimilar.
35+
func IsGkeNodeInfoSimilar(n1, n2 *schedulercache.NodeInfo) bool {
36+
if nodesFromSameGkeNodePool(n1, n2) {
37+
return true
38+
}
39+
return IsNodeInfoSimilar(n1, n2)
40+
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
Copyright 2018 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package nodegroupset
18+
19+
import (
20+
"testing"
21+
22+
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
23+
testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
24+
"k8s.io/autoscaler/cluster-autoscaler/context"
25+
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
26+
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
27+
28+
"github.com/stretchr/testify/assert"
29+
)
30+
31+
func TestIsGkeNodeInfoSimilar(t *testing.T) {
32+
n1 := BuildTestNode("node1", 1000, 2000)
33+
n1.ObjectMeta.Labels["test-label"] = "test-value"
34+
n1.ObjectMeta.Labels["character"] = "winnie the pooh"
35+
n2 := BuildTestNode("node2", 1000, 2000)
36+
n2.ObjectMeta.Labels["test-label"] = "test-value"
37+
// No node-pool labels.
38+
checkNodesSimilar(t, n1, n2, IsGkeNodeInfoSimilar, false)
39+
// Empty node-pool labels
40+
n1.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = ""
41+
n2.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = ""
42+
checkNodesSimilar(t, n1, n2, IsGkeNodeInfoSimilar, false)
43+
// Only one non empty
44+
n1.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = ""
45+
n2.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah"
46+
checkNodesSimilar(t, n1, n2, IsGkeNodeInfoSimilar, false)
47+
// Only one present
48+
delete(n1.ObjectMeta.Labels, "cloud.google.com/gke-nodepool")
49+
n2.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah"
50+
checkNodesSimilar(t, n1, n2, IsGkeNodeInfoSimilar, false)
51+
// Different vales
52+
n1.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah1"
53+
n2.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah2"
54+
checkNodesSimilar(t, n1, n2, IsGkeNodeInfoSimilar, false)
55+
// Same values
56+
n1.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah"
57+
n2.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah"
58+
checkNodesSimilar(t, n1, n2, IsGkeNodeInfoSimilar, true)
59+
}
60+
61+
func TestFindSimilarNodeGroupsGkeBasic(t *testing.T) {
62+
processor := &BalancingNodeGroupSetProcessor{Comparator: IsGkeNodeInfoSimilar}
63+
basicSimilarNodeGroupsTest(t, processor)
64+
}
65+
66+
func TestFindSimilarNodeGroupsGkeByLabel(t *testing.T) {
67+
processor := &BalancingNodeGroupSetProcessor{Comparator: IsGkeNodeInfoSimilar}
68+
context := &context.AutoscalingContext{}
69+
70+
n1 := BuildTestNode("n1", 1000, 1000)
71+
n2 := BuildTestNode("n2", 2000, 2000)
72+
73+
provider := testprovider.NewTestCloudProvider(nil, nil)
74+
provider.AddNodeGroup("ng1", 1, 10, 1)
75+
provider.AddNodeGroup("ng2", 1, 10, 1)
76+
provider.AddNode("ng1", n1)
77+
provider.AddNode("ng2", n2)
78+
79+
ni1 := schedulercache.NewNodeInfo()
80+
ni1.SetNode(n1)
81+
ni2 := schedulercache.NewNodeInfo()
82+
ni2.SetNode(n2)
83+
84+
nodeInfosForGroups := map[string]*schedulercache.NodeInfo{
85+
"ng1": ni1, "ng2": ni2,
86+
}
87+
88+
ng1, _ := provider.NodeGroupForNode(n1)
89+
ng2, _ := provider.NodeGroupForNode(n2)
90+
context.CloudProvider = provider
91+
92+
// Groups with different cpu and mem are not similar
93+
similar, err := processor.FindSimilarNodeGroups(context, ng1, nodeInfosForGroups)
94+
assert.NoError(t, err)
95+
assert.Equal(t, similar, []cloudprovider.NodeGroup{})
96+
97+
// Unless we give them nodepool label
98+
n1.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah"
99+
n2.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah"
100+
similar, err = processor.FindSimilarNodeGroups(context, ng1, nodeInfosForGroups)
101+
assert.NoError(t, err)
102+
assert.Equal(t, similar, []cloudprovider.NodeGroup{ng2})
103+
}

0 commit comments

Comments
 (0)