diff --git a/cluster-autoscaler/cloudprovider/azure/README.md b/cluster-autoscaler/cloudprovider/azure/README.md index e8f2c9b14b3e..abf9c9635ff1 100644 --- a/cluster-autoscaler/cloudprovider/azure/README.md +++ b/cluster-autoscaler/cloudprovider/azure/README.md @@ -68,6 +68,14 @@ or to autoscale multiple VM scale sets: - --nodes=1:10:k8s-nodepool-2-vmss ``` +To allow scaling similar node pools simultaneously, or when using separate node groups per zone and to keep nodes balanced across zones, use the `--balance-similar-node-groups` flag. Add it to the `command` section to enable it: + +```yaml + - --balance-similar-node-groups=true +``` + +See the [FAQ](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#im-running-cluster-with-nodes-in-multiple-zones-for-ha-purposes-is-that-supported-by-cluster-autoscaler) for more details. + Save the updated deployment manifest, then deploy cluster-autoscaler by running: ```sh diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index d0854ff6d207..51695be496d8 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -30,6 +30,7 @@ import ( "time" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/azure" cloudBuilder "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/builder" "k8s.io/autoscaler/cluster-autoscaler/config" "k8s.io/autoscaler/cluster-autoscaler/core" @@ -37,6 +38,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/expander" "k8s.io/autoscaler/cluster-autoscaler/metrics" ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" "k8s.io/autoscaler/cluster-autoscaler/utils/units" @@ -285,6 +287,10 @@ func buildAutoscaler() (core.Autoscaler, error) { processors := ca_processors.DefaultProcessors() 
processors.PodListProcessor = core.NewFilterOutSchedulablePodListProcessor() + if autoscalingOptions.CloudProviderName == azure.ProviderName { + processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{ + Comparator: nodegroupset.IsAzureNodeInfoSimilar} + } opts := core.AutoscalerOptions{ AutoscalingOptions: autoscalingOptions, diff --git a/cluster-autoscaler/processors/nodegroupset/azure_compare_nodegroups.go b/cluster-autoscaler/processors/nodegroupset/azure_compare_nodegroups.go new file mode 100644 index 000000000000..5a2fe88db166 --- /dev/null +++ b/cluster-autoscaler/processors/nodegroupset/azure_compare_nodegroups.go @@ -0,0 +1,45 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodegroupset + +import ( + schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" +) + +// AzureNodepoolLabel is a label specifying which Azure node pool a particular node belongs to. +const AzureNodepoolLabel = "agentpool" + +func nodesFromSameAzureNodePool(n1, n2 *schedulernodeinfo.NodeInfo) bool { + n1AzureNodePool := n1.Node().Labels[AzureNodepoolLabel] + n2AzureNodePool := n2.Node().Labels[AzureNodepoolLabel] + return n1AzureNodePool != "" && n1AzureNodePool == n2AzureNodePool +} + +// IsAzureNodeInfoSimilar compares if two nodes should be considered part of the +// same NodeGroupSet. 
This is true if they either belong to the same Azure agentpool +// or match usual conditions checked by IsNodeInfoSimilar, even if they have different agentpool labels. +func IsAzureNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool { + if nodesFromSameAzureNodePool(n1, n2) { + return true + } + azureIgnoredLabels := make(map[string]bool) + for k, v := range ignoredLabels { + azureIgnoredLabels[k] = v + } + azureIgnoredLabels[AzureNodepoolLabel] = true + return IsNodeInfoSimilarIgnoreLabels(n1, n2, azureIgnoredLabels) +} diff --git a/cluster-autoscaler/processors/nodegroupset/azure_compare_nodegroups_test.go b/cluster-autoscaler/processors/nodegroupset/azure_compare_nodegroups_test.go new file mode 100644 index 000000000000..bb7a24ede8bf --- /dev/null +++ b/cluster-autoscaler/processors/nodegroupset/azure_compare_nodegroups_test.go @@ -0,0 +1,125 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodegroupset + +import ( + "testing" + + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test" + "k8s.io/autoscaler/cluster-autoscaler/context" + . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" + schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" + + "github.com/stretchr/testify/assert" +) + +func TestIsAzureNodeInfoSimilar(t *testing.T) { + n1 := BuildTestNode("node1", 1000, 2000) + n1.ObjectMeta.Labels["test-label"] = "test-value" + n1.ObjectMeta.Labels["character"] = "thing" + n2 := BuildTestNode("node2", 1000, 2000) + n2.ObjectMeta.Labels["test-label"] = "test-value" + // No node-pool labels. + checkNodesSimilar(t, n1, n2, IsAzureNodeInfoSimilar, false) + // Empty agentpool labels + n1.ObjectMeta.Labels["agentpool"] = "" + n2.ObjectMeta.Labels["agentpool"] = "" + checkNodesSimilar(t, n1, n2, IsAzureNodeInfoSimilar, false) + // Only one non empty + n1.ObjectMeta.Labels["agentpool"] = "" + n2.ObjectMeta.Labels["agentpool"] = "foo" + checkNodesSimilar(t, n1, n2, IsAzureNodeInfoSimilar, false) + // Only one present + delete(n1.ObjectMeta.Labels, "agentpool") + n2.ObjectMeta.Labels["agentpool"] = "foo" + checkNodesSimilar(t, n1, n2, IsAzureNodeInfoSimilar, false) + // Different vales + n1.ObjectMeta.Labels["agentpool"] = "foo1" + n2.ObjectMeta.Labels["agentpool"] = "foo2" + checkNodesSimilar(t, n1, n2, IsAzureNodeInfoSimilar, false) + // Same values + n1.ObjectMeta.Labels["agentpool"] = "foo" + n2.ObjectMeta.Labels["agentpool"] = "foo" + checkNodesSimilar(t, n1, n2, IsAzureNodeInfoSimilar, true) + // Same labels except for agentpool + delete(n1.ObjectMeta.Labels, "character") + n1.ObjectMeta.Labels["agentpool"] = "foo" + n2.ObjectMeta.Labels["agentpool"] = "bar" + checkNodesSimilar(t, n1, n2, IsAzureNodeInfoSimilar, true) +} + +func TestFindSimilarNodeGroupsAzureBasic(t *testing.T) { + processor := &BalancingNodeGroupSetProcessor{Comparator: IsAzureNodeInfoSimilar} + basicSimilarNodeGroupsTest(t, processor) +} + +func TestFindSimilarNodeGroupsAzureByLabel(t *testing.T) { + processor := &BalancingNodeGroupSetProcessor{Comparator: IsAzureNodeInfoSimilar} + context := &context.AutoscalingContext{} + 
+ n1 := BuildTestNode("n1", 1000, 1000) + n2 := BuildTestNode("n2", 2000, 2000) + + provider := testprovider.NewTestCloudProvider(nil, nil) + provider.AddNodeGroup("ng1", 1, 10, 1) + provider.AddNodeGroup("ng2", 1, 10, 1) + provider.AddNode("ng1", n1) + provider.AddNode("ng2", n2) + + ni1 := schedulernodeinfo.NewNodeInfo() + ni1.SetNode(n1) + ni2 := schedulernodeinfo.NewNodeInfo() + ni2.SetNode(n2) + + nodeInfosForGroups := map[string]*schedulernodeinfo.NodeInfo{ + "ng1": ni1, "ng2": ni2, + } + + ng1, _ := provider.NodeGroupForNode(n1) + ng2, _ := provider.NodeGroupForNode(n2) + context.CloudProvider = provider + + // Groups with different cpu and mem are not similar. + similar, err := processor.FindSimilarNodeGroups(context, ng1, nodeInfosForGroups) + assert.NoError(t, err) + assert.Equal(t, similar, []cloudprovider.NodeGroup{}) + + // Unless we give them nodepool label. + n1.ObjectMeta.Labels["agentpool"] = "foobar" + n2.ObjectMeta.Labels["agentpool"] = "foobar" + similar, err = processor.FindSimilarNodeGroups(context, ng1, nodeInfosForGroups) + assert.NoError(t, err) + assert.Equal(t, similar, []cloudprovider.NodeGroup{ng2}) + + // Groups with the same cpu and mem are similar even if they belong to different pools. 
+ n3 := BuildTestNode("n1", 1000, 1000) + provider.AddNodeGroup("ng3", 1, 10, 1) + provider.AddNode("ng3", n3) + ni3 := schedulernodeinfo.NewNodeInfo() + ni3.SetNode(n3) + nodeInfosForGroups["ng3"] = ni3 + ng3, _ := provider.NodeGroupForNode(n3) + + n1.ObjectMeta.Labels["agentpool"] = "foobar1" + n2.ObjectMeta.Labels["agentpool"] = "foobar2" + n3.ObjectMeta.Labels["agentpool"] = "foobar3" + + similar, err = processor.FindSimilarNodeGroups(context, ng1, nodeInfosForGroups) + assert.NoError(t, err) + assert.Equal(t, similar, []cloudprovider.NodeGroup{ng3}) +} diff --git a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go index cf6c662ce9c2..0c016eb7c615 100644 --- a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go +++ b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go @@ -33,6 +33,13 @@ const ( MaxFreeDifferenceRatio = 0.05 ) +var ignoredLabels = map[string]bool{ + apiv1.LabelHostname: true, + apiv1.LabelZoneFailureDomain: true, + apiv1.LabelZoneRegion: true, + "beta.kubernetes.io/fluentd-ds-ready": true, // this is internal label used for determining if fluentd should be installed as daemon set. Used for migration 1.8 to 1.9. +} + // NodeInfoComparator is a function that tells if two nodes are from NodeGroups // similar enough to be considered a part of a single NodeGroupSet. 
type NodeInfoComparator func(n1, n2 *schedulernodeinfo.NodeInfo) bool @@ -52,12 +59,35 @@ func compareResourceMapsWithTolerance(resources map[apiv1.ResourceName][]resourc return true } +func compareLabels(nodes []*schedulernodeinfo.NodeInfo, ignoredLabels map[string]bool) bool { + labels := make(map[string][]string) + for _, node := range nodes { + for label, value := range node.Node().ObjectMeta.Labels { + ignore, _ := ignoredLabels[label] + if !ignore { + labels[label] = append(labels[label], value) + } + } + } + for _, labelValues := range labels { + if len(labelValues) != 2 || labelValues[0] != labelValues[1] { + return false + } + } + return true +} + // IsNodeInfoSimilar returns true if two NodeInfos are similar enough to consider // that the NodeGroups they come from are part of the same NodeGroupSet. The criteria are // somewhat arbitrary, but generally we check if resources provided by both nodes // are similar enough to likely be the same type of machine and if the set of labels // is the same (except for a pre-defined set of labels like hostname or zone). func IsNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool { + return IsNodeInfoSimilarIgnoreLabels(n1, n2, ignoredLabels) +} + +// IsNodeInfoSimilarIgnoreLabels returns true if two NodeInfos are similar while ignoring the set of labels provided. 
+func IsNodeInfoSimilarIgnoreLabels(n1, n2 *schedulernodeinfo.NodeInfo, ignoredLabels map[string]bool) bool { capacity := make(map[apiv1.ResourceName][]resource.Quantity) allocatable := make(map[apiv1.ResourceName][]resource.Quantity) free := make(map[apiv1.ResourceName][]resource.Quantity) @@ -91,27 +121,9 @@ func IsNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool { if !compareResourceMapsWithTolerance(free, MaxFreeDifferenceRatio) { return false } - - ignoredLabels := map[string]bool{ - apiv1.LabelHostname: true, - apiv1.LabelZoneFailureDomain: true, - apiv1.LabelZoneRegion: true, - "beta.kubernetes.io/fluentd-ds-ready": true, // this is internal label used for determining if fluentd should be installed as deamon set. Used for migration 1.8 to 1.9. + if !compareLabels(nodes, ignoredLabels) { + return false } - labels := make(map[string][]string) - for _, node := range nodes { - for label, value := range node.Node().ObjectMeta.Labels { - ignore, _ := ignoredLabels[label] - if !ignore { - labels[label] = append(labels[label], value) - } - } - } - for _, labelValues := range labels { - if len(labelValues) != 2 || labelValues[0] != labelValues[1] { - return false - } - } return true }