Skip to content
6 changes: 3 additions & 3 deletions cluster-autoscaler/config/autoscaling_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,10 @@ type AutoscalingOptions struct {
MaxBulkSoftTaintTime time.Duration
// MaxPodEvictionTime sets the maximum time CA tries to evict a pod before giving up.
MaxPodEvictionTime time.Duration
// IgnoredTaints is a list of taints CA considers to reflect transient node
// StartupTaints is a list of taints CA considers to reflect transient node
// status that should be removed when creating a node template for scheduling.
// The ignored taints are expected to appear during node startup.
IgnoredTaints []string
// The startup taints are expected to appear during node startup.
StartupTaints []string
// StatusTaints is a list of taints CA considers to reflect transient node
// status that should be removed when creating a node template for scheduling.
// The status taints are expected to appear during node lifetime, after startup.
Expand Down
4 changes: 2 additions & 2 deletions cluster-autoscaler/core/scaledown/actuation/actuator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1260,8 +1260,8 @@ func TestStartDeletion(t *testing.T) {
break taintsLoop
}
}
ignoreTaintValue := cmpopts.IgnoreFields(apiv1.Taint{}, "Value")
if diff := cmp.Diff(tc.wantTaintUpdates, gotTaintUpdates, ignoreTaintValue, cmpopts.EquateEmpty()); diff != "" {
startupTaintValue := cmpopts.IgnoreFields(apiv1.Taint{}, "Value")
if diff := cmp.Diff(tc.wantTaintUpdates, gotTaintUpdates, startupTaintValue, cmpopts.EquateEmpty()); diff != "" {
t.Errorf("taintUpdates diff (-want +got):\n%s", diff)
}

Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/core/static_autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -959,7 +959,7 @@ func (a *StaticAutoscaler) obtainNodeLists(cp cloudprovider.CloudProvider) ([]*a
// our normal handling for booting up nodes deal with this.
// TODO: Remove this call when we handle dynamically provisioned resources.
allNodes, readyNodes = a.processors.CustomResourcesProcessor.FilterOutNodesWithUnreadyResources(a.AutoscalingContext, allNodes, readyNodes)
allNodes, readyNodes = taints.FilterOutNodesWithIgnoredTaints(a.taintConfig.IgnoredTaints, allNodes, readyNodes)
allNodes, readyNodes = taints.FilterOutNodesWithStartupTaints(a.taintConfig, allNodes, readyNodes)
return allNodes, readyNodes, nil
}

Expand Down
7 changes: 5 additions & 2 deletions cluster-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,9 @@ var (
regional = flag.Bool("regional", false, "Cluster is regional.")
newPodScaleUpDelay = flag.Duration("new-pod-scale-up-delay", 0*time.Second, "Pods less than this old will not be considered for scale-up. Can be increased for individual pods through annotation 'cluster-autoscaler.kubernetes.io/pod-scale-up-delay'.")

ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group")
ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group (Deprecated, use startup-taints instead)")
startupTaintsFlag = multiStringFlag("startup-taint", "Specifies a taint to ignore in node templates when considering to scale a node group (Equivalent to ignore-taint)")
statusTaintsFlag = multiStringFlag("status-taint", "Specifies a taint to ignore in node templates when considering to scale a node group but nodes will not be treated as unready")
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
balancingLabelsFlag = multiStringFlag("balancing-label", "Specifies a label to use for comparing if two node groups are similar, rather than the built in heuristics. Setting this flag disables all other comparison logic, and cannot be combined with --balancing-ignore-label.")
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
Expand Down Expand Up @@ -346,7 +348,8 @@ func createAutoscalingOptions() config.AutoscalingOptions {
ExpendablePodsPriorityCutoff: *expendablePodsPriorityCutoff,
Regional: *regional,
NewPodScaleUpDelay: *newPodScaleUpDelay,
IgnoredTaints: *ignoreTaintsFlag,
StartupTaints: append(*ignoreTaintsFlag, *startupTaintsFlag...),
StatusTaints: *statusTaintsFlag,
BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
BalancingLabels: *balancingLabelsFlag,
KubeConfigPath: *kubeConfigFile,
Expand Down
6 changes: 3 additions & 3 deletions cluster-autoscaler/utils/kubernetes/ready.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ const (
// still upcoming due to a missing resource (e.g. GPU).
ResourceUnready NodeNotReadyReason = "cluster-autoscaler.kubernetes.io/resource-not-ready"

// IgnoreTaint is a fake identifier used internally by Cluster Autoscaler
// StartupNodes is a fake identifier used internally by Cluster Autoscaler
// to indicate nodes that appear Ready in the API, but are treated as
// still upcoming due to applied ignore taint.
IgnoreTaint NodeNotReadyReason = "cluster-autoscaler.kubernetes.io/ignore-taint"
// still upcoming due to applied startup taint.
StartupNodes NodeNotReadyReason = "cluster-autoscaler.kubernetes.io/startup-taint"
)

// IsNodeReadyAndSchedulable returns true if the node is ready and schedulable.
Expand Down
59 changes: 39 additions & 20 deletions cluster-autoscaler/utils/taints/taints.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ const (
// IgnoreTaintPrefix any taint starting with it will be filtered out from autoscaler template node.
IgnoreTaintPrefix = "ignore-taint.cluster-autoscaler.kubernetes.io/"

// StartupTaintPrefix (Same as IgnoreTaintPrefix) any taint starting with it will be filtered out from autoscaler template node.
StartupTaintPrefix = "startup-taint.cluster-autoscaler.kubernetes.io/"

// StatusTaintPrefix any taint starting with it will be filtered out from autoscaler template node but unlike IgnoreTaintPrefix & StartupTaintPrefix it should not be treated as unready.
StatusTaintPrefix = "status-taint.cluster-autoscaler.kubernetes.io/"

gkeNodeTerminationHandlerTaint = "cloud.google.com/impending-node-termination"

// AWS: Indicates that a node has volumes stuck in attaching state and hence it is not fit for scheduling more pods
Expand All @@ -55,16 +61,18 @@ type TaintKeySet map[string]bool

// TaintConfig is a config of taints that require special handling
type TaintConfig struct {
IgnoredTaints TaintKeySet
StatusTaints TaintKeySet
StartupTaints TaintKeySet
StatusTaints TaintKeySet
StartupTaintPrefixes []string
StatusTaintPrefixes []string
}

// NewTaintConfig returns the taint config extracted from options
func NewTaintConfig(opts config.AutoscalingOptions) TaintConfig {
ignoredTaints := make(TaintKeySet)
for _, taintKey := range opts.IgnoredTaints {
klog.V(4).Infof("Ignoring taint %s on all NodeGroups", taintKey)
ignoredTaints[taintKey] = true
startupTaints := make(TaintKeySet)
for _, taintKey := range opts.StartupTaints {
klog.V(4).Infof("Startup taint %s on all NodeGroups", taintKey)
startupTaints[taintKey] = true
}

statusTaints := make(TaintKeySet)
Expand All @@ -74,8 +82,10 @@ func NewTaintConfig(opts config.AutoscalingOptions) TaintConfig {
}

return TaintConfig{
IgnoredTaints: ignoredTaints,
StatusTaints: statusTaints,
StartupTaints: startupTaints,
StatusTaints: statusTaints,
StartupTaintPrefixes: []string{IgnoreTaintPrefix, StartupTaintPrefix},
StatusTaintPrefixes: []string{StatusTaintPrefix},
}
}

Expand Down Expand Up @@ -323,6 +333,15 @@ func CleanAllTaints(nodes []*apiv1.Node, client kube_client.Interface, recorder
}
}

// matchesAnyPrefix reports whether key begins with at least one of the given
// prefixes. It returns false when prefixes is nil or empty.
func matchesAnyPrefix(prefixes []string, key string) bool {
	for i := range prefixes {
		if !strings.HasPrefix(key, prefixes[i]) {
			continue
		}
		return true
	}
	return false
}

// SanitizeTaints returns filtered taints
func SanitizeTaints(taints []apiv1.Taint, taintConfig TaintConfig) []apiv1.Taint {
var newTaints []apiv1.Taint
Expand All @@ -342,12 +361,12 @@ func SanitizeTaints(taints []apiv1.Taint, taintConfig TaintConfig) []apiv1.Taint
continue
}

if _, exists := taintConfig.IgnoredTaints[taint.Key]; exists {
klog.V(4).Infof("Removing ignored taint %s, when creating template from node", taint.Key)
if _, exists := taintConfig.StartupTaints[taint.Key]; exists {
klog.V(4).Infof("Removing startup taint %s, when creating template from node", taint.Key)
continue
}

if strings.HasPrefix(taint.Key, IgnoreTaintPrefix) {
shouldRemoveBasedOnPrefix := matchesAnyPrefix(taintConfig.StartupTaintPrefixes, taint.Key) || matchesAnyPrefix(taintConfig.StatusTaintPrefixes, taint.Key)
if shouldRemoveBasedOnPrefix {
klog.V(4).Infof("Removing taint %s based on prefix, when creation template from node", taint.Key)
continue
}
Expand All @@ -362,24 +381,24 @@ func SanitizeTaints(taints []apiv1.Taint, taintConfig TaintConfig) []apiv1.Taint
return newTaints
}

// FilterOutNodesWithIgnoredTaints override the condition status of the given nodes to mark them as NotReady when they have
// FilterOutNodesWithStartupTaints override the condition status of the given nodes to mark them as NotReady when they have
// filtered taints.
func FilterOutNodesWithIgnoredTaints(ignoredTaints TaintKeySet, allNodes, readyNodes []*apiv1.Node) ([]*apiv1.Node, []*apiv1.Node) {
func FilterOutNodesWithStartupTaints(taintConfig TaintConfig, allNodes, readyNodes []*apiv1.Node) ([]*apiv1.Node, []*apiv1.Node) {
newAllNodes := make([]*apiv1.Node, 0)
newReadyNodes := make([]*apiv1.Node, 0)
nodesWithIgnoredTaints := make(map[string]*apiv1.Node)
nodesWithStartupTaints := make(map[string]*apiv1.Node)
for _, node := range readyNodes {
if len(node.Spec.Taints) == 0 {
newReadyNodes = append(newReadyNodes, node)
continue
}
ready := true
for _, t := range node.Spec.Taints {
_, hasIgnoredTaint := ignoredTaints[t.Key]
if hasIgnoredTaint || strings.HasPrefix(t.Key, IgnoreTaintPrefix) {
_, hasStartupTaint := taintConfig.StartupTaints[t.Key]
if hasStartupTaint || matchesAnyPrefix(taintConfig.StartupTaintPrefixes, t.Key) {
ready = false
nodesWithIgnoredTaints[node.Name] = kubernetes.GetUnreadyNodeCopy(node, kubernetes.IgnoreTaint)
klog.V(3).Infof("Overriding status of node %v, which seems to have ignored taint %q", node.Name, t.Key)
nodesWithStartupTaints[node.Name] = kubernetes.GetUnreadyNodeCopy(node, kubernetes.StartupNodes)
klog.V(3).Infof("Overriding status of node %v, which seems to have startup taint %q", node.Name, t.Key)
break
}
}
Expand All @@ -389,7 +408,7 @@ func FilterOutNodesWithIgnoredTaints(ignoredTaints TaintKeySet, allNodes, readyN
}
// Override any node with startup taint with its "unready" copy
for _, node := range allNodes {
if newNode, found := nodesWithIgnoredTaints[node.Name]; found {
if newNode, found := nodesWithStartupTaints[node.Name]; found {
newAllNodes = append(newAllNodes, newNode)
} else {
newAllNodes = append(newAllNodes, node)
Expand Down
90 changes: 66 additions & 24 deletions cluster-autoscaler/utils/taints/taints_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ func buildFakeClientWithConflicts(t *testing.T, nodes ...*apiv1.Node) *fake.Clie
return fakeClient
}

func TestFilterOutNodesWithIgnoredTaints(t *testing.T) {
func TestFilterOutNodesWithStartupTaints(t *testing.T) {
isReady := func(t *testing.T, node *apiv1.Node) bool {
for _, condition := range node.Status.Conditions {
if condition.Type == apiv1.NodeReady {
Expand All @@ -325,29 +325,30 @@ func TestFilterOutNodesWithIgnoredTaints(t *testing.T) {
}

for name, tc := range map[string]struct {
readyNodes int
allNodes int
ignoredTaints TaintKeySet
node *apiv1.Node
readyNodes int
allNodes int
startupTaints TaintKeySet
startupTaintsPrefixes []string
node *apiv1.Node
}{
"empty ignored taints, no node": {
"empty startup taints, no node": {
readyNodes: 0,
allNodes: 0,
ignoredTaints: map[string]bool{},
startupTaints: map[string]bool{},
node: nil,
},
"one ignored taint, no node": {
"one startup taint, no node": {
readyNodes: 0,
allNodes: 0,
ignoredTaints: map[string]bool{
startupTaints: map[string]bool{
"my-taint": true,
},
node: nil,
},
"one ignored taint, one ready untainted node": {
"one startup taint, one ready untainted node": {
readyNodes: 1,
allNodes: 1,
ignoredTaints: map[string]bool{
startupTaints: map[string]bool{
"my-taint": true,
},
node: &apiv1.Node{
Expand All @@ -363,10 +364,10 @@ func TestFilterOutNodesWithIgnoredTaints(t *testing.T) {
},
},
},
"one ignored taint, one unready tainted node": {
"one startup taint, one unready tainted node": {
readyNodes: 0,
allNodes: 1,
ignoredTaints: map[string]bool{
startupTaints: map[string]bool{
"my-taint": true,
},
node: &apiv1.Node{
Expand All @@ -388,10 +389,11 @@ func TestFilterOutNodesWithIgnoredTaints(t *testing.T) {
},
},
},
"no ignored taint, one unready prefixed tainted node": {
readyNodes: 0,
allNodes: 1,
ignoredTaints: map[string]bool{},
"no startup taint, one node unready prefixed with startup taint prefix (Compatibility)": {
readyNodes: 0,
allNodes: 1,
startupTaints: map[string]bool{},
startupTaintsPrefixes: []string{IgnoreTaintPrefix},
node: &apiv1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "notReadyTainted",
Expand All @@ -411,10 +413,34 @@ func TestFilterOutNodesWithIgnoredTaints(t *testing.T) {
},
},
},
"no ignored taint, two taints": {
"no startup taint, one node unready prefixed with startup taint prefix": {
readyNodes: 0,
allNodes: 1,
startupTaints: map[string]bool{},
startupTaintsPrefixes: []string{StartupTaintPrefix},
node: &apiv1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "notReadyTainted",
CreationTimestamp: metav1.NewTime(time.Now()),
},
Spec: apiv1.NodeSpec{
Taints: []apiv1.Taint{
{
Key: StartupTaintPrefix + "another-taint",
Value: "myValue",
Effect: apiv1.TaintEffectNoSchedule,
},
},
},
Status: apiv1.NodeStatus{
Conditions: []apiv1.NodeCondition{readyCondition},
},
},
},
"no startup taint, two taints": {
readyNodes: 1,
allNodes: 1,
ignoredTaints: map[string]bool{},
startupTaints: map[string]bool{},
node: &apiv1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "ReadyTainted",
Expand Down Expand Up @@ -445,7 +471,11 @@ func TestFilterOutNodesWithIgnoredTaints(t *testing.T) {
if tc.node != nil {
nodes = append(nodes, tc.node)
}
allNodes, readyNodes := FilterOutNodesWithIgnoredTaints(tc.ignoredTaints, nodes, nodes)
taintConfig := TaintConfig{
StartupTaints: tc.startupTaints,
StartupTaintPrefixes: tc.startupTaintsPrefixes,
}
allNodes, readyNodes := FilterOutNodesWithStartupTaints(taintConfig, nodes, nodes)
assert.Equal(t, tc.allNodes, len(allNodes))
assert.Equal(t, tc.readyNodes, len(readyNodes))

Expand Down Expand Up @@ -485,6 +515,16 @@ func TestSanitizeTaints(t *testing.T) {
Value: "myValue",
Effect: apiv1.TaintEffectNoSchedule,
},
{
Key: StatusTaintPrefix + "some-taint",
Value: "myValue",
Effect: apiv1.TaintEffectNoSchedule,
},
{
Key: StartupTaintPrefix + "some-taint",
Value: "myValue",
Effect: apiv1.TaintEffectNoSchedule,
},
{
Key: "test-taint",
Value: "test2",
Expand Down Expand Up @@ -522,11 +562,13 @@ func TestSanitizeTaints(t *testing.T) {
},
}
taintConfig := TaintConfig{
IgnoredTaints: map[string]bool{"ignore-me": true},
StatusTaints: map[string]bool{"status-me": true},
StartupTaints: map[string]bool{"ignore-me": true},
StatusTaints: map[string]bool{"status-me": true},
StartupTaintPrefixes: []string{IgnoreTaintPrefix, StartupTaintPrefix},
}

newTaints := SanitizeTaints(node.Spec.Taints, taintConfig)
require.Equal(t, len(newTaints), 1)
assert.Equal(t, newTaints[0].Key, "test-taint")
require.Equal(t, 2, len(newTaints))
assert.Equal(t, newTaints[0].Key, StatusTaintPrefix+"some-taint")
assert.Equal(t, newTaints[1].Key, "test-taint")
}