Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ func (ali *aliCloudProvider) Cleanup() error {
return nil
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (ali *aliCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// AliRef contains a reference to ECS instance or .
type AliRef struct {
ID string
Expand Down
5 changes: 5 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,11 @@ func (aws *awsCloudProvider) Refresh() error {
return aws.awsManager.Refresh()
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (aws *awsCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// AwsRef contains a reference to some entity in AWS world.
type AwsRef struct {
Name string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,11 @@ func (azure *AzureCloudProvider) Refresh() error {
return azure.azureManager.Refresh()
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (azure *AzureCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// azureRef contains a reference to some entity in Azure world.
type azureRef struct {
Name string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,11 @@ func (baiducloud *baiducloudCloudProvider) Refresh() error {
return nil
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (baiducloud *baiducloudCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BaiducloudRef contains a reference to some entity in baiducloud world.
type BaiducloudRef struct {
Name string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ func (d *bizflycloudCloudProvider) Refresh() error {
return d.manager.Refresh()
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (d *bizflycloudCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildBizflyCloud builds the Bizflycloud cloud provider.
func BuildBizflyCloud(
opts config.AutoscalingOptions,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,11 @@ func (b *brightboxCloudProvider) Cleanup() error {
return nil
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (b *brightboxCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildBrightbox builds the Brightbox provider
func BuildBrightbox(
opts config.AutoscalingOptions,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@ func (ccp *cherryCloudProvider) Cleanup() error {
return nil
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (ccp *cherryCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildCherry is called by the autoscaler to build a Cherry Servers cloud provider.
//
// The cherryManager is created here, and the node groups are created
Expand Down
5 changes: 5 additions & 0 deletions cluster-autoscaler/cloudprovider/civo/civo_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,11 @@ func (d *civoCloudProvider) Refresh() error {
return d.manager.Refresh()
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (d *civoCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildCivo builds the Civo cloud provider.
func BuildCivo(
opts config.AutoscalingOptions,
Expand Down
8 changes: 8 additions & 0 deletions cluster-autoscaler/cloudprovider/cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ type CloudProvider interface {
// Refresh is called before every main loop and can be used to dynamically update cloud provider state.
// In particular the list of node groups returned by NodeGroups can change as a result of CloudProvider.Refresh().
Refresh() error

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion. This function
// will be called during prefiltering of nodes for scaledown to allow cloud providers the opportunity
// to reject a node for deletion. This may be used in cases where nodes are undergoing upgrades or other
// cloud-specific behavior where the cluster autoscaler should not begin cordoning, draining, and tainting
// the node.
// Returns true if the node can be safely deleted or false otherwise.
IsNodeCandidateForDeletion(*apiv1.Node) (bool, error)
}

// ErrNotImplemented is returned if a method is not implemented.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@ func (provider *cloudStackCloudProvider) Pricing() (cloudprovider.PricingModel,
return nil, cloudprovider.ErrNotImplemented
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (provider *cloudStackCloudProvider) IsNodeCandidateForDeletion(node *v1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
// created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
func (provider *cloudStackCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string, taints []v1.Taint, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,27 @@ func (c *machineController) listMachinesForScalableResource(r *unstructured.Unst
}
}

func (c *machineController) listMachineSetsForMachineDeployment(r *unstructured.Unstructured) ([]*unstructured.Unstructured, error) {
selector := labels.SelectorFromSet(map[string]string{
"cluster.x-k8s.io/deployment-name": r.GetName(),
})
objs, err := c.machineSetInformer.Lister().ByNamespace(r.GetNamespace()).List(selector)
if err != nil {
return nil, err
}

results := make([]*unstructured.Unstructured, 0, len(objs))
for _, x := range objs {
u, ok := x.(*unstructured.Unstructured)
if !ok {
return nil, fmt.Errorf("expected unstructured resource from lister, not %T", x)
}
results = append(results, u.DeepCopy())
}

return results, nil
}

func (c *machineController) listScalableResources() ([]*unstructured.Unstructured, error) {
scalableResources, err := c.listResources(c.machineSetInformer.Lister())
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,68 @@ func (ng *nodegroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*c
return &defaults, nil
}

func (ng *nodegroup) IsMachineDeploymentAndRollingOut() (bool, error) {
if ng.scalableResource.Kind() != machineDeploymentKind {
// Not a MachineDeployment.
return false, nil
}

machineSets, err := ng.machineController.listMachineSetsForMachineDeployment(ng.scalableResource.unstructured)
if err != nil {
return false, err
}

if len(machineSets) == 0 {
// No MachineSets => MD is not rolling out.
return false, nil
}

// Find the latest revision, the MachineSet with the latest revision is the MachineSet that
// matches the MachineDeployment spec.
var latestMSRevisionInt int64
for _, ms := range machineSets {
msRevision, ok := ms.GetAnnotations()[machineDeploymentRevisionAnnotation]
if !ok {
continue
}

msRevisionInt, err := strconv.ParseInt(msRevision, 10, 64)
if err != nil {
return false, errors.Wrapf(err, "failed to parse current revision on MachineSet %s", klog.KObj(ms))
}
latestMSRevisionInt = max(latestMSRevisionInt, msRevisionInt)
}
maxMSRevision := strconv.FormatInt(latestMSRevisionInt, 10)

for _, ms := range machineSets {
if ms.GetAnnotations()[machineDeploymentRevisionAnnotation] == maxMSRevision {
// Ignore the MachineSet with the latest revision
continue
}

// Check if any of the old MachineSets still have replicas
replicas, found, err := unstructured.NestedInt64(ms.UnstructuredContent(), "spec", "replicas")
if err != nil {
return false, errors.Wrapf(err, "failed to find spec replicas on MachineSet %s", klog.KObj(ms))
}
if found && replicas > 0 {
// Found old MachineSets that still has replicas => MD is still rolling out.
return true, nil
}
replicas, found, err = unstructured.NestedInt64(ms.UnstructuredContent(), "status", "replicas")
if err != nil {
return false, errors.Wrapf(err, "failed to find status replicas on MachineSet %s", klog.KObj(ms))
}
if found && replicas > 0 {
// Found old MachineSets that still has replicas => MD is still rolling out.
return true, nil
}
}

// Didn't find any old MachineSets that still have replicas => MD is not rolling out.
return false, nil
}

func newNodeGroupFromScalableResource(controller *machineController, unstructuredScalableResource *unstructured.Unstructured) (*nodegroup, error) {
// Ensure that the resulting node group would be allowed based on the autodiscovery specs if defined
if !controller.allowedByAutoDiscoverySpecs(unstructuredScalableResource) {
Expand Down
23 changes: 21 additions & 2 deletions cluster-autoscaler/cloudprovider/clusterapi/clusterapi_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"path"
"reflect"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/client-go/discovery"
Expand All @@ -34,7 +35,7 @@ import (

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
caserrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
)

Expand Down Expand Up @@ -92,7 +93,7 @@ func (p *provider) HasInstance(node *corev1.Node) (bool, error) {
return false, fmt.Errorf("machine not found for node %s: %v", node.Name, err)
}

func (*provider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
func (*provider) Pricing() (cloudprovider.PricingModel, caserrors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented
}

Expand Down Expand Up @@ -140,6 +141,24 @@ func (p *provider) GetNodeGpuConfig(node *corev1.Node) *cloudprovider.GpuConfig
return gpu.GetNodeGPUFromCloudProvider(p, node)
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (p *provider) IsNodeCandidateForDeletion(node *corev1.Node) (bool, error) {
ng, err := p.controller.nodeGroupForNode(node)
if err != nil {
return false, errors.Wrapf(err, "failed to determine node group for node %s", klog.KObj(node))
}
if ng == nil {
klog.V(5).Infof("node %s is not part of a node group", klog.KObj(node))
return false, nil
}
rollingout, err := ng.IsMachineDeploymentAndRollingOut()
if err != nil {
return false, errors.Wrapf(err, "failed to determine rolling out status for MachineDeployment %s", ng.scalableResource.ID())
}
// A node is a good candidate for deletion if it is not currently part of a MachineDeployment that is rolling out.
return !rollingout, nil
}

func newProvider(
name string,
rl *cloudprovider.ResourceLimiter,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ const (
draDriverKey = "capacity.cluster-autoscaler.kubernetes.io/dra-driver"
taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints" // not currently used on OpenShift

machineDeploymentRevisionAnnotation = "machinedeployment.clusters.x-k8s.io/revision"
// UnknownArch is used if the Architecture is Unknown
UnknownArch SystemArchitecture = ""
// Amd64 is used if the Architecture is x86_64
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ func (d *digitaloceanCloudProvider) Refresh() error {
return d.manager.Refresh()
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (d *digitaloceanCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildDigitalOcean builds the DigitalOcean cloud provider.
func BuildDigitalOcean(
opts config.AutoscalingOptions,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,11 @@ func (pcp *equinixMetalCloudProvider) Cleanup() error {
return nil
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (pcp *equinixMetalCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildCloudProvider is called by the autoscaler to build an Equinix Metal cloud provider.
//
// The equinixMetalManager is created here, and the node groups are created
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,11 @@ func (e *exoscaleCloudProvider) Refresh() error {
return e.manager.Refresh()
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (e *exoscaleCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildExoscale builds the Exoscale cloud provider.
func BuildExoscale(_ config.AutoscalingOptions, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
manager, err := newManager(discoveryOpts)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,11 @@ func (e *externalGrpcCloudProvider) Refresh() error {
return nil
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (e *externalGrpcCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildExternalGrpc builds the externalgrpc cloud provider.
func BuildExternalGrpc(
opts config.AutoscalingOptions,
Expand Down
5 changes: 5 additions & 0 deletions cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@ func (gce *GceCloudProvider) Refresh() error {
return gce.gceManager.Refresh()
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (gce *GceCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// GceRef contains s reference to some entity in GCE world.
type GceRef struct {
Project string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,11 @@ func (d *HetznerCloudProvider) Refresh() error {
return nil
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (d *HetznerCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildHetzner builds the Hetzner cloud provider.
func BuildHetzner(_ config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
manager, err := newManager()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,11 @@ func (hcp *huaweicloudCloudProvider) Refresh() error {
return nil
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (hcp *huaweicloudCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

func (hcp *huaweicloudCloudProvider) buildAsgs(specs []string) error {
asgs, err := hcp.cloudServiceManager.ListScalingGroups()
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,11 @@ func (ic *IonosCloudCloudProvider) Refresh() error {
return nil
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (ic *IonosCloudCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildIonosCloud builds the IonosCloud cloud provider.
func BuildIonosCloud(
opts config.AutoscalingOptions,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@ func (k *kamateraCloudProvider) Refresh() error {
return k.manager.refresh()
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (k *kamateraCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildKamatera builds the Kamatera cloud provider.
func BuildKamatera(
opts config.AutoscalingOptions,
Expand Down
5 changes: 5 additions & 0 deletions cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,11 @@ func (kubemark *KubemarkCloudProvider) Cleanup() error {
return nil
}

// IsNodeCandidateForDeletion returns whether the node is a good candidate for deletion.
func (kubemark *KubemarkCloudProvider) IsNodeCandidateForDeletion(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// NodeGroup implements NodeGroup interface.
type NodeGroup struct {
Name string
Expand Down
Loading