@@ -36,74 +36,46 @@ import (
36
36
"github.com/project-codeflare/appwrapper/pkg/config"
37
37
)
38
38
39
- // NodeHealthMonitor maintains the set of nodes that Autopilot has labelled as unhealthy
39
+ // NodeHealthMonitor watches Nodes and maintains mappings of Nodes that have either
40
+ // been marked as Unschedulable or that have been labeled to indicate that
41
+ // they have resources that Autopilot has tainted as NoSchedule or NoExecute.
42
+ // This information is used to automate the maintenance of the lendingLimit of
43
+ // a designated slack ClusterQueue and to migrate running workloads away from NoExecute resources.
40
44
type NodeHealthMonitor struct {
41
45
// Embedded Kubernetes client; Reconcile uses its Get method to fetch the Node
// being reconciled and the slack ClusterQueue.
client.Client
42
46
// Config supplies the Autopilot ResourceTaints that are matched against Node labels
// and the SlackQueueName that identifies the slack ClusterQueue (empty means none).
Config * config.AppWrapperConfig
43
47
}
44
48
45
49
var (
46
- // unhealthyNodes is a mapping from Node names to a set of resources that Autopilot has labeled as unhealthy on that Node
47
- unhealthyNodes = make (map [string ]sets.Set [string ])
48
- unhealthyNodesMutex sync.RWMutex
49
-
50
- // unschedulableNodes is a mapping from Node names to resource quantities than Autopilot has labeled as unschedulable on that Node
51
- unschedulableNodes = make (map [string ]map [string ]* resource.Quantity )
50
+ // noExecuteNodes is a mapping from Node names to resources with an Autopilot NoExecute taint
51
+ noExecuteNodes = make (map [string ]sets.Set [string ])
52
+ noExecuteNodesMutex sync.RWMutex
53
+
54
+ // noScheduleNodes is a mapping from Node names to resource quantities that are unschedulable.
55
+ // A resource may be unschedulable either because:
56
+ // (a) the Node is cordoned (node.Spec.Unschedulable is true) or
57
+ // (b) Autopilot has labeled the Node with either a NoExecute or NoSchedule taint.
58
+ noScheduleNodes = make (map [string ]map [string ]* resource.Quantity )
52
59
)
53
60
54
61
// permission to watch nodes
55
62
//+kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch
56
63
//+kubebuilder:rbac:groups=kueue.x-k8s.io,resources=clusterqueues,verbs=get;list;watch;update;patch
57
64
58
- //gocyclo:ignore
59
65
func (r * NodeHealthMonitor ) Reconcile (ctx context.Context , req ctrl.Request ) (ctrl.Result , error ) {
60
66
node := & v1.Node {}
61
67
if err := r .Get (ctx , req .NamespacedName , node ); err != nil {
62
68
return ctrl.Result {}, nil
63
69
}
64
70
65
- flaggedResources := make (sets.Set [string ])
66
- for key , value := range node .GetLabels () {
67
- for resourceName , taints := range r .Config .Autopilot .ResourceTaints {
68
- for _ , taint := range taints {
69
- if key == taint .Key && value == taint .Value && taint .Effect == v1 .TaintEffectNoExecute {
70
- flaggedResources .Insert (resourceName )
71
- }
72
- }
73
- }
74
- }
75
-
76
- nodeChanged := false
77
- unhealthyNodesMutex .Lock () // BEGIN CRITICAL SECTION
78
- if priorEntry , ok := unhealthyNodes [node .GetName ()]; ok {
79
- if len (flaggedResources ) == 0 {
80
- delete (unhealthyNodes , node .GetName ())
81
- nodeChanged = true
82
- } else if ! priorEntry .Equal (flaggedResources ) {
83
- unhealthyNodes [node .GetName ()] = flaggedResources
84
- nodeChanged = true
85
- }
86
- } else if len (flaggedResources ) > 0 {
87
- unhealthyNodes [node .GetName ()] = flaggedResources
88
- nodeChanged = true
89
- }
90
- unhealthyNodesMutex .Unlock () // END CRITICAL SECTION
71
+ r .updateNoExecuteNodes (ctx , node )
91
72
92
- // Unsynchronized reads of unhealthyNodes below are safe because this method
93
- // is the only writer to the map and the controller runtime is configured to
94
- // not allow concurrent execution of this method.
95
-
96
- if nodeChanged {
97
- log .FromContext (ctx ).Info ("Updated node health information" , "Number Unhealthy Nodes" , len (unhealthyNodes ), "Unhealthy Resource Details" , unhealthyNodes )
98
- }
99
-
100
- // update lending limits on slack quota if configured
73
+ // If there is a slack ClusterQueue, update its lending limits
101
74
102
75
if r .Config .SlackQueueName == "" {
103
76
return ctrl.Result {}, nil
104
77
}
105
78
106
- // get slack quota
107
79
cq := & kueue.ClusterQueue {}
108
80
if err := r .Get (ctx , types.NamespacedName {Name : r .Config .SlackQueueName }, cq ); err != nil {
109
81
if errors .IsNotFound (err ) {
@@ -112,36 +84,80 @@ func (r *NodeHealthMonitor) Reconcile(ctx context.Context, req ctrl.Request) (ct
112
84
return ctrl.Result {}, err
113
85
}
114
86
87
+ r .updateNoScheduleNodes (ctx , cq , node )
88
+
89
+ return r .updateLendingLimits (ctx , cq )
90
+ }
91
+
92
+ func (r * NodeHealthMonitor ) updateNoExecuteNodes (ctx context.Context , node * v1.Node ) {
93
+ noExecuteResources := make (sets.Set [string ])
94
+ for key , value := range node .GetLabels () {
95
+ for resourceName , taints := range r .Config .Autopilot .ResourceTaints {
96
+ for _ , taint := range taints {
97
+ if key == taint .Key && value == taint .Value && taint .Effect == v1 .TaintEffectNoExecute {
98
+ noExecuteResources .Insert (resourceName )
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ noExecuteNodesChanged := false
105
+ noExecuteNodesMutex .Lock () // BEGIN CRITICAL SECTION
106
+ if priorEntry , ok := noExecuteNodes [node .GetName ()]; ok {
107
+ if len (noExecuteResources ) == 0 {
108
+ delete (noExecuteNodes , node .GetName ())
109
+ noExecuteNodesChanged = true
110
+ } else if ! priorEntry .Equal (noExecuteResources ) {
111
+ noExecuteNodes [node .GetName ()] = noExecuteResources
112
+ noExecuteNodesChanged = true
113
+ }
114
+ } else if len (noExecuteResources ) > 0 {
115
+ noExecuteNodes [node .GetName ()] = noExecuteResources
116
+ noExecuteNodesChanged = true
117
+ }
118
+ noExecuteNodesMutex .Unlock () // END CRITICAL SECTION
119
+
120
+ // Safe to log outside the mutex because because this method is the only writer of noExecuteNodes
121
+ // and the controller runtime is configured to not allow concurrent execution of this controller.
122
+ if noExecuteNodesChanged {
123
+ log .FromContext (ctx ).Info ("Updated node NoExecute information" , "Number NoExecute Nodes" , len (noExecuteNodes ), "NoExecute Resource Details" , noExecuteNodes )
124
+ }
125
+ }
126
+
127
+ func (r * NodeHealthMonitor ) updateNoScheduleNodes (_ context.Context , cq * kueue.ClusterQueue , node * v1.Node ) {
115
128
// update unschedulable resource quantities for this node
116
- flaggedQuantities := make (map [string ]* resource.Quantity )
129
+ noScheduleQuantities := make (map [string ]* resource.Quantity )
117
130
if node .Spec .Unschedulable {
118
- // flag all non-pod resources covered by cq if the node is cordoned
131
+ // add all non-pod resources covered by cq if the node is cordoned
119
132
for _ , resourceName := range cq .Spec .ResourceGroups [0 ].Flavors [0 ].Resources {
120
133
if string (resourceName .Name ) != "pods" {
121
- flaggedQuantities [string (resourceName .Name )] = node .Status .Capacity .Name (resourceName .Name , resource .DecimalSI )
134
+ noScheduleQuantities [string (resourceName .Name )] = node .Status .Capacity .Name (resourceName .Name , resource .DecimalSI )
122
135
}
123
136
}
124
137
} else {
125
138
for key , value := range node .GetLabels () {
126
139
for resourceName , taints := range r .Config .Autopilot .ResourceTaints {
127
140
for _ , taint := range taints {
128
141
if key == taint .Key && value == taint .Value {
129
- flaggedQuantities [resourceName ] = node .Status .Capacity .Name (v1 .ResourceName (resourceName ), resource .DecimalSI )
142
+ noScheduleQuantities [resourceName ] = node .Status .Capacity .Name (v1 .ResourceName (resourceName ), resource .DecimalSI )
130
143
}
131
144
}
132
145
}
133
146
}
134
147
}
135
148
136
- if len (flaggedQuantities ) > 0 {
137
- unschedulableNodes [node .GetName ()] = flaggedQuantities
149
+ if len (noScheduleQuantities ) > 0 {
150
+ noScheduleNodes [node .GetName ()] = noScheduleQuantities
138
151
} else {
139
- delete (unschedulableNodes , node .GetName ())
152
+ delete (noScheduleNodes , node .GetName ())
140
153
}
154
+ }
155
+
156
+ func (r * NodeHealthMonitor ) updateLendingLimits (ctx context.Context , cq * kueue.ClusterQueue ) (ctrl.Result , error ) {
141
157
142
158
// compute unschedulable resource totals
143
159
unschedulableQuantities := map [string ]* resource.Quantity {}
144
- for _ , quantities := range unschedulableNodes {
160
+ for _ , quantities := range noScheduleNodes {
145
161
for resourceName , quantity := range quantities {
146
162
if ! quantity .IsZero () {
147
163
if unschedulableQuantities [resourceName ] == nil {
0 commit comments