Skip to content

Commit 433d1cb

Browse files
asm582 and metalcycling authored
add default quota to AW (#396)
* add default quota to AW
* add default quota
* resolve merge
* Added support to query existing QuotaSubTrees. MCAD checks that there is a label associated with every quota-sub-tree for the AppWrapper; if there is none, then it adds it with 'default' value. This way, AppWrappers submitted without labels can be dispatched using the quota management library. Added tests for this, and fixed 'quota_service' sub tree to have a 'default' node. This code assumes the quota administrator created a leaf node named 'default' that can be used to schedule AppWrappers without quota labels.
* Fixed bug that manifested when running MCAD without quota management. The code would think there is no quota (even without quota management) because the new code wasn't inside the proper scope.
* Renamed the function 'GetTreeNames' to 'GetValidQuotaLabels'
* Default quota labels code fixed. Code preserves the existing labels and updates the Kubernetes object correctly. New tests added to check for AppWrappers with no labels at all and with existing labels but no quota labels.
* relax testing scenario
---------
Co-authored-by: Pedro D. Bello-Maldonado <[email protected]>
1 parent af95bbe commit 433d1cb

23 files changed

+306
-69
lines changed

pkg/controller/queuejob/queuejob_controller_ex.go

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1260,11 +1260,46 @@ func (qjm *XController) ScheduleNext() {
12601260
klog.Infof("[ScheduleNext] XQJ %s with resources %v to be scheduled on aggregated idle resources %v", qj.Name, aggqj, resources)
12611261

12621262
if aggqj.LessEqual(resources) && qjm.nodeChecks(qjm.cache.GetUnallocatedHistograms(), qj) {
1263-
//Now evaluate quota
1263+
// Now evaluate quota
12641264
fits := true
12651265
klog.V(4).Infof("[ScheduleNext] HOL available resourse successful check for %s at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status)
12661266
if qjm.serverOption.QuotaEnabled {
12671267
if qjm.quotaManager != nil {
1268+
// Quota tree design:
1269+
// - All AppWrappers without quota submission will consume quota from the 'default' node.
1270+
// - All quota trees in the system should have a 'default' node so AppWrappers without
1271+
// quota specification can be dispatched
1272+
// - If the AppWrapper doesn't have a quota label, then one is added for every tree with the 'default' value
1273+
// - Depending on how the 'default' node is configured, AppWrappers that don't specify quota could be
1274+
// preemptable by default (e.g., 'default' node with 'cpu: 0m' and 'memory: 0Mi' quota and 'hardLimit: false'
1275+
// such node borrows quota from other nodes already in the system)
1276+
apiCacheAWJob, err := qjm.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name)
1277+
if err != nil {
1278+
klog.Errorf("[ScheduleNext] Failed to get AppWrapper from API Cache %v/%v: %v",
1279+
qj.Namespace, qj.Name, err)
1280+
continue
1281+
}
1282+
allTrees := qjm.quotaManager.GetValidQuotaLabels()
1283+
newLabels := make(map[string]string)
1284+
for key, value := range apiCacheAWJob.Labels {
1285+
newLabels[key] = value
1286+
}
1287+
updateLabels := false
1288+
for _, treeName := range allTrees {
1289+
if _, quotaSetForAW := newLabels[treeName]; !quotaSetForAW {
1290+
newLabels[treeName] = "default"
1291+
updateLabels = true
1292+
}
1293+
}
1294+
if updateLabels {
1295+
apiCacheAWJob.SetLabels(newLabels)
1296+
if err := qjm.updateEtcd(apiCacheAWJob, "ScheduleNext - setDefaultQuota"); err == nil {
1297+
klog.V(3).Infof("[ScheduleNext] Default quota added to AW %v", qj.Name)
1298+
} else {
1299+
klog.V(3).Infof("[ScheduleNext] Failed to added default quota to AW %v, skipping dispatch of AW", qj.Name)
1300+
return
1301+
}
1302+
}
12681303
var msg string
12691304
var preemptAWs []*arbv1.AppWrapper
12701305
quotaFits, preemptAWs, msg = qjm.quotaManager.Fits(qj, aggqj, proposedPreemptions)

pkg/controller/quota/quota_manager_interface.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
// ------------------------------------------------------ {COPYRIGHT-TOP} ---
22
// Copyright 2019, 2021, 2022, 2023 The Multi-Cluster App Dispatcher Authors.
3-
//
3+
//
44
// Licensed under the Apache License, Version 2.0 (the "License");
55
// you may not use this file except in compliance with the License.
66
// You may obtain a copy of the License at
7-
//
8-
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
99
//
1010
// Unless required by applicable law or agreed to in writing, software
1111
// distributed under the License is distributed on an "AS IS" BASIS,
@@ -23,4 +23,5 @@ import (
2323
type QuotaManagerInterface interface {
2424
Fits(aw *arbv1.AppWrapper, resources *clusterstateapi.Resource, proposedPremptions []*arbv1.AppWrapper) (bool, []*arbv1.AppWrapper, string)
2525
Release(aw *arbv1.AppWrapper) bool
26+
GetValidQuotaLabels() []string
2627
}

pkg/controller/quota/quotaforestmanager/qm_lib_backend_with_quotasubt_mgr.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,3 +624,6 @@ func (qm *QuotaManager) removeConsumer(consumerID string) {
624624
klog.Warningf("Failed to remove consumer %s", consumerID)
625625
}
626626
}
627+
func (qm *QuotaManager) GetValidQuotaLabels() []string {
628+
return qm.quotaManagerBackend.GetTreeNames()
629+
}

pkg/quotaplugins/quota-forest/quota-manager/quota/core/quotatree.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
55
you may not use this file except in compliance with the License.
66
You may obtain a copy of the License at
77
8-
http://www.apache.org/licenses/LICENSE-2.0
8+
http://www.apache.org/licenses/LICENSE-2.0
99
1010
Unless required by applicable law or agreed to in writing, software
1111
distributed under the License is distributed on an "AS IS" BASIS,

pkg/quotaplugins/quota-forest/quota-manager/quota/quotamanager.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,7 @@ func (m *Manager) String() string {
529529
defer m.mutex.RUnlock()
530530

531531
var b bytes.Buffer
532-
b.WriteString("QuotaManger: \n")
532+
b.WriteString("QuotaManager: \n")
533533
b.WriteString("Mode: " + m.GetModeString() + "\n")
534534
b.WriteString("\n")
535535

pkg/quotaplugins/quota-simple-rest/quota_rest_manager.go

Lines changed: 41 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1+
//go:build !private
12
// +build !private
3+
24
// ------------------------------------------------------ {COPYRIGHT-TOP} ---
35
// Copyright 2022, 2023 The Multi-Cluster App Dispatcher Authors.
4-
//
6+
//
57
// Licensed under the Apache License, Version 2.0 (the "License");
68
// you may not use this file except in compliance with the License.
79
// You may obtain a copy of the License at
8-
//
10+
//
911
// http://www.apache.org/licenses/LICENSE-2.0
1012
//
1113
// Unless required by applicable law or agreed to in writing, software
@@ -21,6 +23,7 @@ import (
2123
"bytes"
2224
"encoding/json"
2325
"fmt"
26+
2427
"github.com/project-codeflare/multi-cluster-app-dispatcher/cmd/kar-controllers/app/options"
2528
arbv1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1"
2629
listersv1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/client/listers/controller/v1"
@@ -29,26 +32,27 @@ import (
2932
"github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/quotaplugins/util"
3033

3134
"io/ioutil"
32-
"k8s.io/client-go/rest"
33-
"k8s.io/klog/v2"
3435
"math"
3536
"net/http"
3637
"net/http/httputil"
3738
"reflect"
3839
"strings"
3940
"time"
41+
42+
"k8s.io/client-go/rest"
43+
"k8s.io/klog/v2"
4044
)
4145

4246
// QuotaManager implements a QuotaManagerInterface.
4347
type QuotaManager struct {
44-
url string
45-
appwrapperLister listersv1.AppWrapperLister
46-
preemptionEnabled bool
48+
url string
49+
appwrapperLister listersv1.AppWrapperLister
50+
preemptionEnabled bool
4751
}
4852

4953
type QuotaGroup struct {
50-
GroupContext string `json:"groupcontext"`
51-
GroupId string `json:"groupid"`
54+
GroupContext string `json:"groupcontext"`
55+
GroupId string `json:"groupid"`
5256
}
5357

5458
type Request struct {
@@ -70,19 +74,17 @@ type QuotaResponse struct {
7074
}
7175

7276
type TreeNode struct {
73-
Allocation string `json:"allocation"`
74-
Quota string `json:"quota"`
75-
Name string `json:"name"`
76-
Hard bool `json:"hard"`
77-
Children []TreeNode `json:"children"`
78-
Parent string `json:"parent"`
77+
Allocation string `json:"allocation"`
78+
Quota string `json:"quota"`
79+
Name string `json:"name"`
80+
Hard bool `json:"hard"`
81+
Children []TreeNode `json:"children"`
82+
Parent string `json:"parent"`
7983
}
8084

81-
8285
// Making sure that QuotaManager implements QuotaManagerInterface.
8386
var _ = quota.QuotaManagerInterface(&QuotaManager{})
8487

85-
8688
func parseId(id string) (string, string) {
8789
ns := ""
8890
n := ""
@@ -115,24 +117,24 @@ func createId(ns string, n string) string {
115117
}
116118

117119
func NewQuotaManager(dispatchedAWDemands map[string]*clusterstateapi.Resource, dispatchedAWs map[string]*arbv1.AppWrapper,
118-
awJobLister listersv1.AppWrapperLister, config *rest.Config,
119-
serverOptions *options.ServerOption) (*QuotaManager, error) {
120+
awJobLister listersv1.AppWrapperLister, config *rest.Config,
121+
serverOptions *options.ServerOption) (*QuotaManager, error) {
120122
if serverOptions.QuotaEnabled == false {
121123
klog.Infof("[NewQuotaManager] Quota management is not enabled.")
122124
return nil, nil
123125
}
124126

125127
qm := &QuotaManager{
126-
url: serverOptions.QuotaRestURL,
127-
appwrapperLister: awJobLister,
128-
preemptionEnabled: serverOptions.Preemption,
128+
url: serverOptions.QuotaRestURL,
129+
appwrapperLister: awJobLister,
130+
preemptionEnabled: serverOptions.Preemption,
129131
}
130132

131133
return qm, nil
132134
}
133135

134136
// Recursive call to add names of Tree
135-
func (qm *QuotaManager) addChildrenNodes(parentNode TreeNode, treeIDs []string) ([]string) {
137+
func (qm *QuotaManager) addChildrenNodes(parentNode TreeNode, treeIDs []string) []string {
136138
if len(parentNode.Children) > 0 {
137139
for _, childNode := range parentNode.Children {
138140
klog.V(10).Infof("[getQuotaTreeIDs] Quota tree response child node from quota mananger: %s", childNode.Name)
@@ -143,7 +145,7 @@ func (qm *QuotaManager) addChildrenNodes(parentNode TreeNode, treeIDs []string)
143145
return treeIDs
144146
}
145147

146-
func (qm *QuotaManager) getQuotaTreeIDs() ([]string) {
148+
func (qm *QuotaManager) getQuotaTreeIDs() []string {
147149
var treeIDs []string
148150
// If a URL does not exist then assume fits quota
149151
if len(qm.url) < 1 {
@@ -173,7 +175,7 @@ func (qm *QuotaManager) getQuotaTreeIDs() ([]string) {
173175
body, err := ioutil.ReadAll(response.Body)
174176
if err != nil {
175177
klog.Errorf("[getQuotaTreeIDs] Failed to read quota tree from the quota manager body: %s, error=%#v",
176-
string(body), err)
178+
string(body), err)
177179
return treeIDs
178180
}
179181

@@ -205,24 +207,24 @@ func isValidQuota(quotaGroup QuotaGroup, qmTreeIDs []string) bool {
205207
return false
206208
}
207209

208-
func (qm *QuotaManager) getQuotaDesignation(aw *arbv1.AppWrapper) ([]QuotaGroup) {
210+
func (qm *QuotaManager) getQuotaDesignation(aw *arbv1.AppWrapper) []QuotaGroup {
209211
var groups []QuotaGroup
210212

211213
// Get list of quota management tree IDs
212214
qmTreeIDs := qm.getQuotaTreeIDs()
213215
if len(qmTreeIDs) < 1 {
214216
klog.Warningf("[getQuotaDesignation] No valid quota management IDs found for AppWrapper Job: %s/%s",
215-
aw.Namespace, aw.Name)
217+
aw.Namespace, aw.Name)
216218
}
217219

218220
labels := aw.GetLabels()
219-
if ( labels != nil) {
221+
if labels != nil {
220222
keys := reflect.ValueOf(labels).MapKeys()
221-
for _, key := range keys {
223+
for _, key := range keys {
222224
strkey := key.String()
223225
quotaGroup := QuotaGroup{
224226
GroupContext: strkey,
225-
GroupId: labels[strkey],
227+
GroupId: labels[strkey],
226228
}
227229
if isValidQuota(quotaGroup, qmTreeIDs) {
228230
groups = append(groups, quotaGroup)
@@ -236,7 +238,7 @@ func (qm *QuotaManager) getQuotaDesignation(aw *arbv1.AppWrapper) ([]QuotaGroup)
236238
}
237239
} else {
238240
klog.V(4).Infof("[getQuotaDesignation] AppWrapper: %s/%s does not any context quota labels.",
239-
aw.Namespace, aw.Name)
241+
aw.Namespace, aw.Name)
240242
}
241243

242244
if len(groups) > 0 {
@@ -246,8 +248,8 @@ func (qm *QuotaManager) getQuotaDesignation(aw *arbv1.AppWrapper) ([]QuotaGroup)
246248
klog.V(4).Infof("[getQuotaDesignation] AppWrapper: %s/%s does not have any quota labels, using default.",
247249
aw.Namespace, aw.Name)
248250
var defaultGroup = QuotaGroup{
249-
GroupContext: "DEFAULTCONTEXT",
250-
GroupId: "DEFAULT",
251+
GroupContext: "DEFAULTCONTEXT",
252+
GroupId: "DEFAULT",
251253
}
252254
groups = append(groups, defaultGroup)
253255
}
@@ -256,7 +258,7 @@ func (qm *QuotaManager) getQuotaDesignation(aw *arbv1.AppWrapper) ([]QuotaGroup)
256258
}
257259

258260
func (qm *QuotaManager) Fits(aw *arbv1.AppWrapper, awResDemands *clusterstateapi.Resource,
259-
proposedPreemptions []*arbv1.AppWrapper) (bool, []*arbv1.AppWrapper, string) {
261+
proposedPreemptions []*arbv1.AppWrapper) (bool, []*arbv1.AppWrapper, string) {
260262

261263
// Handle uninitialized quota manager
262264
if len(qm.url) <= 0 {
@@ -271,7 +273,7 @@ func (qm *QuotaManager) Fits(aw *arbv1.AppWrapper, awResDemands *clusterstateapi
271273
groups := qm.getQuotaDesignation(aw)
272274
preemptable := qm.preemptionEnabled
273275
awCPU_Demand := int(math.Trunc(awResDemands.MilliCPU))
274-
awMem_Demand := int(math.Trunc(awResDemands.Memory)/1000000)
276+
awMem_Demand := int(math.Trunc(awResDemands.Memory) / 1000000)
275277
var demand []int
276278
demand = append(demand, awCPU_Demand)
277279
demand = append(demand, awMem_Demand)
@@ -331,8 +333,7 @@ func (qm *QuotaManager) Fits(aw *arbv1.AppWrapper, awResDemands *clusterstateapi
331333
return doesFit, preemptIds, ""
332334
}
333335

334-
335-
func (qm *QuotaManager) getAppWrappers(preemptIds []string) []*arbv1.AppWrapper{
336+
func (qm *QuotaManager) getAppWrappers(preemptIds []string) []*arbv1.AppWrapper {
336337
var aws []*arbv1.AppWrapper
337338
if len(preemptIds) <= 0 {
338339
return nil
@@ -411,3 +412,6 @@ func (qm *QuotaManager) Release(aw *arbv1.AppWrapper) bool {
411412

412413
return released
413414
}
415+
func (qm *QuotaManager) GetValidQuotaLabels() []string {
416+
return qm.getQuotaTreeIDs()
417+
}

test/e2e-kuttl/install-quota-subtree.yaml

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
---
12
apiVersion: ibm.com/v1
23
kind: QuotaSubtree
34
metadata:
@@ -54,4 +55,32 @@ spec:
5455
hardLimit: true
5556
requests:
5657
cpu: 900m
57-
memory: 300Mi
58+
memory: 300Mi
59+
- name: default
60+
quotas:
61+
hardLimit: false
62+
requests:
63+
cpu: 0m
64+
memory: 0Mi
65+
---
66+
apiVersion: ibm.com/v1
67+
kind: QuotaSubtree
68+
metadata:
69+
name: service-root-children
70+
namespace: kube-system
71+
labels:
72+
tree: quota_service
73+
spec:
74+
parent: service-root
75+
children:
76+
- name: gold
77+
quotas:
78+
requests:
79+
cpu: 1075m
80+
memory: 1045Mi
81+
- name: default
82+
quotas:
83+
hardLimit: false
84+
requests:
85+
cpu: 0m
86+
memory: 0Mi

test/e2e-kuttl/quota-errors/03-assert.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ metadata:
66
namespace: quota-errors
77
labels:
88
quota_context: "silver"
9-
quota_service: "service-root"
9+
quota_service: "gold"
1010
status:
1111
state: Running
1212
---
@@ -24,4 +24,4 @@ status:
2424
observedGeneration: 1
2525
readyReplicas: 1
2626
replicas: 1
27-
updatedReplicas: 1
27+
updatedReplicas: 1

test/e2e-kuttl/quota-errors/03-install.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ metadata:
55
namespace: quota-errors
66
labels:
77
quota_context: "silver"
8-
quota_service: "service-root"
8+
quota_service: "gold"
99
spec:
1010
resources:
1111
GenericItems:

test/e2e-kuttl/quota-forest/01-assert.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,11 @@ metadata:
2222
namespace: kube-system
2323
labels:
2424
tree: quota_context
25+
---
26+
apiVersion: ibm.com/v1
27+
kind: QuotaSubtree
28+
metadata:
29+
name: service-root-children
30+
namespace: kube-system
31+
labels:
32+
tree: quota_service

0 commit comments

Comments
 (0)