Skip to content

add default quota to AW #396

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion pkg/controller/queuejob/queuejob_controller_ex.go
Original file line number Diff line number Diff line change
Expand Up @@ -1260,11 +1260,46 @@ func (qjm *XController) ScheduleNext() {
klog.Infof("[ScheduleNext] XQJ %s with resources %v to be scheduled on aggregated idle resources %v", qj.Name, aggqj, resources)

if aggqj.LessEqual(resources) && qjm.nodeChecks(qjm.cache.GetUnallocatedHistograms(), qj) {
//Now evaluate quota
// Now evaluate quota
fits := true
klog.V(4).Infof("[ScheduleNext] HOL available resourse successful check for %s at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status)
if qjm.serverOption.QuotaEnabled {
if qjm.quotaManager != nil {
// Quota tree design:
// - All AppWrappers without quota submission will consume quota from the 'default' node.
// - All quota trees in the system should have a 'default' node so AppWrappers without
// quota specification can be dispatched
// - If the AppWrapper doesn't have a quota label, then one is added for every tree with the 'default' value
// - Depending on how the 'default' node is configured, AppWrappers that don't specify quota could be
// preemptable by default (e.g., a 'default' node with 'cpu: 0m' and 'memory: 0Mi' quota and 'hardLimit: false';
// such a node borrows quota from other nodes already in the system)
apiCacheAWJob, err := qjm.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name)
if err != nil {
klog.Errorf("[ScheduleNext] Failed to get AppWrapper from API Cache %v/%v: %v",
qj.Namespace, qj.Name, err)
continue
}
allTrees := qjm.quotaManager.GetValidQuotaLabels()
newLabels := make(map[string]string)
for key, value := range apiCacheAWJob.Labels {
newLabels[key] = value
}
updateLabels := false
for _, treeName := range allTrees {
if _, quotaSetForAW := newLabels[treeName]; !quotaSetForAW {
newLabels[treeName] = "default"
updateLabels = true
}
}
if updateLabels {
apiCacheAWJob.SetLabels(newLabels)
if err := qjm.updateEtcd(apiCacheAWJob, "ScheduleNext - setDefaultQuota"); err == nil {
klog.V(3).Infof("[ScheduleNext] Default quota added to AW %v", qj.Name)
} else {
klog.V(3).Infof("[ScheduleNext] Failed to added default quota to AW %v, skipping dispatch of AW", qj.Name)
return
}
}
var msg string
var preemptAWs []*arbv1.AppWrapper
quotaFits, preemptAWs, msg = qjm.quotaManager.Fits(qj, aggqj, proposedPreemptions)
Expand Down
7 changes: 4 additions & 3 deletions pkg/controller/quota/quota_manager_interface.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// ------------------------------------------------------ {COPYRIGHT-TOP} ---
// Copyright 2019, 2021, 2022, 2023 The Multi-Cluster App Dispatcher Authors.
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
Expand All @@ -23,4 +23,5 @@ import (
// QuotaManagerInterface is the set of quota operations that a quota manager
// implementation exposes to its callers.
type QuotaManagerInterface interface {
// Fits evaluates the AppWrapper aw against quota, given its resource demand and the
// AppWrappers already proposed for preemption. It returns a bool, a list of AppWrappers
// and a string; the ScheduleNext caller binds these to quotaFits, preemptAWs and msg.
Fits(aw *arbv1.AppWrapper, resources *clusterstateapi.Resource, proposedPremptions []*arbv1.AppWrapper) (bool, []*arbv1.AppWrapper, string)
// Release takes an AppWrapper and returns a bool.
// NOTE(review): presumably true means the quota held by aw was released — confirm against the implementations.
Release(aw *arbv1.AppWrapper) bool
// GetValidQuotaLabels returns a list of strings. ScheduleNext treats each entry as a
// quota tree name and uses it as a label key on the AppWrapper, assigning the value
// "default" when the AppWrapper does not already carry that label.
GetValidQuotaLabels() []string
}
Original file line number Diff line number Diff line change
Expand Up @@ -624,3 +624,6 @@ func (qm *QuotaManager) removeConsumer(consumerID string) {
klog.Warningf("Failed to remove consumer %s", consumerID)
}
}
// GetValidQuotaLabels returns the tree names reported by the quota manager backend.
func (qm *QuotaManager) GetValidQuotaLabels() []string {
	treeNames := qm.quotaManagerBackend.GetTreeNames()
	return treeNames
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ func (m *Manager) String() string {
defer m.mutex.RUnlock()

var b bytes.Buffer
b.WriteString("QuotaManger: \n")
b.WriteString("QuotaManager: \n")
b.WriteString("Mode: " + m.GetModeString() + "\n")
b.WriteString("\n")

Expand Down
78 changes: 41 additions & 37 deletions pkg/quotaplugins/quota-simple-rest/quota_rest_manager.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
//go:build !private
// +build !private

// ------------------------------------------------------ {COPYRIGHT-TOP} ---
// Copyright 2022, 2023 The Multi-Cluster App Dispatcher Authors.
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
Expand All @@ -21,6 +23,7 @@ import (
"bytes"
"encoding/json"
"fmt"

"github.com/project-codeflare/multi-cluster-app-dispatcher/cmd/kar-controllers/app/options"
arbv1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1"
listersv1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/client/listers/controller/v1"
Expand All @@ -29,26 +32,27 @@ import (
"github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/quotaplugins/util"

"io/ioutil"
"k8s.io/client-go/rest"
"k8s.io/klog/v2"
"math"
"net/http"
"net/http/httputil"
"reflect"
"strings"
"time"

"k8s.io/client-go/rest"
"k8s.io/klog/v2"
)

// QuotaManager implements a QuotaManagerInterface.
type QuotaManager struct {
url string
appwrapperLister listersv1.AppWrapperLister
preemptionEnabled bool
url string
appwrapperLister listersv1.AppWrapperLister
preemptionEnabled bool
}

type QuotaGroup struct {
GroupContext string `json:"groupcontext"`
GroupId string `json:"groupid"`
GroupContext string `json:"groupcontext"`
GroupId string `json:"groupid"`
}

type Request struct {
Expand All @@ -70,19 +74,17 @@ type QuotaResponse struct {
}

type TreeNode struct {
Allocation string `json:"allocation"`
Quota string `json:"quota"`
Name string `json:"name"`
Hard bool `json:"hard"`
Children []TreeNode `json:"children"`
Parent string `json:"parent"`
Allocation string `json:"allocation"`
Quota string `json:"quota"`
Name string `json:"name"`
Hard bool `json:"hard"`
Children []TreeNode `json:"children"`
Parent string `json:"parent"`
}


// Making sure that QuotaManager implements QuotaManagerInterface.
var _ = quota.QuotaManagerInterface(&QuotaManager{})


func parseId(id string) (string, string) {
ns := ""
n := ""
Expand Down Expand Up @@ -115,24 +117,24 @@ func createId(ns string, n string) string {
}

func NewQuotaManager(dispatchedAWDemands map[string]*clusterstateapi.Resource, dispatchedAWs map[string]*arbv1.AppWrapper,
awJobLister listersv1.AppWrapperLister, config *rest.Config,
serverOptions *options.ServerOption) (*QuotaManager, error) {
awJobLister listersv1.AppWrapperLister, config *rest.Config,
serverOptions *options.ServerOption) (*QuotaManager, error) {
if serverOptions.QuotaEnabled == false {
klog.Infof("[NewQuotaManager] Quota management is not enabled.")
return nil, nil
}

qm := &QuotaManager{
url: serverOptions.QuotaRestURL,
appwrapperLister: awJobLister,
preemptionEnabled: serverOptions.Preemption,
url: serverOptions.QuotaRestURL,
appwrapperLister: awJobLister,
preemptionEnabled: serverOptions.Preemption,
}

return qm, nil
}

// Recursive call to add the names of the tree nodes
func (qm *QuotaManager) addChildrenNodes(parentNode TreeNode, treeIDs []string) ([]string) {
func (qm *QuotaManager) addChildrenNodes(parentNode TreeNode, treeIDs []string) []string {
if len(parentNode.Children) > 0 {
for _, childNode := range parentNode.Children {
klog.V(10).Infof("[getQuotaTreeIDs] Quota tree response child node from quota mananger: %s", childNode.Name)
Expand All @@ -143,7 +145,7 @@ func (qm *QuotaManager) addChildrenNodes(parentNode TreeNode, treeIDs []string)
return treeIDs
}

func (qm *QuotaManager) getQuotaTreeIDs() ([]string) {
func (qm *QuotaManager) getQuotaTreeIDs() []string {
var treeIDs []string
// If a URL does not exist then assume fits quota
if len(qm.url) < 1 {
Expand Down Expand Up @@ -173,7 +175,7 @@ func (qm *QuotaManager) getQuotaTreeIDs() ([]string) {
body, err := ioutil.ReadAll(response.Body)
if err != nil {
klog.Errorf("[getQuotaTreeIDs] Failed to read quota tree from the quota manager body: %s, error=%#v",
string(body), err)
string(body), err)
return treeIDs
}

Expand Down Expand Up @@ -205,24 +207,24 @@ func isValidQuota(quotaGroup QuotaGroup, qmTreeIDs []string) bool {
return false
}

func (qm *QuotaManager) getQuotaDesignation(aw *arbv1.AppWrapper) ([]QuotaGroup) {
func (qm *QuotaManager) getQuotaDesignation(aw *arbv1.AppWrapper) []QuotaGroup {
var groups []QuotaGroup

// Get list of quota management tree IDs
qmTreeIDs := qm.getQuotaTreeIDs()
if len(qmTreeIDs) < 1 {
klog.Warningf("[getQuotaDesignation] No valid quota management IDs found for AppWrapper Job: %s/%s",
aw.Namespace, aw.Name)
aw.Namespace, aw.Name)
}

labels := aw.GetLabels()
if ( labels != nil) {
if labels != nil {
keys := reflect.ValueOf(labels).MapKeys()
for _, key := range keys {
for _, key := range keys {
strkey := key.String()
quotaGroup := QuotaGroup{
GroupContext: strkey,
GroupId: labels[strkey],
GroupId: labels[strkey],
}
if isValidQuota(quotaGroup, qmTreeIDs) {
groups = append(groups, quotaGroup)
Expand All @@ -236,7 +238,7 @@ func (qm *QuotaManager) getQuotaDesignation(aw *arbv1.AppWrapper) ([]QuotaGroup)
}
} else {
klog.V(4).Infof("[getQuotaDesignation] AppWrapper: %s/%s does not any context quota labels.",
aw.Namespace, aw.Name)
aw.Namespace, aw.Name)
}

if len(groups) > 0 {
Expand All @@ -246,8 +248,8 @@ func (qm *QuotaManager) getQuotaDesignation(aw *arbv1.AppWrapper) ([]QuotaGroup)
klog.V(4).Infof("[getQuotaDesignation] AppWrapper: %s/%s does not have any quota labels, using default.",
aw.Namespace, aw.Name)
var defaultGroup = QuotaGroup{
GroupContext: "DEFAULTCONTEXT",
GroupId: "DEFAULT",
GroupContext: "DEFAULTCONTEXT",
GroupId: "DEFAULT",
}
groups = append(groups, defaultGroup)
}
Expand All @@ -256,7 +258,7 @@ func (qm *QuotaManager) getQuotaDesignation(aw *arbv1.AppWrapper) ([]QuotaGroup)
}

func (qm *QuotaManager) Fits(aw *arbv1.AppWrapper, awResDemands *clusterstateapi.Resource,
proposedPreemptions []*arbv1.AppWrapper) (bool, []*arbv1.AppWrapper, string) {
proposedPreemptions []*arbv1.AppWrapper) (bool, []*arbv1.AppWrapper, string) {

// Handle uninitialized quota manager
if len(qm.url) <= 0 {
Expand All @@ -271,7 +273,7 @@ func (qm *QuotaManager) Fits(aw *arbv1.AppWrapper, awResDemands *clusterstateapi
groups := qm.getQuotaDesignation(aw)
preemptable := qm.preemptionEnabled
awCPU_Demand := int(math.Trunc(awResDemands.MilliCPU))
awMem_Demand := int(math.Trunc(awResDemands.Memory)/1000000)
awMem_Demand := int(math.Trunc(awResDemands.Memory) / 1000000)
var demand []int
demand = append(demand, awCPU_Demand)
demand = append(demand, awMem_Demand)
Expand Down Expand Up @@ -331,8 +333,7 @@ func (qm *QuotaManager) Fits(aw *arbv1.AppWrapper, awResDemands *clusterstateapi
return doesFit, preemptIds, ""
}


func (qm *QuotaManager) getAppWrappers(preemptIds []string) []*arbv1.AppWrapper{
func (qm *QuotaManager) getAppWrappers(preemptIds []string) []*arbv1.AppWrapper {
var aws []*arbv1.AppWrapper
if len(preemptIds) <= 0 {
return nil
Expand Down Expand Up @@ -411,3 +412,6 @@ func (qm *QuotaManager) Release(aw *arbv1.AppWrapper) bool {

return released
}
// GetValidQuotaLabels returns the quota tree IDs produced by getQuotaTreeIDs.
func (qm *QuotaManager) GetValidQuotaLabels() []string {
	treeIDs := qm.getQuotaTreeIDs()
	return treeIDs
}
31 changes: 30 additions & 1 deletion test/e2e-kuttl/install-quota-subtree.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
---
apiVersion: ibm.com/v1
kind: QuotaSubtree
metadata:
Expand Down Expand Up @@ -54,4 +55,32 @@ spec:
hardLimit: true
requests:
cpu: 900m
memory: 300Mi
memory: 300Mi
- name: default
quotas:
hardLimit: false
requests:
cpu: 0m
memory: 0Mi
---
apiVersion: ibm.com/v1
kind: QuotaSubtree
metadata:
name: service-root-children
namespace: kube-system
labels:
tree: quota_service
spec:
parent: service-root
children:
- name: gold
quotas:
requests:
cpu: 1075m
memory: 1045Mi
- name: default
quotas:
hardLimit: false
requests:
cpu: 0m
memory: 0Mi
4 changes: 2 additions & 2 deletions test/e2e-kuttl/quota-errors/03-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ metadata:
namespace: quota-errors
labels:
quota_context: "silver"
quota_service: "service-root"
quota_service: "gold"
status:
state: Running
---
Expand All @@ -24,4 +24,4 @@ status:
observedGeneration: 1
readyReplicas: 1
replicas: 1
updatedReplicas: 1
updatedReplicas: 1
2 changes: 1 addition & 1 deletion test/e2e-kuttl/quota-errors/03-install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
namespace: quota-errors
labels:
quota_context: "silver"
quota_service: "service-root"
quota_service: "gold"
spec:
resources:
GenericItems:
Expand Down
8 changes: 8 additions & 0 deletions test/e2e-kuttl/quota-forest/01-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,11 @@ metadata:
namespace: kube-system
labels:
tree: quota_context
---
apiVersion: ibm.com/v1
kind: QuotaSubtree
metadata:
name: service-root-children
namespace: kube-system
labels:
tree: quota_service
Loading