Skip to content

Commit 4ba4b0b

Browse files
committed
Update resource fungibility plugin with flavor filtering
Signed-off-by: carlory <[email protected]>
1 parent 0c2aa6d commit 4ba4b0b

File tree

3 files changed

+27
-3
lines changed

3 files changed

+27
-3
lines changed

go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ module github.com/inftyai/scheduler
33
go 1.24.1
44

55
require (
6-
github.com/google/go-cmp v0.7.0
76
github.com/inftyai/llmaz v0.1.3
87
k8s.io/api v0.33.1
98
k8s.io/apimachinery v0.33.1
@@ -39,6 +38,7 @@ require (
3938
github.com/google/btree v1.1.3 // indirect
4039
github.com/google/cel-go v0.23.2 // indirect
4140
github.com/google/gnostic-models v0.6.9 // indirect
41+
github.com/google/go-cmp v0.7.0 // indirect
4242
github.com/google/uuid v1.6.0 // indirect
4343
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
4444
github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0 // indirect
@@ -109,6 +109,7 @@ require (
109109
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect
110110
sigs.k8s.io/controller-runtime v0.20.3 // indirect
111111
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
112+
sigs.k8s.io/lws v0.5.1 // indirect
112113
sigs.k8s.io/randfill v1.0.0 // indirect
113114
sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect
114115
sigs.k8s.io/yaml v1.4.0 // indirect

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,8 @@ sigs.k8s.io/controller-runtime v0.20.3 h1:I6Ln8JfQjHH7JbtCD2HCYHoIzajoRxPNuvhvcD
319319
sigs.k8s.io/controller-runtime v0.20.3/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY=
320320
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
321321
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
322+
sigs.k8s.io/lws v0.5.1 h1:eaeMNkP0manRluQZLN32atoULaGrzP611gSLdFaHZs4=
323+
sigs.k8s.io/lws v0.5.1/go.mod h1:qprXSTTFnfmPZY3V3sUfk6ZPmAodsdoKS8XVElJ9kN0=
322324
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
323325
sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
324326
sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=

pkg/plugins/resource_fungibility/resource_fungibility.go

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import (
2020
"context"
2121
"fmt"
2222
"math"
23+
"slices"
24+
"strings"
2325

2426
v1 "k8s.io/api/core/v1"
2527
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -30,13 +32,15 @@ import (
3032
"k8s.io/kubernetes/pkg/scheduler/framework"
3133

3234
llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
35+
llmazinferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
3336
)
3437

3538
const (
3639
Name = "ResourceFungibility"
3740
stateKey = Name
3841

39-
modelNameLabelKey = llmazcoreapi.ModelNameLabelKey
42+
modelNameLabelKey = llmazcoreapi.ModelNameLabelKey
43+
inferenceServiceFlavorsAnnoKey = llmazinferenceapi.InferenceServiceFlavorsAnnoKey
4044
)
4145

4246
var (
@@ -151,7 +155,24 @@ func (rf *ResourceFungibility) calPreFilterState(ctx context.Context, pod *v1.Po
151155
return nil
152156
}
153157

154-
for _, f := range model.Spec.InferenceConfig.Flavors {
158+
// By default, all flavors configured in the model will be used. But if the given annotation is set,
159+
// it means that the inference service overrides the default value with a subset of the model's flavors
160+
// and the scheduler should respect the order of flavors configured in the annotation.
161+
serviceFlavors := model.Spec.InferenceConfig.Flavors
162+
if v, ok := pod.Annotations[inferenceServiceFlavorsAnnoKey]; ok {
163+
serviceFlavors = nil
164+
for _, flavorName := range strings.Split(v, ",") {
165+
idx := slices.IndexFunc(model.Spec.InferenceConfig.Flavors, func(f llmazcoreapi.Flavor) bool {
166+
return string(f.Name) == flavorName
167+
})
168+
if idx == -1 {
169+
return fmt.Errorf("flavor %q not found in model %q", flavorName, modelName)
170+
}
171+
serviceFlavors = append(serviceFlavors, model.Spec.InferenceConfig.Flavors[idx])
172+
}
173+
}
174+
175+
for _, f := range serviceFlavors {
155176
if len(f.NodeSelector) == 0 {
156177
// Once nodeSelector is empty, which means all nodes are potential candidates,
157178
// so we'll skip the Filter stage.

0 commit comments

Comments
 (0)