@@ -20,6 +20,8 @@ import (
20
20
"context"
21
21
"fmt"
22
22
"math"
23
+ "slices"
24
+ "strings"
23
25
24
26
v1 "k8s.io/api/core/v1"
25
27
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -30,13 +32,15 @@ import (
30
32
"k8s.io/kubernetes/pkg/scheduler/framework"
31
33
32
34
llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
35
+ llmazinferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
33
36
)
34
37
35
38
const (
36
39
Name = "ResourceFungibility"
37
40
stateKey = Name
38
41
39
- modelNameLabelKey = llmazcoreapi .ModelNameLabelKey
42
+ modelNameLabelKey = llmazcoreapi .ModelNameLabelKey
43
+ inferenceServiceFlavorsAnnoKey = llmazinferenceapi .InferenceServiceFlavorsAnnoKey
40
44
)
41
45
42
46
var (
@@ -151,7 +155,24 @@ func (rf *ResourceFungibility) calPreFilterState(ctx context.Context, pod *v1.Po
151
155
return nil
152
156
}
153
157
154
- for _ , f := range model .Spec .InferenceConfig .Flavors {
158
+ // By default, all flavors configured in the model will be used. But if the given annotation is set,
159
+ // it means that the inference service overrides the default value with a subset of the model's flavors
160
+ // and the scheduler should respect the order of flavors configured in the annotation.
161
+ serviceFlavors := model .Spec .InferenceConfig .Flavors
162
+ if v , ok := pod .Annotations [inferenceServiceFlavorsAnnoKey ]; ok {
163
+ serviceFlavors = nil
164
+ for _ , flavorName := range strings .Split (v , "," ) {
165
+ idx := slices .IndexFunc (model .Spec .InferenceConfig .Flavors , func (f llmazcoreapi.Flavor ) bool {
166
+ return string (f .Name ) == flavorName
167
+ })
168
+ if idx == - 1 {
169
+ return fmt .Errorf ("flavor %q not found in model %q" , flavorName , modelName )
170
+ }
171
+ serviceFlavors = append (serviceFlavors , model .Spec .InferenceConfig .Flavors [idx ])
172
+ }
173
+ }
174
+
175
+ for _ , f := range serviceFlavors {
155
176
if len (f .NodeSelector ) == 0 {
156
177
// Once nodeSelector is empty, which means all nodes are potential candidates,
157
178
// so we'll skip the Filter stage.
0 commit comments