diff --git a/go.mod b/go.mod index acb5cd1..85dde11 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,6 @@ module github.com/inftyai/scheduler go 1.24.1 require ( - github.com/google/go-cmp v0.7.0 github.com/inftyai/llmaz v0.1.3 k8s.io/api v0.33.1 k8s.io/apimachinery v0.33.1 @@ -39,6 +38,7 @@ require ( github.com/google/btree v1.1.3 // indirect github.com/google/cel-go v0.23.2 // indirect github.com/google/gnostic-models v0.6.9 // indirect + github.com/google/go-cmp v0.7.0 // indirect github.com/google/uuid v1.6.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0 // indirect @@ -109,6 +109,7 @@ require ( sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect sigs.k8s.io/controller-runtime v0.20.3 // indirect sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect + sigs.k8s.io/lws v0.5.1 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect sigs.k8s.io/yaml v1.4.0 // indirect diff --git a/go.sum b/go.sum index 6d787bf..d5bcb67 100644 --- a/go.sum +++ b/go.sum @@ -319,6 +319,8 @@ sigs.k8s.io/controller-runtime v0.20.3 h1:I6Ln8JfQjHH7JbtCD2HCYHoIzajoRxPNuvhvcD sigs.k8s.io/controller-runtime v0.20.3/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= +sigs.k8s.io/lws v0.5.1 h1:eaeMNkP0manRluQZLN32atoULaGrzP611gSLdFaHZs4= +sigs.k8s.io/lws v0.5.1/go.mod h1:qprXSTTFnfmPZY3V3sUfk6ZPmAodsdoKS8XVElJ9kN0= sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= diff --git 
a/pkg/plugins/resource_fungibility/resource_fungibility.go b/pkg/plugins/resource_fungibility/resource_fungibility.go index 2473b0f..9d80da1 100644 --- a/pkg/plugins/resource_fungibility/resource_fungibility.go +++ b/pkg/plugins/resource_fungibility/resource_fungibility.go @@ -20,6 +20,8 @@ import ( "context" "fmt" "math" + "slices" + "strings" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -30,13 +32,15 @@ import ( "k8s.io/kubernetes/pkg/scheduler/framework" llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1" + llmazinferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1" ) const ( Name = "ResourceFungibility" stateKey = Name - modelNameLabelKey = llmazcoreapi.ModelNameLabelKey + modelNameLabelKey = llmazcoreapi.ModelNameLabelKey + inferenceServiceFlavorsAnnoKey = llmazinferenceapi.InferenceServiceFlavorsAnnoKey ) var ( @@ -151,7 +155,24 @@ func (rf *ResourceFungibility) calPreFilterState(ctx context.Context, pod *v1.Po return nil } - for _, f := range model.Spec.InferenceConfig.Flavors { + // By default, all flavors configured in the model will be used. But if the given annotation is set, + // it means that the inference service overrides the default value with a subset of the model's flavors + // and the scheduler should respect the order of flavors configured in the annotation. 
+ serviceFlavors := model.Spec.InferenceConfig.Flavors + if v, ok := pod.Annotations[inferenceServiceFlavorsAnnoKey]; ok { + serviceFlavors = nil + for _, flavorName := range strings.Split(v, ",") { + idx := slices.IndexFunc(model.Spec.InferenceConfig.Flavors, func(f llmazcoreapi.Flavor) bool { + return string(f.Name) == flavorName + }) + if idx == -1 { + return fmt.Errorf("flavor %q not found in model %q", flavorName, modelName) + } + serviceFlavors = append(serviceFlavors, model.Spec.InferenceConfig.Flavors[idx]) + } + } + + for _, f := range serviceFlavors { if len(f.NodeSelector) == 0 { // Once nodeSelector is empty, which means all nodes are potential candidates, // so we'll skip the Filter stage.