Skip to content

[AQUA][GPT-OSS] Add Shape-Specific Env Config for GPT-OSS Models in AQUA Deployment Config Reader #1244

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions ads/aqua/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,44 @@ def get_container_params_type(container_type_name: str) -> str:
return UNKNOWN


def get_container_env_type(container_type_name: Optional[str]) -> str:
    """
    Determine the container environment type based on the container type name.

    This function matches the provided container type name against the known
    values of `InferenceContainerType`. The check is case-insensitive and
    allows for partial matches so that changes in container naming conventions
    (e.g., prefixes or suffixes) will still be matched correctly.

    Examples:
        >>> get_container_env_type("odsc-vllm-serving")
        'VLLM'
        >>> get_container_env_type("ODSC-TGI-Serving")
        'TGI'
        >>> get_container_env_type("custom-unknown-container")
        'UNKNOWN'

    Args:
        container_type_name (Optional[str]):
            The deployment container type name (e.g., "odsc-vllm-serving").

    Returns:
        str:
            - A matching `InferenceContainerType` value string, uppercased
              (e.g., "VLLM", "TGI", "LLAMA-CPP") — the result is always
              uppercase because the match is returned via `.upper()`.
            - `"UNKNOWN"` if no match is found or the input is empty/None.
    """
    # Guard: None or empty string — nothing to match against.
    if not container_type_name:
        return UNKNOWN

    # casefold() gives aggressive case-insensitive comparison (stronger than lower()).
    needle = container_type_name.strip().casefold()

    for container_type in InferenceContainerType.values():
        # Substring match (not equality) so decorated names like
        # "odsc-vllm-serving" still resolve to the bare container type.
        # NOTE(review): first match in enum-declaration order wins — assumes
        # no container type value is a substring of another; verify if new
        # InferenceContainerType members are added.
        if container_type and container_type.casefold() in needle:
            return container_type.upper()

    return UNKNOWN


def get_restricted_params_by_container(container_type_name: str) -> set:
"""The utility function accepts the deployment container type name and returns a set of restricted params
for that container.
Expand Down
10 changes: 10 additions & 0 deletions ads/aqua/modeldeployment/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ class MultiModelConfig(Serializable):
gpu_count (int, optional): Number of GPUs count to this model of this shape.
parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
configure the behavior of a particular GPU shape.
env (Dict[str, Dict[str, str]]): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
"""

gpu_count: Optional[int] = Field(
Expand All @@ -97,6 +98,10 @@ class MultiModelConfig(Serializable):
default_factory=dict,
description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).",
)
env: Optional[Dict[str, Dict[str, str]]] = Field(
default_factory=dict,
description="Environment variables grouped by namespace",
)

class Config:
extra = "allow"
Expand Down Expand Up @@ -130,6 +135,7 @@ class ConfigurationItem(Serializable):
configure the behavior of a particular GPU shape.
multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details.
shape_info (DeploymentShapeInfo, optional): The shape information to this model for specific CPU shape.
env (Dict[str, Dict[str, str]]): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
"""

parameters: Optional[Dict[str, str]] = Field(
Expand All @@ -143,6 +149,10 @@ class ConfigurationItem(Serializable):
default_factory=DeploymentShapeInfo,
description="The shape information to this model for specific shape",
)
env: Optional[Dict[str, Dict[str, str]]] = Field(
default_factory=dict,
description="Environment variables grouped by namespace",
)

class Config:
extra = "allow"
Expand Down
18 changes: 13 additions & 5 deletions ads/aqua/modeldeployment/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
build_pydantic_error_message,
find_restricted_params,
get_combined_params,
get_container_env_type,
get_container_params_type,
get_ocid_substring,
get_params_list,
Expand Down Expand Up @@ -1043,6 +1044,7 @@ def get_deployment_config(self, model_id: str) -> AquaDeploymentConfig:
config = self.get_config_from_metadata(
model_id, AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION
).config

if config:
logger.info(
f"Fetched {AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION} from defined metadata for model: {model_id}."
Expand Down Expand Up @@ -1127,7 +1129,7 @@ def get_deployment_default_params(
model_id: str,
instance_shape: str,
gpu_count: int = None,
) -> List[str]:
) -> Dict:
"""Gets the default params set in the deployment configs for the given model and instance shape.

Parameters
Expand All @@ -1149,6 +1151,7 @@ def get_deployment_default_params(

"""
default_params = []
default_envs = {}
config_params = {}
model = DataScienceModel.from_id(model_id)
try:
Expand All @@ -1158,16 +1161,15 @@ def get_deployment_default_params(
except ValueError:
container_type_key = UNKNOWN
logger.debug(
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the custom metadata field for model {model_id}."
f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the "
f"custom metadata field for model {model_id}."
)

if container_type_key:
deployment_config = self.get_deployment_config(model_id)

instance_shape_config = deployment_config.configuration.get(
instance_shape, ConfigurationItem()
)

if instance_shape_config.multi_model_deployment and gpu_count:
gpu_params = instance_shape_config.multi_model_deployment

Expand All @@ -1176,12 +1178,18 @@ def get_deployment_default_params(
config_params = gpu_config.parameters.get(
get_container_params_type(container_type_key), UNKNOWN
)
default_envs = instance_shape_config.env.get(
get_container_env_type(container_type_key), {}
)
break

else:
config_params = instance_shape_config.parameters.get(
get_container_params_type(container_type_key), UNKNOWN
)
default_envs = instance_shape_config.env.get(
get_container_env_type(container_type_key), {}
)

if config_params:
params_list = get_params_list(config_params)
Expand All @@ -1194,7 +1202,7 @@ def get_deployment_default_params(
if params.split()[0] not in restricted_params_set:
default_params.append(params)

return default_params
return {"data": default_params, "env": default_envs}

def validate_deployment_params(
self,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
{
"configuration": {
"BM.GPU.A100-v2.8": {
"env": {},
"multi_model_deployment": [
{
"env": {},
"gpu_count": 1,
"parameters": {
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
}
},
{
"env": {},
"gpu_count": 2,
"parameters": {
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
}
},
{
"env": {},
"gpu_count": 8,
"parameters": {
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
Expand All @@ -26,6 +30,7 @@
}
},
"BM.GPU.H100.8": {
"env": {},
"multi_model_deployment": [
{
"gpu_count": 1
Expand All @@ -44,6 +49,7 @@
"VM.GPU.A10.2": {
"multi_model_deployment": [
{
"env": {},
"gpu_count": 2,
"parameters": {
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
Expand All @@ -52,8 +58,10 @@
]
},
"VM.GPU.A10.4": {
"env": {},
"multi_model_deployment": [
{
"env": {},
"gpu_count": 2,
"parameters": {
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
{
"configuration": {
"VM.GPU.A10.4": {
"env": {
"VLLM": {
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
}
},
"parameters": {
"TGI_PARAMS": "--max-stop-sequences 6",
"VLLM_PARAMS": "--max-model-len 4096"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,43 +1,58 @@
{
"shape": [
"VM.GPU.A10.1",
"VM.GPU.A10.2",
"BM.GPU.A10.4",
"BM.GPU.L40S-NC.4"
],
"configuration": {
"VM.GPU.A10.2": {
"parameters": {
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
},
"multi_model_deployment": [
{
"gpu_count": 1
}
]
},
"BM.GPU.A10.4": {
"parameters": {
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
},
"multi_model_deployment": [
{
"gpu_count": 1
},
{
"gpu_count": 2
}
]
"configuration": {
"BM.GPU.A10.4": {
"env": {
"VLLM": {
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
}
},
"multi_model_deployment": [
{
"gpu_count": 1
},
"BM.GPU.L40S-NC.4": {
"parameters": {
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
},
"multi_model_deployment": [
{
"gpu_count": 2
}
]
{
"gpu_count": 2
}
],
"parameters": {
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
}
},
"BM.GPU.L40S-NC.4": {
"env": {
"VLLM": {
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
}
},
"multi_model_deployment": [
{
"gpu_count": 2
}
],
"parameters": {
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
}
},
"VM.GPU.A10.2": {
"env": {
"VLLM": {
"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
}
},
"multi_model_deployment": [
{
"gpu_count": 1
}
],
"parameters": {
"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"
}
}
},
"shape": [
"VM.GPU.A10.1",
"VM.GPU.A10.2",
"BM.GPU.A10.4",
"BM.GPU.L40S-NC.4"
]
}
Loading