diff --git a/ads/aqua/common/utils.py b/ads/aqua/common/utils.py
index 2d64fd42f..99c209bfb 100644
--- a/ads/aqua/common/utils.py
+++ b/ads/aqua/common/utils.py
@@ -997,6 +997,44 @@ def get_container_params_type(container_type_name: str) -> str:
     return UNKNOWN
 
 
+def get_container_env_type(container_type_name: Optional[str]) -> str:
+    """
+    Determine the container environment type based on the container type name.
+
+    This function matches the provided container type name against the known
+    values of `InferenceContainerType`. The check is case-insensitive and
+    allows partial matches, so container names that carry extra prefixes or
+    suffixes (e.g., "odsc-vllm-serving") still resolve correctly.
+
+    Examples:
+        >>> get_container_env_type("odsc-vllm-serving")
+        'VLLM'
+        >>> get_container_env_type("ODSC-TGI-Serving")
+        'TGI'
+        >>> get_container_env_type("custom-unknown-container") == UNKNOWN
+        True
+
+    Args:
+        container_type_name (Optional[str]):
+            The deployment container type name (e.g., "odsc-vllm-serving").
+
+    Returns:
+        str:
+            - A matching `InferenceContainerType` value, upper-cased (e.g., "VLLM", "TGI", "LLAMA-CPP").
+            - The `UNKNOWN` constant if no match is found or the input is empty/None.
+    """
+    if not container_type_name:
+        return UNKNOWN
+
+    needle = container_type_name.strip().casefold()
+
+    for container_type in InferenceContainerType.values():
+        if container_type and container_type.casefold() in needle:
+            return container_type.upper()
+
+    return UNKNOWN
+
+
 def get_restricted_params_by_container(container_type_name: str) -> set:
     """The utility function accepts the deployment container type name
     and returns a set of restricted params for that container.
diff --git a/ads/aqua/modeldeployment/config_loader.py b/ads/aqua/modeldeployment/config_loader.py
index a38a28df3..3423f2739 100644
--- a/ads/aqua/modeldeployment/config_loader.py
+++ b/ads/aqua/modeldeployment/config_loader.py
@@ -88,6 +88,7 @@ class MultiModelConfig(Serializable):
         gpu_count (int, optional): Number of GPUs count to this model of this shape.
         parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to configure the behavior
             of a particular GPU shape.
+        env (Dict[str, Dict[str, str]], optional): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
     """
 
     gpu_count: Optional[int] = Field(
@@ -97,6 +98,10 @@ class MultiModelConfig(Serializable):
         default_factory=dict,
         description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).",
     )
+    env: Optional[Dict[str, Dict[str, str]]] = Field(
+        default_factory=dict,
+        description="Environment variables grouped by namespace",
+    )
 
     class Config:
         extra = "allow"
@@ -130,6 +135,7 @@ class ConfigurationItem(Serializable):
         parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to
             configure the behavior of a particular GPU shape.
         multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details.
         shape_info (DeploymentShapeInfo, optional): The shape information to this model for specific CPU shape.
+        env (Dict[str, Dict[str, str]], optional): Environment variables grouped by namespace (e.g., "VLLM": {"VAR": "VAL"}).
""" parameters: Optional[Dict[str, str]] = Field( @@ -143,6 +149,10 @@ class ConfigurationItem(Serializable): default_factory=DeploymentShapeInfo, description="The shape information to this model for specific shape", ) + env: Optional[Dict[str, Dict[str, str]]] = Field( + default_factory=dict, + description="Environment variables grouped by namespace", + ) class Config: extra = "allow" diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 0e38d95ed..47802c11d 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -27,6 +27,7 @@ build_pydantic_error_message, find_restricted_params, get_combined_params, + get_container_env_type, get_container_params_type, get_ocid_substring, get_params_list, @@ -1043,6 +1044,7 @@ def get_deployment_config(self, model_id: str) -> AquaDeploymentConfig: config = self.get_config_from_metadata( model_id, AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION ).config + if config: logger.info( f"Fetched {AquaModelMetadataKeys.DEPLOYMENT_CONFIGURATION} from defined metadata for model: {model_id}." @@ -1127,7 +1129,7 @@ def get_deployment_default_params( model_id: str, instance_shape: str, gpu_count: int = None, - ) -> List[str]: + ) -> Dict: """Gets the default params set in the deployment configs for the given model and instance shape. Parameters @@ -1149,6 +1151,7 @@ def get_deployment_default_params( """ default_params = [] + default_envs = {} config_params = {} model = DataScienceModel.from_id(model_id) try: @@ -1158,16 +1161,15 @@ def get_deployment_default_params( except ValueError: container_type_key = UNKNOWN logger.debug( - f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the custom metadata field for model {model_id}." + f"{AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} key is not available in the " + f"custom metadata field for model {model_id}." 
             )
 
         if container_type_key:
             deployment_config = self.get_deployment_config(model_id)
-
             instance_shape_config = deployment_config.configuration.get(
                 instance_shape, ConfigurationItem()
             )
-
             if instance_shape_config.multi_model_deployment and gpu_count:
                 gpu_params = instance_shape_config.multi_model_deployment
 
@@ -1176,12 +1178,18 @@
                     config_params = gpu_config.parameters.get(
                         get_container_params_type(container_type_key), UNKNOWN
                     )
+                    default_envs = instance_shape_config.env.get(
+                        get_container_env_type(container_type_key), {}
+                    )
                     break
             else:
                 config_params = instance_shape_config.parameters.get(
                     get_container_params_type(container_type_key), UNKNOWN
                 )
+                default_envs = instance_shape_config.env.get(
+                    get_container_env_type(container_type_key), {}
+                )
 
         if config_params:
             params_list = get_params_list(config_params)
@@ -1194,7 +1202,7 @@
                 if params.split()[0] not in restricted_params_set:
                     default_params.append(params)
 
-        return default_params
+        return {"data": default_params, "env": default_envs}
 
     def validate_deployment_params(
         self,
diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json
index ac197f726..572939c9c 100644
--- a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json
+++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json
@@ -1,20 +1,24 @@
 {
   "configuration": {
     "BM.GPU.A100-v2.8": {
+      "env": {},
       "multi_model_deployment": [
         {
+          "env": {},
           "gpu_count": 1,
           "parameters": {
             "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
           }
         },
         {
+          "env": {},
           "gpu_count": 2,
           "parameters": {
             "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
           }
         },
         {
+          "env": {},
           "gpu_count": 8,
           "parameters": {
             "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
@@ -26,6 +30,7 @@
       }
     },
     "BM.GPU.H100.8": {
+      "env": {},
       "multi_model_deployment": [
         {
           "gpu_count": 1
@@ -44,6 +49,7 @@
     "VM.GPU.A10.2": {
       "multi_model_deployment": [
         {
+          "env": {},
           "gpu_count": 2,
           "parameters": {
             "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
@@ -52,8 +58,10 @@
       ]
     },
     "VM.GPU.A10.4": {
+      "env": {},
       "multi_model_deployment": [
         {
+          "env": {},
           "gpu_count": 2,
           "parameters": {
             "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"
diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json
index 824fa8541..e403f7f5c 100644
--- a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json
+++ b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json
@@ -1,6 +1,11 @@
 {
   "configuration": {
     "VM.GPU.A10.4": {
+      "env": {
+        "VLLM": {
+          "VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"
+        }
+      },
       "parameters": {
         "TGI_PARAMS": "--max-stop-sequences 6",
         "VLLM_PARAMS": "--max-model-len 4096"
diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json
index 8764c354b..478003bbe 100644
--- a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json
+++ b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json
@@ -1,43 +1,58 @@
 {
-  "shape": [
-    "VM.GPU.A10.1",
-    "VM.GPU.A10.2",
-    "BM.GPU.A10.4",
-    "BM.GPU.L40S-NC.4"
-  ],
-  "configuration": {
-    "VM.GPU.A10.2": {
"parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - }, - "multi_model_deployment": [ - { - "gpu_count": 1 - } - ] - }, - "BM.GPU.A10.4": { - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - }, - "multi_model_deployment": [ - { - "gpu_count": 1 - }, - { - "gpu_count": 2 - } - ] + "configuration": { + "BM.GPU.A10.4": { + "env": { + "VLLM": { + "VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1" + } + }, + "multi_model_deployment": [ + { + "gpu_count": 1 }, - "BM.GPU.L40S-NC.4": { - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - }, - "multi_model_deployment": [ - { - "gpu_count": 2 - } - ] + { + "gpu_count": 2 + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + }, + "BM.GPU.L40S-NC.4": { + "env": { + "VLLM": { + "VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1" + } + }, + "multi_model_deployment": [ + { + "gpu_count": 2 + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + }, + "VM.GPU.A10.2": { + "env": { + "VLLM": { + "VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1" + } + }, + "multi_model_deployment": [ + { + "gpu_count": 1 } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } } + }, + "shape": [ + "VM.GPU.A10.1", + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.L40S-NC.4" + ] } diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 7f129a9bd..c7ac40a71 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -12,10 +12,6 @@ from importlib import reload from unittest.mock import MagicMock, patch -from ads.aqua.modeldeployment.constants import DEFAULT_POLL_INTERVAL, DEFAULT_WAIT_TIME -from ads.model.service.oci_datascience_model_deployment import ( - OCIDataScienceModelDeployment, -) import oci import pytest from oci.data_science.models import ( @@ -46,6 +42,7 @@ ModelDeploymentConfigSummary, MultiModelDeploymentConfigLoader, ) +from ads.aqua.modeldeployment.constants import DEFAULT_POLL_INTERVAL, DEFAULT_WAIT_TIME from ads.aqua.modeldeployment.entities import ( AquaDeployment, AquaDeploymentDetail, @@ -57,6 +54,9 @@ from ads.model.datascience_model import DataScienceModel from ads.model.deployment.model_deployment import ModelDeployment from ads.model.model_metadata import ModelCustomMetadata +from ads.model.service.oci_datascience_model_deployment import ( + OCIDataScienceModelDeployment, +) from tests.unitary.with_extras.aqua.utils import ServiceManagedContainers null = None @@ -606,12 +606,14 @@ class TestDataset: "configuration": { "VM.GPU.A10.2": { "parameters": {}, + "env": {}, "multi_model_deployment": [ { "gpu_count": 2, "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" }, + "env": {}, } ], "shape_info": {"configs": [], "type": ""}, @@ -620,14 +622,16 @@ class TestDataset: "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" }, + "env": {}, "multi_model_deployment": [ { "gpu_count": 2, "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" }, + "env": {}, }, - {"gpu_count": 4, "parameters": {}}, + {"gpu_count": 4, "parameters": {}, "env": {}}, ], "shape_info": {"configs": [], "type": ""}, }, @@ -635,24 +639,28 @@ class TestDataset: "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" }, + "env": {}, "multi_model_deployment": [ { "gpu_count": 1, "parameters": { 
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" }, + "env": {}, }, { "gpu_count": 2, "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" }, + "env": {}, }, { "gpu_count": 8, "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" }, + "env": {}, }, ], "shape_info": {"configs": [], "type": ""}, @@ -661,10 +669,11 @@ class TestDataset: "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" }, + "env": {}, "multi_model_deployment": [ - {"gpu_count": 1, "parameters": {}}, - {"gpu_count": 2, "parameters": {}}, - {"gpu_count": 8, "parameters": {}}, + {"gpu_count": 1, "parameters": {}, "env": {}}, + {"gpu_count": 2, "parameters": {}, "env": {}}, + {"gpu_count": 8, "parameters": {}, "env": {}}, ], "shape_info": {"configs": [], "type": ""}, }, @@ -1939,6 +1948,7 @@ def test_create_deployment_for_multi_model( 2, ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ["--max-model-len 4096", "--trust-remote-code"], + {"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"}, ), ( "VLLM_PARAMS", @@ -1946,20 +1956,16 @@ def test_create_deployment_for_multi_model( None, ["--max-model-len 4096"], ["--max-model-len 4096"], + {"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"}, ), - ( - "TGI_PARAMS", - "odsc-tgi-serving", - 1, - [], - [], - ), + ("TGI_PARAMS", "odsc-tgi-serving", 1, [], [], {}), ( "CUSTOM_PARAMS", "custom-container-key", None, ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], + {}, ), ] ) @@ -1971,6 +1977,7 @@ def test_get_deployment_default_params( gpu_count, params, allowed_params, + deployment_env, mock_from_id, ): """Test for fetching config details for a given deployment.""" @@ -1980,6 +1987,7 @@ def test_get_deployment_default_params( ) with open(config_json, "r") as _file: config = json.load(_file) + # update config params for testing if gpu_count: # build field for multi_model_deployment @@ -1989,6 +1997,7 @@ def test_get_deployment_default_params( { "gpu_count": gpu_count, "parameters": {container_params_field: " ".join(params)}, + # "env": deployment_env } ] else: @@ -2014,9 +2023,10 @@ def test_get_deployment_default_params( ) if container_params_field in ("CUSTOM_PARAMS", "TGI_PARAMS"): - assert result == [] + assert result == {"data": [], "env": {}} else: - assert result == allowed_params + assert result["data"] == allowed_params + assert result["env"] == deployment_env @parameterized.expand( [