diff --git a/docs/source/getting_started/installation/gpu/xpu.inc.md b/docs/source/getting_started/installation/gpu/xpu.inc.md
index 4ab41a21c2a1..74937a184227 100644
--- a/docs/source/getting_started/installation/gpu/xpu.inc.md
+++ b/docs/source/getting_started/installation/gpu/xpu.inc.md
@@ -66,7 +66,6 @@ XPU platform supports **tensor parallel** inference/serving and also supports **
 python -m vllm.entrypoints.openai.api_server \
      --model=facebook/opt-13b \
      --dtype=bfloat16 \
-     --device=xpu \
      --max_model_len=1024 \
      --distributed-executor-backend=ray \
      --pipeline-parallel-size=2 \
diff --git a/vllm/config.py b/vllm/config.py
index 5382e9a16829..3fa1db0e8390 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -2201,7 +2201,11 @@ class DeviceConfig:
     """Configuration for the device to use for vLLM execution."""
 
     device: Union[Device, torch.device] = "auto"
-    """Device type for vLLM execution."""
+    """Device type for vLLM execution.
+    This parameter is deprecated and will be
+    removed in a future release.
+    It will now be set automatically based
+    on the current platform."""
     device_type: str = field(init=False)
     """Device type from the current platform.
     This is set in `__post_init__`."""
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index f0c6b15b79da..91a34cb4dd59 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -737,7 +737,9 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             title="DeviceConfig",
             description=DeviceConfig.__doc__,
         )
-        device_group.add_argument("--device", **device_kwargs["device"])
+        device_group.add_argument("--device",
+                                  **device_kwargs["device"],
+                                  deprecated=True)
 
         # Speculative arguments
         speculative_group = parser.add_argument_group(
@@ -977,7 +979,7 @@ def create_engine_config(
         from vllm.platforms import current_platform
         current_platform.pre_register_and_update()
 
-        device_config = DeviceConfig(device=self.device)
+        device_config = DeviceConfig(device=current_platform.device_type)
 
         model_config = self.create_model_config()
         # * If VLLM_USE_V1 is unset, we enable V1 for "supported features"