|
15 | 15 |
|
16 | 16 | import vllm.envs as envs
|
17 | 17 | from vllm.logger import init_logger
|
| 18 | +from vllm.platforms import current_platform |
18 | 19 | from vllm.triton_utils.importing import HAS_TRITON
|
19 | 20 | from vllm.utils import cuda_is_initialized
|
20 | 21 |
|
@@ -291,6 +292,22 @@ def set_multiprocessing_worker_envs(parallel_config):
|
291 | 292 | "VLLM_WORKER_MULTIPROC_METHOD to 'spawn'.")
|
292 | 293 | os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
|
293 | 294 |
|
| 295 | + if (current_platform.is_hpu() |
| 296 | + and parallel_config.distributed_executor_backend == 'mp' |
| 297 | + and envs.VLLM_WORKER_MULTIPROC_METHOD == 'fork'): |
| 298 | + if os.environ.get("VLLM_WORKER_MULTIPROC_METHOD", None) is not None: |
| 299 | + logger.warning("On HPU, VLLM_WORKER_MULTIPROC_METHOD=fork might " |
| 300 | + "cause application hangs on exit. Using " |
| 301 | + "VLLM_WORKER_MULTIPROC_METHOD=fork anyway, " |
| 302 | + "as it was explicitly requested.") |
| 303 | + else: |
| 304 | + logger.warning("On HPU, VLLM_WORKER_MULTIPROC_METHOD=fork might " |
| 305 | + "cause application hangs on exit. Setting " |
| 306 | + "VLLM_WORKER_MULTIPROC_METHOD to 'spawn'. " |
| 307 | + "To override that behavior, please set " |
| 308 | + "VLLM_WORKER_MULTIPROC_METHOD=fork explicitly.") |
| 309 | + os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" |
| 310 | + |
294 | 311 | # Configure thread parallelism if OMP_NUM_THREADS isn't set
|
295 | 312 | #
|
296 | 313 | # Helps to avoid CPU contention. The default of spawning a thread per
|
|
0 commit comments