Skip to content

Commit c70ece6

Browse files
committed
fix graceful shutdown
Signed-off-by: Konrad Zawora <[email protected]>
1 parent 1ca44ba commit c70ece6

File tree

2 files changed

+20
-0
lines changed

2 files changed

+20
-0
lines changed

vllm/executor/multiproc_hpu_executor.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,9 @@ def _check_executor_parameters(self):
3939
f"please ensure that world_size ({world_size}) "
4040
f"is less than than max local hpu count ({hpu_device_count})")
4141

42+
def __del__(self):
43+
self.shutdown()
44+
4245

4346
class MultiprocessingHPUExecutorAsync(MultiprocessingHPUExecutor,
4447
MultiprocessingGPUExecutorAsync):

vllm/executor/multiproc_worker_utils.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515

1616
import vllm.envs as envs
1717
from vllm.logger import init_logger
18+
from vllm.platforms import current_platform
1819
from vllm.triton_utils.importing import HAS_TRITON
1920
from vllm.utils import cuda_is_initialized
2021

@@ -291,6 +292,22 @@ def set_multiprocessing_worker_envs(parallel_config):
291292
"VLLM_WORKER_MULTIPROC_METHOD to 'spawn'.")
292293
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
293294

295+
if (current_platform.is_hpu()
296+
and parallel_config.distributed_executor_backend == 'mp'
297+
and envs.VLLM_WORKER_MULTIPROC_METHOD == 'fork'):
298+
if os.environ.get("VLLM_WORKER_MULTIPROC_METHOD", None) is not None:
299+
logger.warning("On HPU, VLLM_WORKER_MULTIPROC_METHOD=fork might "
300+
"cause application hangs on exit. Using "
301+
"VLLM_WORKER_MULTIPROC_METHOD=fork anyway, "
302+
"as it was explicitly requested.")
303+
else:
304+
logger.warning("On HPU, VLLM_WORKER_MULTIPROC_METHOD=fork might "
305+
"cause application hangs on exit. Setting "
306+
"VLLM_WORKER_MULTIPROC_METHOD to 'spawn'. "
307+
"To override that behavior, please set "
308+
"VLLM_WORKER_MULTIPROC_METHOD=fork explicitly.")
309+
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
310+
294311
# Configure thread parallelism if OMP_NUM_THREADS isn't set
295312
#
296313
# Helps to avoid CPU contention. The default of spawning a thread per

0 commit comments

Comments
 (0)