Skip to content

Commit d82b948

Browse files
committed
[Bugfix] Register serializers for V0 MQ Engine (#15009)
Signed-off-by: simon-mo <[email protected]>
1 parent be13281 commit d82b948

File tree

2 files changed

+13
-3
lines changed

2 files changed

+13
-3
lines changed

vllm/engine/multiprocessing/engine.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
# yapf: enable
3030
from vllm.logger import init_logger
3131
from vllm.outputs import RequestOutput
32+
from vllm.transformers_utils.config import (
33+
maybe_register_config_serialize_by_value)
3234
from vllm.usage.usage_lib import UsageContext
3335
from vllm.worker.model_runner_base import InputProcessingError
3436

@@ -42,12 +44,12 @@ class MQLLMEngine:
4244
"""A multiprocessing wrapper for :class:`LLMEngine`.
4345
4446
This class is used to wrap the :class:`LLMEngine` class to enable use
45-
in a concurrent manner. It runs a background loop and uses zeromq to
47+
in a concurrent manner. It runs a background loop and uses zeromq to
4648
receive new requests and stream outputs incrementally via ipc.
47-
49+
4850
The :class:`LLMEngine` generate or encode process is kicked off when a new
4951
RPCProcessRequest is received by the input_socket.
50-
52+
5153
The self.engine_loop checks the input_socket for new requests,
5254
adds them to the LLMEngine if there are any, calls the internal
5355
:class:`LLMEngine.step()`, and sends the RequestOutputs back over
@@ -428,6 +430,9 @@ def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
428430
ipc_path: str, disable_log_stats: bool,
429431
disable_log_requests: bool, engine_alive):
430432
try:
433+
# Ensure we can serialize transformer config before spawning
434+
maybe_register_config_serialize_by_value()
435+
431436
engine = MQLLMEngine.from_vllm_config(
432437
vllm_config=vllm_config,
433438
usage_context=usage_context,

vllm/entrypoints/openai/api_server.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@
8282
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
8383
from vllm.entrypoints.utils import load_aware_call, with_cancellation
8484
from vllm.logger import init_logger
85+
from vllm.transformers_utils.config import (
86+
maybe_register_config_serialize_by_value)
8587
from vllm.usage.usage_lib import UsageContext
8688
from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
8789
is_valid_ipv6_address, set_ulimit)
@@ -221,6 +223,9 @@ async def build_async_engine_client_from_engine_args(
221223
# so we need to spawn a new process
222224
context = multiprocessing.get_context("spawn")
223225

226+
# Ensure we can serialize transformer config before spawning
227+
maybe_register_config_serialize_by_value()
228+
224229
# The Process can raise an exception during startup, which may
225230
# not actually result in an exitcode being reported. As a result
226231
# we use a shared variable to communicate the information.

0 commit comments

Comments
 (0)