29
29
# yapf: enable
30
30
from vllm .logger import init_logger
31
31
from vllm .outputs import RequestOutput
32
+ from vllm .transformers_utils .config import (
33
+ maybe_register_config_serialize_by_value )
32
34
from vllm .usage .usage_lib import UsageContext
33
35
from vllm .worker .model_runner_base import InputProcessingError
34
36
@@ -42,12 +44,12 @@ class MQLLMEngine:
42
44
"""A multiprocessing wrapper for :class:`LLMEngine`.
43
45
44
46
This class is used to wrap the :class:`LLMEngine` class to enable use
45
- in concurrnet manner. It runs a background loop and uses zeromq to
47
+ in a concurrent manner. It runs a background loop and uses zeromq to
46
48
receive new requests and stream outputs incrementally via ipc.
47
-
49
+
48
50
The :class:`LLMEngine` generate or encode process is kicked off when a new
49
51
RPCProcessRequest is received by the input_socket.
50
-
52
+
51
53
The self.engine_loop checks the input_socket for new requests,
52
54
adds them to the LLMEngine if there are any, calls the internal
53
55
:class:`LLMEngine.step()`, and sends the RequestOutputs back over
@@ -428,6 +430,9 @@ def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext,
428
430
ipc_path : str , disable_log_stats : bool ,
429
431
disable_log_requests : bool , engine_alive ):
430
432
try :
433
+ # Ensure we can serialize transformer config before spawning
434
+ maybe_register_config_serialize_by_value ()
435
+
431
436
engine = MQLLMEngine .from_vllm_config (
432
437
vllm_config = vllm_config ,
433
438
usage_context = usage_context ,
0 commit comments