-
-
Notifications
You must be signed in to change notification settings - Fork 10.6k
Closed as not planned
Labels
bug (Something isn't working), stale (Over 90 days of inactivity)
Description
Your current environment
The output of `python collect_env.py`
Your output of `python collect_env.py` here
Model Input Dumps
model="Qwen/Qwen2.5-72B-Instruct"
guided_decoding_backend="outlines"
vllm_command_flags={
"--gpu-memory-utilization": 0.99,
"--max-num-seqs": 64,
}
max_model_len=8192,
library_overrides={
"vllm": "vllm==0.6.1.post2",
}
🐛 Describe the bug
ERROR 09-24 16:06:51 async_llm_engine.py:58] Engine background task failed
ERROR 09-24 16:06:51 async_llm_engine.py:58] Traceback (most recent call last):
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/worker/model_runner_base.py", line 112, in _wrapper
ERROR 09-24 16:06:51 async_llm_engine.py:58] return func(*args, **kwargs)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/worker/model_runner.py", line 1546, in execute_model
ERROR 09-24 16:06:51 async_llm_engine.py:58] hidden_or_intermediate_states = model_executable(
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
ERROR 09-24 16:06:51 async_llm_engine.py:58] return self._call_impl(*args, **kwargs)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
ERROR 09-24 16:06:51 async_llm_engine.py:58] return forward_call(*args, **kwargs)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/model_executor/models/qwen2.py", line 361, in forward
ERROR 09-24 16:06:51 async_llm_engine.py:58] hidden_states = self.model(input_ids, positions, kv_caches,
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
ERROR 09-24 16:06:51 async_llm_engine.py:58] return self._call_impl(*args, **kwargs)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
ERROR 09-24 16:06:51 async_llm_engine.py:58] return forward_call(*args, **kwargs)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/model_executor/models/qwen2.py", line 277, in forward
ERROR 09-24 16:06:51 async_llm_engine.py:58] hidden_states, residual = layer(
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
ERROR 09-24 16:06:51 async_llm_engine.py:58] return self._call_impl(*args, **kwargs)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
ERROR 09-24 16:06:51 async_llm_engine.py:58] return forward_call(*args, **kwargs)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/model_executor/models/qwen2.py", line 210, in forward
ERROR 09-24 16:06:51 async_llm_engine.py:58] hidden_states = self.self_attn(
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
ERROR 09-24 16:06:51 async_llm_engine.py:58] return self._call_impl(*args, **kwargs)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
ERROR 09-24 16:06:51 async_llm_engine.py:58] return forward_call(*args, **kwargs)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/model_executor/models/qwen2.py", line 154, in forward
ERROR 09-24 16:06:51 async_llm_engine.py:58] qkv, _ = self.qkv_proj(hidden_states)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
ERROR 09-24 16:06:51 async_llm_engine.py:58] return self._call_impl(*args, **kwargs)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
ERROR 09-24 16:06:51 async_llm_engine.py:58] return forward_call(*args, **kwargs)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py", line 367, in forward
ERROR 09-24 16:06:51 async_llm_engine.py:58] output_parallel = self.quant_method.apply(self, input_, bias)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py", line 135, in apply
ERROR 09-24 16:06:51 async_llm_engine.py:58] return F.linear(x, layer.weight, bias)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling `cublasCreate(handle)`
ERROR 09-24 16:06:51 async_llm_engine.py:58]
ERROR 09-24 16:06:51 async_llm_engine.py:58] During handling of the above exception, another exception occurred:
ERROR 09-24 16:06:51 async_llm_engine.py:58]
ERROR 09-24 16:06:51 async_llm_engine.py:58] Traceback (most recent call last):
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 48, in _log_task_completion
ERROR 09-24 16:06:51 async_llm_engine.py:58] return_value = task.result()
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 733, in run_engine_loop
ERROR 09-24 16:06:51 async_llm_engine.py:58] result = task.result()
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 673, in engine_step
ERROR 09-24 16:06:51 async_llm_engine.py:58] request_outputs = await self.engine.step_async(virtual_engine)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 340, in step_async
ERROR 09-24 16:06:51 async_llm_engine.py:58] outputs = await self.model_executor.execute_model_async(
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/executor/distributed_gpu_executor.py", line 177, in execute_model_async
ERROR 09-24 16:06:51 async_llm_engine.py:58] return await self._driver_execute_model_async(execute_model_req)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/executor/multiproc_gpu_executor.py", line 231, in _driver_execute_model_async
ERROR 09-24 16:06:51 async_llm_engine.py:58] return await self.driver_exec_model(execute_model_req)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/usr/lib/python3.11/concurrent/futures/thread.py", line 58, in run
ERROR 09-24 16:06:51 async_llm_engine.py:58] result = self.fn(*self.args, **self.kwargs)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/worker/worker_base.py", line 327, in execute_model
ERROR 09-24 16:06:51 async_llm_engine.py:58] output = self.model_runner.execute_model(
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
ERROR 09-24 16:06:51 async_llm_engine.py:58] return func(*args, **kwargs)
ERROR 09-24 16:06:51 async_llm_engine.py:58] ^^^^^^^^^^^^^^^^^^^^^
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/worker/model_runner_base.py", line 125, in _wrapper
ERROR 09-24 16:06:51 async_llm_engine.py:58] pickle.dump(dumped_inputs, filep)
ERROR 09-24 16:06:51 async_llm_engine.py:58] File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/engine/llm_engine.py", line 563, in __reduce__
ERROR 09-24 16:06:51 async_llm_engine.py:58] raise RuntimeError("LLMEngine should not be pickled!")
ERROR 09-24 16:06:51 async_llm_engine.py:58] RuntimeError: LLMEngine should not be pickled!
Exception in callback functools.partial(<function _log_task_completion at 0x7f0fd60ebb00>, error_callback=<bound method AsyncLLMEngine._error_callback of <vllm.engine.async_llm_engine.AsyncLLMEngine object at 0x7f0fce97a590>>)
handle: <Handle functools.partial(<function _log_task_completion at 0x7f0fd60ebb00>, error_callback=<bound method AsyncLLMEngine._error_callback of <vllm.engine.async_llm_engine.AsyncLLMEngine object at 0x7f0fce97a590>>)>
Traceback (most recent call last):
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/worker/model_runner_base.py", line 112, in _wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/worker/model_runner.py", line 1546, in execute_model
hidden_or_intermediate_states = model_executable(
^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/model_executor/models/qwen2.py", line 361, in forward
hidden_states = self.model(input_ids, positions, kv_caches,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/model_executor/models/qwen2.py", line 277, in forward
hidden_states, residual = layer(
^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/model_executor/models/qwen2.py", line 210, in forward
hidden_states = self.self_attn(
^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/model_executor/models/qwen2.py", line 154, in forward
qkv, _ = self.qkv_proj(hidden_states)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py", line 367, in forward
output_parallel = self.quant_method.apply(self, input_, bias)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py", line 135, in apply
return F.linear(x, layer.weight, bias)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling `cublasCreate(handle)`
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 48, in _log_task_completion
return_value = task.result()
^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 733, in run_engine_loop
result = task.result()
^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 673, in engine_step
request_outputs = await self.engine.step_async(virtual_engine)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 340, in step_async
outputs = await self.model_executor.execute_model_async(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/executor/distributed_gpu_executor.py", line 177, in execute_model_async
return await self._driver_execute_model_async(execute_model_req)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/executor/multiproc_gpu_executor.py", line 231, in _driver_execute_model_async
return await self.driver_exec_model(execute_model_req)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.11/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/worker/worker_base.py", line 327, in execute_model
output = self.model_runner.execute_model(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/worker/model_runner_base.py", line 125, in _wrapper
pickle.dump(dumped_inputs, filep)
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/engine/llm_engine.py", line 563, in __reduce__
raise RuntimeError("LLMEngine should not be pickled!")
RuntimeError: LLMEngine should not be pickled!
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "uvloop/cbhandles.pyx", line 63, in uvloop.loop.Handle._run
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-0b16fcdd-4fa2-42af-bf13-918419d0b49a/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 60, in _log_task_completion
raise AsyncEngineDeadError(
vllm.engine.async_llm_engine.AsyncEngineDeadError: Task finished unexpectedly. This should never happen! Please open an issue on Github. See stack trace above for the actual cause.
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.
Metadata
Metadata
Assignees
Labels
bug (Something isn't working), stale (Over 90 days of inactivity)