Your current environment
Is there a way to run the vllm/vllm-openai Docker image on an RTX 5090? I am getting a lot of errors:
```text
vllm_server | ERROR 04-15 02:01:15 [core.py:387] EngineCore hit an exception: Traceback (most recent call last):
vllm_server | ERROR 04-15 02:01:15 [core.py:387] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 378, in run_engine_core
vllm_server | ERROR 04-15 02:01:15 [core.py:387] engine_core = EngineCoreProc(*args, **kwargs)
vllm_server | ERROR 04-15 02:01:15 [core.py:387] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm_server | ERROR 04-15 02:01:15 [core.py:387] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 320, in __init__
vllm_server | ERROR 04-15 02:01:15 [core.py:387] super().__init__(vllm_config, executor_class, log_stats)
vllm_server | ERROR 04-15 02:01:15 [core.py:387] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 67, in __init__
vllm_server | ERROR 04-15 02:01:15 [core.py:387] self.model_executor = executor_class(vllm_config)
vllm_server | ERROR 04-15 02:01:15 [core.py:387] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm_server | ERROR 04-15 02:01:15 [core.py:387] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/executor_base.py", line 52, in __init__
vllm_server | ERROR 04-15 02:01:15 [core.py:387] self._init_executor()
vllm_server | ERROR 04-15 02:01:15 [core.py:387] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/uniproc_executor.py", line 46, in _init_executor
vllm_server | ERROR 04-15 02:01:15 [core.py:387] self.collective_rpc("init_device")
vllm_server | ERROR 04-15 02:01:15 [core.py:387] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
vllm_server | ERROR 04-15 02:01:15 [core.py:387] answer = run_method(self.driver_worker, method, args, kwargs)
vllm_server | ERROR 04-15 02:01:15 [core.py:387] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm_server | ERROR 04-15 02:01:15 [core.py:387] File "/usr/local/lib/python3.12/dist-packages/vllm/utils.py", line 2378, in run_method
vllm_server | ERROR 04-15 02:01:15 [core.py:387] return func(*args, **kwargs)
vllm_server | ERROR 04-15 02:01:15 [core.py:387] ^^^^^^^^^^^^^^^^^^^^^
vllm_server | ERROR 04-15 02:01:15 [core.py:387] File "/usr/local/lib/python3.12/dist-packages/vllm/worker/worker_base.py", line 604, in init_device
vllm_server | ERROR 04-15 02:01:15 [core.py:387] self.worker.init_device() # type: ignore
vllm_server | ERROR 04-15 02:01:15 [core.py:387] ^^^^^^^^^^^^^^^^^^^^^^^^^
vllm_server | ERROR 04-15 02:01:15 [core.py:387] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_worker.py", line 103, in init_device
vllm_server | ERROR 04-15 02:01:15 [core.py:387] torch.cuda.set_device(self.device)
vllm_server | ERROR 04-15 02:01:15 [core.py:387] File "/usr/local/lib/python3.12/dist-packages/torch/cuda/__init__.py", line 476, in set_device
vllm_server | ERROR 04-15 02:01:15 [core.py:387] torch._C._cuda_setDevice(device)
vllm_server | ERROR 04-15 02:01:15 [core.py:387] File "/usr/local/lib/python3.12/dist-packages/torch/cuda/__init__.py", line 319, in _lazy_init
vllm_server | ERROR 04-15 02:01:15 [core.py:387] torch._C._cuda_init()
vllm_server | ERROR 04-15 02:01:15 [core.py:387] RuntimeError: Unexpected error from cudaGetDeviceCount(). Did you run some cuda functions before calling NumCudaDevices() that might have already set an error? Error 500: named symbol not found
vllm_server | ERROR 04-15 02:01:15 [core.py:387]
vllm_server | CRITICAL 04-15 02:01:15 [core_client.py:359] Got fatal signal from worker processes, shutting down. See stack trace above for root cause issue.
vllm_server | Traceback (most recent call last):
vllm_server | File "<frozen runpy>", line 198, in _run_module_as_main
vllm_server | File "<frozen runpy>", line 88, in _run_code
vllm_server | File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 1121, in <module>
vllm_server | uvloop.run(run_server(args))
vllm_server | File "/usr/local/lib/python3.12/dist-packages/uvloop/__init__.py", line 109, in run
vllm_server | return __asyncio.run(
vllm_server | ^^^^^^^^^^^^^^
vllm_server | File "/usr/lib/python3.12/asyncio/runners.py", line 195, in run
vllm_server | return runner.run(main)
vllm_server | ^^^^^^^^^^^^^^^^
vllm_server | File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
vllm_server | return self._loop.run_until_complete(task)
vllm_server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm_server | File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
vllm_server | File "/usr/local/lib/python3.12/dist-packages/uvloop/__init__.py", line 61, in wrapper
vllm_server | return await main
vllm_server | ^^^^^^^^^^
vllm_server | File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 1069, in run_server
vllm_server | async with build_async_engine_client(args) as engine_client:
vllm_server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm_server | File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__
vllm_server | return await anext(self.gen)
vllm_server | ^^^^^^^^^^^^^^^^^^^^^
vllm_server | File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 146, in build_async_engine_client
vllm_server | async with build_async_engine_client_from_engine_args(
vllm_server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm_server | File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__
vllm_server | return await anext(self.gen)
vllm_server | ^^^^^^^^^^^^^^^^^^^^^
vllm_server | File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 178, in build_async_engine_client_from_engine_args
vllm_server | async_llm = AsyncLLM.from_vllm_config(
vllm_server | ^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm_server | File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/async_llm.py", line 136, in from_vllm_config
vllm_server | return cls(
vllm_server | ^^^^
vllm_server | File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/async_llm.py", line 102, in __init__
vllm_server | self.engine_core = EngineCoreClient.make_client(
vllm_server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm_server | File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core_client.py", line 71, in make_client
vllm_server | return AsyncMPClient(vllm_config, executor_class, log_stats)
vllm_server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm_server | File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core_client.py", line 604, in __init__
vllm_server | super().__init__(
vllm_server | File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core_client.py", line 404, in __init__
vllm_server | self._wait_for_engine_startup()
vllm_server | File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core_client.py", line 426, in _wait_for_engine_startup
vllm_server | raise RuntimeError("Engine core initialization failed. "
vllm_server | RuntimeError: Engine core initialization failed. See root cause above.
```
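For context on the failure: `Error 500: named symbol not found` from `cudaGetDeviceCount()` usually points at a CUDA build that does not support the GPU. The RTX 5090 is a Blackwell card (compute capability sm_120), which needs CUDA 12.8+ and a PyTorch build compiled with sm_120 kernels. A minimal diagnostic sketch, assuming PyTorch is importable inside the container:

```python
# Check whether the bundled PyTorch build targets Blackwell (sm_120).
import torch

print(torch.__version__)           # PyTorch version shipped in the image
print(torch.version.cuda)          # CUDA toolkit version PyTorch was built against
print(torch.cuda.get_arch_list())  # compiled compute capabilities, e.g. ['sm_80', 'sm_90']
# If 'sm_120' is missing from this list, the image has no kernels for the 5090,
# which would be consistent with the init failure above.
```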
How would you like to use vllm
I want to run the vllm/vllm-openai Docker image on an RTX 5090.
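A hypothetical one-liner to run the diagnostic above from the host (it overrides the image's default API-server entrypoint, so nothing else in the image is assumed):

```bash
# Hypothetical diagnostic run; --entrypoint bypasses the default OpenAI API server.
docker run --rm --gpus all --entrypoint python3 vllm/vllm-openai:latest \
  -c "import torch; print(torch.version.cuda, torch.cuda.get_arch_list())"
```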
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.