diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py index e4767a378f45..724c4357ff74 100644 --- a/vllm/platforms/__init__.py +++ b/vllm/platforms/__init__.py @@ -33,12 +33,19 @@ def cuda_platform_plugin() -> Optional[str]: is_cuda = False try: + from importlib.metadata import version + from vllm.utils import import_pynvml pynvml = import_pynvml() pynvml.nvmlInit() try: - if pynvml.nvmlDeviceGetCount() > 0: - is_cuda = True + # NOTE: Edge case: vllm cpu build on a GPU machine. + # Third-party pynvml can be imported in cpu build, + # we need to check if vllm is built with cpu too. + # Otherwise, vllm will always activate cuda plugin + # on a GPU machine, even if in a cpu build. + is_cuda = (pynvml.nvmlDeviceGetCount() > 0 + and "cpu" not in version("vllm")) finally: pynvml.nvmlShutdown() except Exception as e: