diff --git a/cpp/kernels/fmha_v2/fmha_test.py b/cpp/kernels/fmha_v2/fmha_test.py
index 3523ee1d100..ad2957b42d7 100644
--- a/cpp/kernels/fmha_v2/fmha_test.py
+++ b/cpp/kernels/fmha_v2/fmha_test.py
@@ -1,7 +1,12 @@
 import subprocess
 
 import pytest
-from cuda import cuda, nvrtc
+
+try:
+    from cuda.bindings import driver as cuda
+    from cuda.bindings import nvrtc
+except ImportError:
+    from cuda import cuda, nvrtc
 
 
 def ASSERT_DRV(err):
diff --git a/requirements.txt b/requirements.txt
index 55d2777964b..78c3b37c43e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@
 accelerate>=0.25.0
 build
 colored
-cuda-python # Do not override the custom version of cuda-python installed in the NGC PyTorch image.
+cuda-python>=12,<13
 diffusers>=0.27.0
 lark
 mpi4py
diff --git a/tensorrt_llm/_ipc_utils.py b/tensorrt_llm/_ipc_utils.py
index c19fa516bab..d0e30a61d6c 100644
--- a/tensorrt_llm/_ipc_utils.py
+++ b/tensorrt_llm/_ipc_utils.py
@@ -17,17 +17,20 @@
 import sys
 from typing import List, Tuple
 
-from cuda import cuda, cudart
-from cuda.cudart import cudaError_t
+try:
+    from cuda.bindings import driver as cuda
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cuda, cudart
 
 from ._utils import mpi_comm
 from .logger import logger
 from .mapping import Mapping
 
 
-def _raise_if_error(error: cudaError_t | cuda.CUresult):
-    if isinstance(error, cudaError_t):
-        if error != cudaError_t.cudaSuccess:
+def _raise_if_error(error: cudart.cudaError_t | cuda.CUresult):
+    if isinstance(error, cudart.cudaError_t):
+        if error != cudart.cudaError_t.cudaSuccess:
             raise RuntimeError(f"CUDA Runtime API error: {repr(error)}")
     if isinstance(error, cuda.CUresult):
         if error != cuda.CUresult.CUDA_SUCCESS:
diff --git a/tensorrt_llm/_mnnvl_utils.py b/tensorrt_llm/_mnnvl_utils.py
index 4e0b7554e16..c5ead1ece7c 100644
--- a/tensorrt_llm/_mnnvl_utils.py
+++ b/tensorrt_llm/_mnnvl_utils.py
@@ -18,7 +18,11 @@
 
 import pynvml
 import torch
-from cuda import cuda
+
+try:
+    from cuda.bindings import driver as cuda
+except ImportError:
+    from cuda import cuda
 
 from ._dlpack_utils import pack_strided_memory
 from ._utils import mpi_comm
diff --git a/tensorrt_llm/auto_parallel/cluster_info.py b/tensorrt_llm/auto_parallel/cluster_info.py
index 514504dbe81..8bdf5eba29e 100644
--- a/tensorrt_llm/auto_parallel/cluster_info.py
+++ b/tensorrt_llm/auto_parallel/cluster_info.py
@@ -5,7 +5,11 @@
 
 import pynvml
 import torch
-from cuda import cudart
+
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 
 from tensorrt_llm._utils import DictConversion
 from tensorrt_llm.logger import logger
diff --git a/tensorrt_llm/runtime/generation.py b/tensorrt_llm/runtime/generation.py
index ec223f31c1a..bf6e228f769 100755
--- a/tensorrt_llm/runtime/generation.py
+++ b/tensorrt_llm/runtime/generation.py
@@ -29,7 +29,10 @@
 import torch
 import tensorrt as trt
 # isort: on
-from cuda import cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 
 from tensorrt_llm.runtime.memory_pools.memory_pools_allocator import \
     MemoryPoolsAllocator
diff --git a/tensorrt_llm/runtime/multimodal_model_runner.py b/tensorrt_llm/runtime/multimodal_model_runner.py
index 9d6be2bddad..4d71846c3fe 100644
--- a/tensorrt_llm/runtime/multimodal_model_runner.py
+++ b/tensorrt_llm/runtime/multimodal_model_runner.py
@@ -13,7 +13,12 @@
 from typing import Optional, Tuple
 
 import torch.nn.functional as F
-from cuda import cudart
+
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
+
 from huggingface_hub import hf_hub_download
 from PIL import Image, UnidentifiedImageError
 from safetensors import safe_open
diff --git a/tests/integration/defs/sysinfo/get_sysinfo.py b/tests/integration/defs/sysinfo/get_sysinfo.py
index a7de1871e51..4efd791d742 100644
--- a/tests/integration/defs/sysinfo/get_sysinfo.py
+++ b/tests/integration/defs/sysinfo/get_sysinfo.py
@@ -24,7 +24,11 @@
 
 import psutil
 import pynvml
-from cuda import cuda
+
+try:
+    from cuda.bindings import driver as cuda
+except ImportError:
+    from cuda import cuda
 
 # Logger
 logger = logging.getLogger(__name__)
diff --git a/tests/microbenchmarks/all_reduce.py b/tests/microbenchmarks/all_reduce.py
index 13cb4271bbb..4aad4170ec5 100644
--- a/tests/microbenchmarks/all_reduce.py
+++ b/tests/microbenchmarks/all_reduce.py
@@ -18,7 +18,10 @@
 # isort: off
 import torch
 # isort: on
-from cuda import cuda, cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 
 import tensorrt_llm as tllm
 from tensorrt_llm import Mapping, Tensor
diff --git a/tests/microbenchmarks/build_time_benchmark.py b/tests/microbenchmarks/build_time_benchmark.py
index 3313cae1cda..fad8ae2a9af 100644
--- a/tests/microbenchmarks/build_time_benchmark.py
+++ b/tests/microbenchmarks/build_time_benchmark.py
@@ -7,7 +7,11 @@
 import traceback
 
 import tensorrt as trt
-from cuda import cudart
+
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 
 import tensorrt_llm
 from tensorrt_llm import (AutoConfig, AutoModelForCausalLM, BuildConfig,
diff --git a/tests/unittest/_torch/multi_gpu/test_lowprecision_allreduce.py b/tests/unittest/_torch/multi_gpu/test_lowprecision_allreduce.py
index 3aa6871fbe4..9d271a1d6e6 100644
--- a/tests/unittest/_torch/multi_gpu/test_lowprecision_allreduce.py
+++ b/tests/unittest/_torch/multi_gpu/test_lowprecision_allreduce.py
@@ -34,7 +34,10 @@ def run_single_rank(dtype, strategy, message_size):
 
     import numpy as np
     import torch
-    from cuda import cuda
+    try:
+        from cuda.bindings import driver as cuda
+    except ImportError:
+        from cuda import cuda
 
     import tensorrt_llm
     from tensorrt_llm._torch.distributed import AllReduce, AllReduceStrategy
diff --git a/tests/unittest/trt/functional/test_allreduce_norm.py b/tests/unittest/trt/functional/test_allreduce_norm.py
index a6a8ee477e1..dbd4448c45d 100644
--- a/tests/unittest/trt/functional/test_allreduce_norm.py
+++ b/tests/unittest/trt/functional/test_allreduce_norm.py
@@ -21,7 +21,10 @@
 
 import torch
 # isort: on
-from cuda import cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 from parameterized import parameterized
 from utils.util import create_session, run_session, unittest_name_func
 
diff --git a/tests/unittest/trt/functional/test_allreduce_prepost_residual_norm.py b/tests/unittest/trt/functional/test_allreduce_prepost_residual_norm.py
index 295e92c0aec..bf0592193d5 100644
--- a/tests/unittest/trt/functional/test_allreduce_prepost_residual_norm.py
+++ b/tests/unittest/trt/functional/test_allreduce_prepost_residual_norm.py
@@ -21,7 +21,10 @@
 
 import torch
 # isort: on
-from cuda import cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 from parameterized import parameterized
 from utils.util import create_session, run_session, unittest_name_func
 
diff --git a/tests/unittest/trt/functional/test_nccl.py b/tests/unittest/trt/functional/test_nccl.py
index 760e6538123..61b9eacc825 100644
--- a/tests/unittest/trt/functional/test_nccl.py
+++ b/tests/unittest/trt/functional/test_nccl.py
@@ -21,7 +21,10 @@
 
 import torch
 # isort: on
-from cuda import cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 from parameterized import parameterized
 from utils.util import create_session, run_session, unittest_name_func
 
diff --git a/tests/unittest/trt/functional/test_pp_reduce_scatter.py b/tests/unittest/trt/functional/test_pp_reduce_scatter.py
index fba41cbf76f..660a499c922 100644
--- a/tests/unittest/trt/functional/test_pp_reduce_scatter.py
+++ b/tests/unittest/trt/functional/test_pp_reduce_scatter.py
@@ -21,7 +21,10 @@
 
 import torch
 # isort: on
-from cuda import cudart
+try:
+    from cuda.bindings import runtime as cudart
+except ImportError:
+    from cuda import cudart
 from parameterized import parameterized
 from utils.util import create_session, run_session, unittest_name_func
 
diff --git a/tests/unittest/utils/util.py b/tests/unittest/utils/util.py
index 31dcb8efd9e..379102b767d 100644
--- a/tests/unittest/utils/util.py
+++ b/tests/unittest/utils/util.py
@@ -7,7 +7,13 @@
 import pytest
 import tensorrt as trt
 import torch
-from cuda import cuda, nvrtc
+
+try:
+    from cuda.bindings import driver as cuda
+    from cuda.bindings import nvrtc
+except ImportError:
+    from cuda import cuda, nvrtc
+
 from parameterized import parameterized
 
 import tensorrt_llm
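For context, every hunk above applies the same compatibility shim: newer cuda-python releases (the 12.x line that requirements.txt now pins) expose the low-level wrappers as cuda.bindings.driver, cuda.bindings.runtime, and cuda.bindings.nvrtc, while older releases only provide the cuda.cuda, cuda.cudart, and cuda.nvrtc modules, hence the ImportError fallback. A minimal standalone sketch of the pattern follows; the cuda_compat.py module name is hypothetical and not part of this diff, which instead inlines the try/except at each import site.

# cuda_compat.py -- illustrative sketch only; assumes cuda-python is installed.
try:
    # Import layout of newer cuda-python releases (cuda.bindings package).
    from cuda.bindings import driver as cuda
    from cuda.bindings import nvrtc
    from cuda.bindings import runtime as cudart
except ImportError:
    # Legacy layout of older cuda-python releases.
    from cuda import cuda, cudart, nvrtc

__all__ = ["cuda", "cudart", "nvrtc"]

Call sites would then import the wrappers from cuda_compat and keep working under either layout.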