diff --git a/setup.py b/setup.py index 52adaf9238..ed1f5b8a9d 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,7 @@ from build_tools.te_version import te_version from build_tools.utils import ( cuda_archs, + cuda_version, get_frameworks, remove_dups, ) @@ -70,11 +71,11 @@ def setup_common_extension() -> CMakeExtension: if bool(int(os.getenv("NVTE_WITH_CUBLASMP", "0"))): cmake_flags.append("-DNVTE_WITH_CUBLASMP=ON") cublasmp_dir = os.getenv("CUBLASMP_HOME") or metadata.distribution( - "nvidia-cublasmp-cu12" - ).locate_file("nvidia/cublasmp/cu12") + f"nvidia-cublasmp-cu{cuda_version()[0]}" + ).locate_file(f"nvidia/cublasmp/cu{cuda_version()[0]}") cmake_flags.append(f"-DCUBLASMP_DIR={cublasmp_dir}") nvshmem_dir = os.getenv("NVSHMEM_HOME") or metadata.distribution( - "nvidia-nvshmem-cu12" + f"nvidia-nvshmem-cu{cuda_version()[0]}" ).locate_file("nvidia/nvshmem") cmake_flags.append(f"-DNVSHMEM_DIR={nvshmem_dir}") print("CMAKE_FLAGS:", cmake_flags[-2:]) diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt index c2c9d0d915..412c5d34d9 100644 --- a/tests/cpp/CMakeLists.txt +++ b/tests/cpp/CMakeLists.txt @@ -43,5 +43,6 @@ include_directories(${CMAKE_SOURCE_DIR}) find_package(CUDAToolkit REQUIRED) include(${CMAKE_SOURCE_DIR}/../../3rdparty/cudnn-frontend/cmake/cuDNN.cmake) +add_subdirectory(comm_gemm) add_subdirectory(operator) add_subdirectory(util) diff --git a/transformer_engine/common/__init__.py b/transformer_engine/common/__init__.py index 7feb5fda5f..dd1ec480b2 100644 --- a/transformer_engine/common/__init__.py +++ b/transformer_engine/common/__init__.py @@ -218,6 +218,11 @@ def _nvidia_cudart_include_dir() -> str: except ModuleNotFoundError: return "" + # Installing some nvidia-* packages, like nvshmem, creates the nvidia name, so "import nvidia" + # above doesn't throw. However, they don't set the "__file__" attribute. 
+ if nvidia.__file__ is None: + return "" + include_dir = Path(nvidia.__file__).parent / "cuda_runtime" return str(include_dir) if include_dir.exists() else ""