From 36978a585f4d551326e947c3ecd04b5d071d9f10 Mon Sep 17 00:00:00 2001 From: pbialecki Date: Thu, 30 Mar 2023 22:09:22 -0700 Subject: [PATCH 1/2] enable nightly CUDA 12.1 builds --- conda/build_pytorch.sh | 5 ++- conda/pytorch-nightly/build.sh | 12 +++-- manywheel/build_cuda.sh | 82 ++++++++++++++++++++++++++++++++-- 3 files changed, 91 insertions(+), 8 deletions(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 570c6df11..adde9fb27 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -265,7 +265,10 @@ else . ./switch_cuda_version.sh "$desired_cuda" # TODO, simplify after anaconda fixes their cudatoolkit versioning inconsistency. # see: https://github.com/conda-forge/conda-forge.github.io/issues/687#issuecomment-460086164 - if [[ "$desired_cuda" == "11.8" ]]; then + if [[ "$desired_cuda" == "12.1" ]]; then + export CONDA_CUDATOOLKIT_CONSTRAINT=" - pytorch-cuda >=12.1,<12.2 # [not osx]" + export MAGMA_PACKAGE=" - magma-cuda121 # [not osx and not win]" + elif [[ "$desired_cuda" == "11.8" ]]; then export CONDA_CUDATOOLKIT_CONSTRAINT=" - pytorch-cuda >=11.8,<11.9 # [not osx]" export MAGMA_PACKAGE=" - magma-cuda118 # [not osx and not win]" elif [[ "$desired_cuda" == "11.7" ]]; then diff --git a/conda/pytorch-nightly/build.sh b/conda/pytorch-nightly/build.sh index 14ed57276..03c9a6238 100755 --- a/conda/pytorch-nightly/build.sh +++ b/conda/pytorch-nightly/build.sh @@ -52,23 +52,29 @@ if [[ -z "$USE_CUDA" || "$USE_CUDA" == 1 ]]; then fi if [[ -n "$build_with_cuda" ]]; then export TORCH_NVCC_FLAGS="-Xfatbin -compress-all" - TORCH_CUDA_ARCH_LIST="3.7+PTX;5.0" + TORCH_CUDA_ARCH_LIST="5.0;6.0;6.1;7.0;7.5;8.0;8.6" export USE_STATIC_CUDNN=1 # links cudnn statically (driven by tools/setup_helpers/cudnn.py) if [[ $CUDA_VERSION == 11.7* ]]; then - TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6" + TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;3.7+PTX" #for cuda 11.7 we use cudnn 8.5 #which does not have single static
libcudnn_static.a deliverable to link with export USE_STATIC_CUDNN=0 #for cuda 11.7 include all dynamic loading libraries DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.7/extras/CUPTI/lib64/libcupti.so.11.7) elif [[ $CUDA_VERSION == 11.8* ]]; then - TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6;9.0" + TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;3.7+PTX;9.0" #for cuda 11.8 we use cudnn 8.7 #which does not have single static libcudnn_static.a deliverable to link with export USE_STATIC_CUDNN=0 #for cuda 11.8 include all dynamic loading libraries DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.8/extras/CUPTI/lib64/libcupti.so.11.8) + elif [[ $CUDA_VERSION == 12.1* ]]; then + # cuda 12 does not support sm_3x + TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;9.0" + # for cuda 12.1 we use cudnn 8.8 and include all dynamic loading libraries + export USE_STATIC_CUDNN=0 + DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-12.1/extras/CUPTI/lib64/libcupti.so.12) fi if [[ -n "$OVERRIDE_TORCH_CUDA_ARCH_LIST" ]]; then TORCH_CUDA_ARCH_LIST="$OVERRIDE_TORCH_CUDA_ARCH_LIST" diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index 83d3c6a3e..bfd85ff4b 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -56,14 +56,18 @@ fi cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.') -TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0" +TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6" case ${CUDA_VERSION} in + 12.2) + TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0" + EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") + ;; 11.8) - TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};7.5;8.0;8.6;9.0" + TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};3.7;9.0" EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") ;; 11.[67]) - TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};7.5;8.0;8.6" + TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};3.7" EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") ;; *) @@ -108,7 +112,77 @@ elif [[ "$OS_NAME"
== *"Ubuntu"* ]]; then LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1" fi -if [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then +if [[ $CUDA_VERSION == "12.1" ]]; then + export USE_STATIC_CUDNN=0 + # Try parallelizing nvcc as well + export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" + DEPS_LIST=( + "$LIBGOMP_PATH" + ) + DEPS_SONAME=( + "libgomp.so.1" + ) + + if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then + echo "Bundling with cudnn and cublas." + DEPS_LIST+=( + "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8" + "/usr/local/cuda/lib64/libcudnn_adv_train.so.8" + "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8" + "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8" + "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8" + "/usr/local/cuda/lib64/libcudnn_ops_train.so.8" + "/usr/local/cuda/lib64/libcudnn.so.8" + "/usr/local/cuda/lib64/libcublas.so.12" + "/usr/local/cuda/lib64/libcublasLt.so.12" + "/usr/local/cuda/lib64/libcudart.so.12" + "/usr/local/cuda/lib64/libnvToolsExt.so.1" + "/usr/local/cuda/lib64/libnvrtc.so.12" + "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.1" + ) + DEPS_SONAME+=( + "libcudnn_adv_infer.so.8" + "libcudnn_adv_train.so.8" + "libcudnn_cnn_infer.so.8" + "libcudnn_cnn_train.so.8" + "libcudnn_ops_infer.so.8" + "libcudnn_ops_train.so.8" + "libcudnn.so.8" + "libcublas.so.12" + "libcublasLt.so.12" + "libcudart.so.12" + "libnvToolsExt.so.1" + "libnvrtc.so.12" + "libnvrtc-builtins.so.12.1" + ) + else + echo "Using nvidia libs from pypi." 
+ CUDA_RPATHS=( + '$ORIGIN/../../nvidia/cublas/lib' + '$ORIGIN/../../nvidia/cuda_cupti/lib' + '$ORIGIN/../../nvidia/cuda_nvrtc/lib' + '$ORIGIN/../../nvidia/cuda_runtime/lib' + '$ORIGIN/../../nvidia/cudnn/lib' + '$ORIGIN/../../nvidia/cufft/lib' + '$ORIGIN/../../nvidia/curand/lib' + '$ORIGIN/../../nvidia/cusolver/lib' + '$ORIGIN/../../nvidia/cusparse/lib' + '$ORIGIN/../../nvidia/nccl/lib' + '$ORIGIN/../../nvidia/nvtx/lib' + ) + CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") + export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' + export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' + export FORCE_RPATH="--force-rpath" + export USE_STATIC_NCCL=0 + export USE_SYSTEM_NCCL=1 + export ATEN_STATIC_CUDA=0 + export USE_CUDA_STATIC_LINK=0 + export USE_CUPTI_SO=1 + export NCCL_INCLUDE_DIR="/usr/local/cuda/include/" + export NCCL_LIB_DIR="/usr/local/cuda/lib64/" + fi +elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then export USE_STATIC_CUDNN=0 # Try parallelizing nvcc as well export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" From 1a773c17c55e3a557b5c5732c03c146001eb3ff3 Mon Sep 17 00:00:00 2001 From: pbialecki Date: Thu, 30 Mar 2023 22:24:13 -0700 Subject: [PATCH 2/2] fix version typo --- manywheel/build_cuda.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index bfd85ff4b..3bc3a49f9 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -58,7 +58,7 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.') TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6" case ${CUDA_VERSION} in - 12.2) + 12.1) TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0" EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") ;;