From 1d910939ce7fb9048e363a56a25b7d244d2cfc2c Mon Sep 17 00:00:00 2001 From: Syed Tousif Ahmed Date: Mon, 31 Oct 2022 14:34:24 -0700 Subject: [PATCH 1/7] Installs NCCL from redist, uses system NCCL, and adds pypi RPATH --- common/install_cuda.sh | 12 +++++++++++- manywheel/build_cuda.sh | 5 ++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/common/install_cuda.sh b/common/install_cuda.sh index de64d43df..97f148628 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -24,7 +24,7 @@ function install_116 { } function install_117 { - echo "Installing CUDA 11.7 and CuDNN 8.5" + echo "Installing CUDA 11.7 and CuDNN 8.3 and NCCL 2.14" rm -rf /usr/local/cuda-11.7 /usr/local/cuda # install CUDA 11.7.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run @@ -42,6 +42,16 @@ function install_117 { cd .. rm -rf tmp_cudnn ldconfig + + # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses + mkdir tmp_nccl && cd tmp_nccl + wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.14/nccl_2.14.3-1+cuda11.7_x86_64.txz + tar xf nccl_2.14.3-1+cuda11.7_x86_64.txz + cp -a nccl_2.14.3-1+cuda11.7_x86_64/include/* /usr/local/cuda/include/ + cp -a nccl_2.14.3-1+cuda11.7_x86_64/lib/* /usr/local/cuda/lib64/ + cd .. + rm -rf tmp_nccl + ldconfig } function install_118 { diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index 2498f7a22..08ba483e1 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -240,15 +240,18 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then "libcublasLt.so.11" ) else - echo "Using cudnn and cublas from pypi." + echo "Using cudnn, cublas, and nccl from pypi." CUDA_RPATHS=( '$ORIGIN/../../nvidia/cublas/lib' '$ORIGIN/../../nvidia/cudnn/lib' + '$ORIGIN/../../nvidia/nccl/lib' ) CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' export FORCE_RPATH="--force-rpath" + export USE_STATIC_NCCL=0 + export USE_SYSTEM_NCCL=1 fi else echo "Unknown cuda version $CUDA_VERSION" From f8811501d6333e2617794d9fe6e0cfb7c7aec5ff Mon Sep 17 00:00:00 2001 From: Syed Tousif Ahmed Date: Mon, 21 Nov 2022 11:53:41 -0800 Subject: [PATCH 2/7] Cleans up nvrtc patches and adds it using main script --- manywheel/build_cuda.sh | 11 ++++++----- release/pypi/prep_binary_for_pypi.sh | 13 ------------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index 08ba483e1..83b13a7d1 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -149,15 +149,11 @@ elif [[ $CUDA_VERSION == "11.7" ]]; then DEPS_LIST=( "/usr/local/cuda/lib64/libcudart.so.11.0" "/usr/local/cuda/lib64/libnvToolsExt.so.1" - "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.7, it links to 11.7.50 - "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7" "$LIBGOMP_PATH" ) DEPS_SONAME=( "libcudart.so.11.0" "libnvToolsExt.so.1" - "libnvrtc.so.11.2" - "libnvrtc-builtins.so.11.7" "libgomp.so.1" ) @@ -173,6 +169,8 @@ elif [[ $CUDA_VERSION == "11.7" ]]; then "/usr/local/cuda/lib64/libcudnn.so.8" "/usr/local/cuda/lib64/libcublas.so.11" "/usr/local/cuda/lib64/libcublasLt.so.11" + "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.7, it links to 11.7.50 + "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7" ) DEPS_SONAME+=( "libcudnn_adv_infer.so.8" @@ -238,11 +236,14 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then "libcudnn.so.8" "libcublas.so.11" "libcublasLt.so.11" + "libnvrtc.so.11.2" + "libnvrtc-builtins.so.11.7" ) else - echo "Using cudnn, cublas, and nccl from pypi." + echo "Using cudnn, cublas, nccl, and nvrtc from pypi." CUDA_RPATHS=( '$ORIGIN/../../nvidia/cublas/lib' + '$ORIGIN/../../nvidia/cuda_nvrtc/lib' '$ORIGIN/../../nvidia/cudnn/lib' '$ORIGIN/../../nvidia/nccl/lib' ) diff --git a/release/pypi/prep_binary_for_pypi.sh b/release/pypi/prep_binary_for_pypi.sh index e3b2b28c6..1d0495ec5 100755 --- a/release/pypi/prep_binary_for_pypi.sh +++ b/release/pypi/prep_binary_for_pypi.sh @@ -56,21 +56,8 @@ for whl_file in "$@"; do if [[ $whl_file == *"with.pypi.cudnn"* ]]; then rm -rf "${whl_dir}/caffe2" rm -rf "${whl_dir}"/torch/lib/libnvrtc* - sed -i -e "s/Requires-Dist: nvidia-cuda-runtime-cu11/Requires-Dist: nvidia-cuda-runtime-cu11 (==11.7.99)/" "${whl_dir}"/*/METADATA - sed -i -e "/^Requires-Dist: nvidia-cublas-cu11 (==11.10.3.66).*/a Requires-Dist: nvidia-cuda-nvrtc-cu11 (==11.7.99) ; platform_system == \"Linux\"" "${whl_dir}"/*/METADATA sed -i -e "s/-with-pypi-cudnn//g" "${whl_dir}/torch/version.py" - find "${whl_dir}/torch/" -maxdepth 1 -type f -name "*.so*" | while read sofile; do - patchelf --set-rpath '$ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN:$ORIGIN/lib' \ - --force-rpath $sofile - patchelf --print-rpath $sofile - done - - find "${whl_dir}/torch/lib" -maxdepth 1 -type f -name "*.so*" | while read sofile; do - patchelf --set-rpath '$ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN' \ - --force-rpath $sofile - patchelf --print-rpath $sofile - done patchelf --replace-needed libnvrtc-d833c4f3.so.11.2 libnvrtc.so.11.2 "${whl_dir}/torch/lib/libcaffe2_nvrtc.so" fi From ee1935f92aee544605cd318b4e4c84badeeab735 Mon Sep 17 00:00:00 2001 From: Syed Tousif Ahmed Date: Mon, 21 Nov 2022 13:05:17 -0800 Subject: [PATCH 3/7] Fixes typo --- common/install_cuda.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/install_cuda.sh b/common/install_cuda.sh index 97f148628..d7b978fa0 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -24,7 +24,7 @@ function install_116 { } function install_117 { - echo "Installing CUDA 11.7 and CuDNN 8.3 and NCCL 2.14" + echo "Installing CUDA 11.7 and CuDNN 8.5 and NCCL 2.14" rm -rf /usr/local/cuda-11.7 /usr/local/cuda # install CUDA 11.7.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run From 55ae18ee9ceb5dfb9460e7815bfae5bea16b8046 Mon Sep 17 00:00:00 2001 From: Syed Tousif Ahmed Date: Wed, 30 Nov 2022 11:31:33 -0800 Subject: [PATCH 4/7] Adds more dependencies and builds torch with dynamic linking --- manywheel/build_cuda.sh | 20 +++++++++++++++----- release/pypi/prep_binary_for_pypi.sh | 1 - 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index 83b13a7d1..20b053189 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -147,13 +147,9 @@ elif [[ $CUDA_VERSION == "11.7" ]]; then # Try parallelizing nvcc as well export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" DEPS_LIST=( - "/usr/local/cuda/lib64/libcudart.so.11.0" - "/usr/local/cuda/lib64/libnvToolsExt.so.1" "$LIBGOMP_PATH" ) DEPS_SONAME=( - "libcudart.so.11.0" - "libnvToolsExt.so.1" "libgomp.so.1" ) @@ -171,6 +167,8 @@ elif [[ $CUDA_VERSION == "11.7" ]]; then "/usr/local/cuda/lib64/libcublasLt.so.11" "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.7, it links to 11.7.50 "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7" + "/usr/local/cuda/lib64/libcudart.so.11.0" + "/usr/local/cuda/lib64/libnvToolsExt.so.1" ) DEPS_SONAME+=( "libcudnn_adv_infer.so.8" @@ -238,14 +236,23 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then "libcublasLt.so.11" "libnvrtc.so.11.2" "libnvrtc-builtins.so.11.7" + "libcudart.so.11.0" + "libnvToolsExt.so.1" ) else - echo "Using cudnn, cublas, nccl, and nvrtc from pypi." + echo "Using nvidia libs from pypi." CUDA_RPATHS=( '$ORIGIN/../../nvidia/cublas/lib' + '$ORIGIN/../../nvidia/cuda_cupti/lib' '$ORIGIN/../../nvidia/cuda_nvrtc/lib' + '$ORIGIN/../../nvidia/cuda_runtime/lib' '$ORIGIN/../../nvidia/cudnn/lib' + '$ORIGIN/../../nvidia/cufft/lib' + '$ORIGIN/../../nvidia/curand/lib' + '$ORIGIN/../../nvidia/cusolver/lib' + '$ORIGIN/../../nvidia/cusparse/lib' '$ORIGIN/../../nvidia/nccl/lib' + '$ORIGIN/../../nvidia/nvtx/lib' ) CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' @@ -253,6 +260,9 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then export FORCE_RPATH="--force-rpath" export USE_STATIC_NCCL=0 export USE_SYSTEM_NCCL=1 + export ATEN_STATIC_CUDA=0 + export USE_CUDA_STATIC_LINK=0 + export USE_CUPTI_SO=1 fi else echo "Unknown cuda version $CUDA_VERSION" diff --git a/release/pypi/prep_binary_for_pypi.sh b/release/pypi/prep_binary_for_pypi.sh index 1d0495ec5..fdd9bf4a0 100755 --- a/release/pypi/prep_binary_for_pypi.sh +++ b/release/pypi/prep_binary_for_pypi.sh @@ -58,7 +58,6 @@ for whl_file in "$@"; do rm -rf "${whl_dir}"/torch/lib/libnvrtc* sed -i -e "s/-with-pypi-cudnn//g" "${whl_dir}/torch/version.py" - patchelf --replace-needed libnvrtc-d833c4f3.so.11.2 libnvrtc.so.11.2 "${whl_dir}/torch/lib/libcaffe2_nvrtc.so" fi find "${dist_info_folder}" -type f -exec sed -i "s!${version_with_suffix}!${version_no_suffix}!" {} \; From 8e32d1dc9f76d5dc59896ba7d5c55112655b6adf Mon Sep 17 00:00:00 2001 From: Syed Tousif Ahmed Date: Thu, 1 Dec 2022 16:23:59 -0800 Subject: [PATCH 5/7] NCCL dirs have to be specified. Otherwise picks up different version --- manywheel/build_cuda.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index 20b053189..4abc8f6d7 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -263,6 +263,8 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then export ATEN_STATIC_CUDA=0 export USE_CUDA_STATIC_LINK=0 export USE_CUPTI_SO=1 + export NCCL_INCLUDE_DIR="/usr/local/cuda/include/" + export NCCL_LIB_DIR="/usr/local/cuda/lib64/" fi else echo "Unknown cuda version $CUDA_VERSION" From 6f768e4178e3d75ad9dfedb8fb32b452bedab07b Mon Sep 17 00:00:00 2001 From: Syed Tousif Ahmed Date: Mon, 5 Dec 2022 13:28:46 -0800 Subject: [PATCH 6/7] Handles 11.8 --- common/install_cuda.sh | 10 +++++++ manywheel/build_cuda.sh | 60 +---------------------------------------- 2 files changed, 11 insertions(+), 59 deletions(-) diff --git a/common/install_cuda.sh b/common/install_cuda.sh index d7b978fa0..70c525af8 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -73,6 +73,16 @@ function install_118 { cd .. rm -rf tmp_cudnn ldconfig + + # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses + mkdir tmp_nccl && cd tmp_nccl + wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.15.5/nccl_2.15.5-1+cuda11.8_x86_64.txz + tar xf nccl_2.15.5-1+cuda11.8_x86_64.txz + cp -a nccl_2.15.5-1+cuda11.8_x86_64/include/* /usr/local/cuda/include/ + cp -a nccl_2.15.5-1+cuda11.8_x86_64/lib/* /usr/local/cuda/lib64/ + cd .. + rm -rf tmp_nccl + ldconfig } function prune_116 { diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index 4abc8f6d7..d389b066c 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -142,7 +142,7 @@ DEPS_SONAME=( "libcublasLt.so.11" "libgomp.so.1" ) -elif [[ $CUDA_VERSION == "11.7" ]]; then +elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then export USE_STATIC_CUDNN=0 # Try parallelizing nvcc as well export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" @@ -181,64 +181,6 @@ elif [[ $CUDA_VERSION == "11.7" ]]; then "libcublas.so.11" "libcublasLt.so.11" ) - else - echo "Using cudnn and cublas from pypi." - CUDA_RPATHS=( - '$ORIGIN/../../nvidia/cublas/lib' - '$ORIGIN/../../nvidia/cudnn/lib' - ) - CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") - export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' - export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' - export FORCE_RPATH="--force-rpath" - fi -elif [[ $CUDA_VERSION == "11.8" ]]; then - export USE_STATIC_CUDNN=0 - # Try parallelizing nvcc as well - export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" - DEPS_LIST=( - "/usr/local/cuda/lib64/libcudart.so.11.0" - "/usr/local/cuda/lib64/libnvToolsExt.so.1" - "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.8, it links to 11.8.89 - "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8" - "$LIBGOMP_PATH" - ) - DEPS_SONAME=( - "libcudart.so.11.0" - "libnvToolsExt.so.1" - "libnvrtc.so.11.2" - "libnvrtc-builtins.so.11.8" - "libgomp.so.1" - ) - - if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then - echo "Bundling with cudnn and cublas." - DEPS_LIST+=( - "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_adv_train.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_train.so.8" - "/usr/local/cuda/lib64/libcudnn.so.8" - "/usr/local/cuda/lib64/libcublas.so.11" - "/usr/local/cuda/lib64/libcublasLt.so.11" - ) - DEPS_SONAME+=( - "libcudnn_adv_infer.so.8" - "libcudnn_adv_train.so.8" - "libcudnn_cnn_infer.so.8" - "libcudnn_cnn_train.so.8" - "libcudnn_ops_infer.so.8" - "libcudnn_ops_train.so.8" - "libcudnn.so.8" - "libcublas.so.11" - "libcublasLt.so.11" - "libnvrtc.so.11.2" - "libnvrtc-builtins.so.11.7" - "libcudart.so.11.0" - "libnvToolsExt.so.1" - ) else echo "Using nvidia libs from pypi." CUDA_RPATHS=( From de4bae91a2a368117f1e1dd08d92b206cb0b2bc9 Mon Sep 17 00:00:00 2001 From: Syed Tousif Ahmed Date: Mon, 5 Dec 2022 13:30:29 -0800 Subject: [PATCH 7/7] Adds echo message for nccl 2.15 --- common/install_cuda.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/install_cuda.sh b/common/install_cuda.sh index 70c525af8..c9b9c9d1a 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -55,7 +55,7 @@ function install_117 { } function install_118 { - echo "Installing CUDA 11.8 and cuDNN 8.5" + echo "Installing CUDA 11.8 and cuDNN 8.5 and NCCL 2.15" rm -rf /usr/local/cuda-11.8 /usr/local/cuda # install CUDA 11.8.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run