From 568c6610c16d2d391d75c57cc5cb2ed785318a72 Mon Sep 17 00:00:00 2001 From: Pruthvi Madugundu Date: Wed, 31 Aug 2022 17:36:40 -0700 Subject: [PATCH 1/7] Updates to support rocm5.3 wheel builds (#6) * Changes to support ROCm 5.3 * Updated as per comments --- manywheel/build_rocm.sh | 75 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) diff --git a/manywheel/build_rocm.sh b/manywheel/build_rocm.sh index 9b4d36348..50d3680dd 100755 --- a/manywheel/build_rocm.sh +++ b/manywheel/build_rocm.sh @@ -89,7 +89,80 @@ else fi ROCM_INT=$(($ROCM_VERSION_MAJOR * 10000 + $ROCM_VERSION_MINOR * 100 + $ROCM_VERSION_PATCH)) -if [[ $ROCM_INT -ge 50200 ]]; then +if [[ $ROCM_INT -ge 50300 ]]; then +DEPS_LIST=( + "/opt/rocm/lib/libMIOpen.so.1" + "/opt/rocm/lib/libamdhip64.so.5" + "/opt/rocm/lib/libhipblas.so.0" + "/opt/rocm/lib/libhipfft.so" + "/opt/rocm/lib/libhiprand.so.1" + "/opt/rocm/lib/libhipsparse.so.0" + "/opt/rocm/lib/libhsa-runtime64.so.1" + "/opt/rocm/lib/libamd_comgr.so.2" + "/opt/rocm/magma/lib/libmagma.so" + "/opt/rocm/lib/librccl.so.1" + "/opt/rocm/lib/librocblas.so.0" + "/opt/rocm/lib/librocfft-device-0.so.0" + "/opt/rocm/lib/librocfft-device-1.so.0" + "/opt/rocm/lib/librocfft-device-2.so.0" + "/opt/rocm/lib/librocfft-device-3.so.0" + "/opt/rocm/lib/librocfft.so.0" + "/opt/rocm/lib/librocm_smi64.so.5" + "/opt/rocm/lib/librocrand.so.1" + "/opt/rocm/lib/librocsolver.so.0" + "/opt/rocm/lib/librocsparse.so.0" + "/opt/rocm/lib/libroctracer64.so.4" + "/opt/rocm/lib/libroctx64.so.4" + "$LIBGOMP_PATH" + "$LIBNUMA_PATH" + "$LIBELF_PATH" + "$LIBTINFO_PATH" + "$LIBDRM_PATH" + "$LIBDRM_AMDGPU_PATH" +) + +DEPS_SONAME=( + "libMIOpen.so.1" + "libamdhip64.so.5" + "libhipblas.so.0" + "libhipfft.so" + "libhiprand.so.1" + "libhipsparse.so.0" + "libhsa-runtime64.so.1" + "libamd_comgr.so.2" + "libmagma.so" + "librccl.so.1" + "librocblas.so.0" + "librocfft-device-0.so.0" + "librocfft-device-1.so.0" + "librocfft-device-2.so.0" + "librocfft-device-3.so.0" 
+ "librocfft.so.0" + "librocm_smi64.so.5" + "librocrand.so.1" + "librocsolver.so.0" + "librocsparse.so.0" + "libroctracer64.so.4" + "libroctx64.so.4" + "libgomp.so.1" + "libnuma.so.1" + "libelf.so.1" + "libtinfo.so.5" + "libdrm.so.2" + "libdrm_amdgpu.so.1" +) + +DEPS_AUX_SRCLIST=( + "/opt/rocm/lib/rocblas/library/*" + "/opt/amdgpu/share/libdrm/amdgpu.ids" +) + +DEPS_AUX_DSTLIST=( + "lib/rocblas/library/." + "share/libdrm/amdgpu.ids" +) + +elif [[ $ROCM_INT -ge 50200 ]]; then DEPS_LIST=( "/opt/rocm/lib/libMIOpen.so.1" "/opt/rocm/lib/libamdhip64.so.5" From 42980bc9761082e21f74f463fcced807e6f1e1d6 Mon Sep 17 00:00:00 2001 From: Pruthvi Madugundu Date: Wed, 31 Aug 2022 17:40:23 -0700 Subject: [PATCH 2/7] Installing python before magma build - In ROCm 5.3 libtorch builds are failing during magma build due to a missing python binary, so added install statement --- common/install_rocm_magma.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/common/install_rocm_magma.sh b/common/install_rocm_magma.sh index c651a6e4e..5f5182721 100644 --- a/common/install_rocm_magma.sh +++ b/common/install_rocm_magma.sh @@ -9,6 +9,8 @@ set -ex # TODO (2) MKLROOT=${MKLROOT:-/opt/intel} +apt-get install -y python + # "install" hipMAGMA into /opt/rocm/magma by copying after build git clone https://bitbucket.org/icl/magma.git pushd magma From f04c92af74c02bc9554c5b866864681f00c75a7f Mon Sep 17 00:00:00 2001 From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> Date: Mon, 12 Sep 2022 10:48:34 -0500 Subject: [PATCH 3/7] Move python install to libtorch/Dockerfile (#8) --- common/install_rocm_magma.sh | 2 -- libtorch/Dockerfile | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/common/install_rocm_magma.sh b/common/install_rocm_magma.sh index 5f5182721..c651a6e4e 100644 --- a/common/install_rocm_magma.sh +++ b/common/install_rocm_magma.sh @@ -9,8 +9,6 @@ set -ex # TODO (2) MKLROOT=${MKLROOT:-/opt/intel} -apt-get install -y python - # "install" hipMAGMA into 
/opt/rocm/magma by copying after build git clone https://bitbucket.org/icl/magma.git pushd magma diff --git a/libtorch/Dockerfile b/libtorch/Dockerfile index 224c0610a..d8fa2e1b4 100644 --- a/libtorch/Dockerfile +++ b/libtorch/Dockerfile @@ -71,9 +71,10 @@ ENV MKLROOT /opt/intel ADD ./common/install_rocm.sh install_rocm.sh ADD ./common/install_rocm_drm.sh install_rocm_drm.sh ADD ./common/install_rocm_magma.sh install_rocm_magma.sh -# gfortran needed for building magma from source for ROCm +# gfortran and python needed for building magma from source for ROCm RUN apt-get update -y && \ apt-get install gfortran -y && \ + apt-get install python -y && \ apt-get clean FROM rocm as rocm5.1.1 From 1305d7711b89e121d274fc2ff2a66f46babb6bf4 Mon Sep 17 00:00:00 2001 From: Pruthvi Madugundu Date: Thu, 22 Sep 2022 13:58:40 -0700 Subject: [PATCH 4/7] Updating the condition for noRCCL build (#9) * Updating the condition for noRCCL build * Updated changes as per comments --- manywheel/build_common.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh index ac3b2755c..c213145ef 100644 --- a/manywheel/build_common.sh +++ b/manywheel/build_common.sh @@ -162,11 +162,23 @@ else echo "BUILD_DEBUG_INFO was not set, skipping debug info" fi +if [[ "$DISABLE_RCCL" = 1 ]]; then + echo "Disabling NCCL/RCCL in pyTorch" + USE_RCCL=0 + USE_NCCL=0 + USE_KINETO=0 +else + USE_RCCL=1 + USE_NCCL=1 + USE_KINETO=1 +fi + echo "Calling setup.py bdist at $(date)" time CMAKE_ARGS=${CMAKE_ARGS[@]} \ - EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \ - BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \ - python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR + EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \ + BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \ + USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \ + python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR echo "Finished setup.py 
bdist at $(date)" # Build libtorch packages From ecea0887bae0538e70262e6f49efdcd06f50a901 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Wed, 5 Oct 2022 05:27:35 +0000 Subject: [PATCH 5/7] Use MIOpen branch for ROCm5.3; Change all conditions to -eq --- common/install_miopen.sh | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 5c0fd84c9..fb4f26356 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -59,23 +59,31 @@ MIOPEN_CMAKE_COMMON_FLAGS=" -DMIOPEN_BUILD_DRIVER=OFF " # Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version +<<<<<<< HEAD if [[ $ROCM_INT -ge 50200 ]]; then MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36" +======= +if [[ $ROCM_INT -eq 50300 ]]; then + MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off" + MIOPEN_BRANCH="release/rocm-rel-5.3" +elif [[ $ROCM_INT -eq 50200 ]]; then + MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off" +>>>>>>> 42f213f... 
Use MIOpen branch for ROCm5.3; Change all conditions to -eq MIOPEN_BRANCH="release/rocm-rel-5.2-staging" -elif [[ $ROCM_INT -ge 50100 ]]; then +elif [[ $ROCM_INT -eq 50100 ]]; then MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36" MIOPEN_BRANCH="release/rocm-rel-5.1-staging" -elif [[ $ROCM_INT -ge 50000 ]]; then +elif [[ $ROCM_INT -eq 50000 ]]; then MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36" MIOPEN_BRANCH="release/rocm-rel-5.0-staging" -elif [[ $ROCM_INT -ge 40500 ]]; then +elif [[ $ROCM_INT -eq 40500 ]]; then MIOPEN_CMAKE_COMMON_FLAGS="${MIOPEN_CMAKE_COMMON_FLAGS} -DMIOPEN_USE_HIP_KERNELS=Off -DMIOPEN_DEFAULT_FIND_MODE=Normal" MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36" MIOPEN_BRANCH="release/rocm-rel-4.5-staging" -elif [[ $ROCM_INT -ge 40300 ]]; then +elif [[ $ROCM_INT -eq 40300 ]]; then MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx900_64;gfx906_60;gfx906_64;gfx90878;gfx1030_36" MIOPEN_BRANCH="release/rocm-rel-4.3-staging" -elif [[ $ROCM_INT -ge 40200 ]]; then +elif [[ $ROCM_INT -eq 40200 ]]; then MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx803_36;gfx803_64;gfx900_56;gfx900_64;gfx906_60;gfx906_64;gfx90878" MIOPEN_BRANCH="rocm-4.2.x-staging" else From 015b0b655a8aa02e15dec54329fccb45fb46761b Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Wed, 5 Oct 2022 07:04:51 +0000 Subject: [PATCH 6/7] Use staging branch of MIOpen for ROCm5.3 --- common/install_miopen.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/install_miopen.sh b/common/install_miopen.sh index fb4f26356..67b8aa955 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -65,7 +65,7 @@ if [[ $ROCM_INT -ge 50200 ]]; then ======= if [[ $ROCM_INT -eq 50300 ]]; then MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off" - MIOPEN_BRANCH="release/rocm-rel-5.3" + 
MIOPEN_BRANCH="release/rocm-rel-5.3-staging" elif [[ $ROCM_INT -eq 50200 ]]; then MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off" >>>>>>> 42f213f... Use MIOpen branch for ROCm5.3; Change all conditions to -eq From a862cf2d809b5bea5d76ba2b6819192a1fe3494b Mon Sep 17 00:00:00 2001 From: Jack Taylor <108682042+jataylo@users.noreply.github.com> Date: Mon, 10 Oct 2022 17:10:03 +0100 Subject: [PATCH 7/7] Fix merge conflict Fix merge conflict --- common/install_miopen.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 67b8aa955..cace75906 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -59,16 +59,11 @@ MIOPEN_CMAKE_COMMON_FLAGS=" -DMIOPEN_BUILD_DRIVER=OFF " # Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version -<<<<<<< HEAD -if [[ $ROCM_INT -ge 50200 ]]; then - MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36" -======= if [[ $ROCM_INT -eq 50300 ]]; then MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off" MIOPEN_BRANCH="release/rocm-rel-5.3-staging" elif [[ $ROCM_INT -eq 50200 ]]; then MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off" ->>>>>>> 42f213f... Use MIOpen branch for ROCm5.3; Change all conditions to -eq MIOPEN_BRANCH="release/rocm-rel-5.2-staging" elif [[ $ROCM_INT -eq 50100 ]]; then MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"