From 53fea625dc4fcefcd48d77d0270c60975b55e5fa Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Sun, 17 Sep 2023 13:30:28 +0000 Subject: [PATCH 1/5] Update docker build images for rocm5.7 --- .github/workflows/build-libtorch-images.yml | 2 +- .github/workflows/build-manywheel-images.yml | 2 +- common/install_miopen.sh | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-libtorch-images.yml b/.github/workflows/build-libtorch-images.yml index b2b50d250..d62030047 100644 --- a/.github/workflows/build-libtorch-images.yml +++ b/.github/workflows/build-libtorch-images.yml @@ -52,7 +52,7 @@ jobs: runs-on: linux.12xlarge strategy: matrix: - rocm_version: ["5.5", "5.6"] + rocm_version: ["5.6", "5.7"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index c7dbe2248..bbac707dd 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -58,7 +58,7 @@ jobs: runs-on: linux.12xlarge strategy: matrix: - rocm_version: ["5.5", "5.6"] + rocm_version: ["5.6", "5.7"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 696a91905..27f8a89ab 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -58,7 +58,9 @@ MIOPEN_CMAKE_COMMON_FLAGS=" -DMIOPEN_BUILD_DRIVER=OFF " # Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version -if [[ $ROCM_INT -ge 50600 ]] && [[ $ROCM_INT -lt 50700 ]]; then +if [[ $ROCM_INT -ge 50700 ]] && [[ $ROCM_INT -lt 50800 ]]; then + MIOPEN_BRANCH="release/rocm-rel-5.7-staging" +elif [[ $ROCM_INT -ge 50600 ]] && [[ $ROCM_INT -lt 50700 ]]; then MIOPEN_BRANCH="release/rocm-rel-5.6-staging" elif [[ $ROCM_INT -ge 50500 ]] && [[ $ROCM_INT -lt 50600 ]]; then MIOPEN_BRANCH="release/rocm-rel-5.5-gfx11" From 5e89be52acf79bc5e0bf6b273bc4613bd888182b Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Wed, 2 Aug 2023 23:10:37 +0000 Subject: [PATCH 2/5] Fix erroneous logic that was skipping msccl files even for ROCm5.6; update msccl path for ROCm5.7 (cherry picked from commit 36c10cc3be475780aa7d76a7ccdbe3f8731042c9) --- manywheel/build_rocm.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/manywheel/build_rocm.sh b/manywheel/build_rocm.sh index 80ebde6f4..0fed5970b 100755 --- a/manywheel/build_rocm.sh +++ b/manywheel/build_rocm.sh @@ -204,10 +204,17 @@ if [[ $ROCM_INT -ge 50500 ]]; then DEPS_AUX_SRCLIST+=(${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_SRC/}) DEPS_AUX_DSTLIST+=(${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_DST/}) -elif [[ $ROCM_INT -ge 50600 ]]; then +fi + +if [[ $ROCM_INT -ge 50600 ]]; then # RCCL library files - RCCL_SHARE_SRC=$ROCM_HOME/lib/msccl-algorithms - RCCL_SHARE_DST=lib/msccl-algorithms + if [[ $ROCM_INT -ge 50700 ]]; then + RCCL_SHARE_SRC=$ROCM_HOME/share/rccl/msccl-algorithms + RCCL_SHARE_DST=share/rccl/msccl-algorithms + else + RCCL_SHARE_SRC=$ROCM_HOME/lib/msccl-algorithms + RCCL_SHARE_DST=lib/msccl-algorithms + fi RCCL_SHARE_FILES=($(ls $RCCL_SHARE_SRC)) DEPS_AUX_SRCLIST+=(${RCCL_SHARE_FILES[@]/#/$RCCL_SHARE_SRC/}) From 8ef5fc956508e34315866059431ca015f485f77d Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Mon, 18 Sep 2023 15:31:32 +0000 Subject: [PATCH 3/5] missing bzip2 package install for miopen --- common/install_miopen.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 27f8a89ab..3aca0038e 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -35,6 +35,9 @@ fi yum remove -y miopen-hip +# MIOpen for rocm 5.7 added bunzip2 as a dependency during its cmake invocation +yum install -y bzip2 + # Function to retry functions that sometimes timeout or have flaky failures retry () { $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) From 858231451f815d194aeb3b2eb6ecaa2ecad1711d Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Mon, 18 Sep 2023 21:58:36 +0000 Subject: [PATCH 4/5] Revert "missing bzip2 package install for miopen" This reverts commit 8ef5fc956508e34315866059431ca015f485f77d. --- common/install_miopen.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 3aca0038e..27f8a89ab 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -35,9 +35,6 @@ fi yum remove -y miopen-hip -# MIOpen for rocm 5.7 added bunzip2 as a dependency during its cmake invocation -yum install -y bzip2 - # Function to retry functions that sometimes timeout or have flaky failures retry () { $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) From 25592a79c2a00325b622c5cac47c244d83753158 Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Mon, 18 Sep 2023 22:05:00 +0000 Subject: [PATCH 5/5] ROCm 5.7 MIOpen does not need any patches, do not build from source --- common/install_miopen.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 27f8a89ab..c01517927 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -59,7 +59,8 @@ MIOPEN_CMAKE_COMMON_FLAGS=" " # Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version if [[ $ROCM_INT -ge 50700 ]] && [[ $ROCM_INT -lt 50800 ]]; then - MIOPEN_BRANCH="release/rocm-rel-5.7-staging" + echo "ROCm 5.7 MIOpen does not need any patches, do not build from source" + exit 0 elif [[ $ROCM_INT -ge 50600 ]] && [[ $ROCM_INT -lt 50700 ]]; then MIOPEN_BRANCH="release/rocm-rel-5.6-staging" elif [[ $ROCM_INT -ge 50500 ]] && [[ $ROCM_INT -lt 50600 ]]; then