From 4a5e2b0ff83167ea4c9503d6b18dffdba3c562aa Mon Sep 17 00:00:00 2001
From: liligwu
Date: Wed, 18 May 2022 22:37:14 +0000
Subject: [PATCH] Deactive ncclAllToAll since degradation was observed on a Hayabusa system

---
 torch/csrc/cuda/nccl.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/torch/csrc/cuda/nccl.cpp b/torch/csrc/cuda/nccl.cpp
index 2e6fc08ed503eb..daae91e06a1346 100644
--- a/torch/csrc/cuda/nccl.cpp
+++ b/torch/csrc/cuda/nccl.cpp
@@ -652,9 +652,6 @@ void all2all_single_equal_split(at::Tensor& input,
   const auto* sendbuff = reinterpret_cast<char*>(input.data_ptr());
   auto* recvbuff = reinterpret_cast<char*>(output.data_ptr());
   auto comm = to_nccl_comm(_comm);
-#if defined(USE_ROCM) && ROCM_VERSION >= 50000
-  NCCL_CHECK(ncclAllToAll(sendbuff , recvbuff , count, type, comm, stream));
-#else
   NCCL_CHECK(ncclCommCount(comm, &numranks));
   NCCL_CHECK(ncclGroupStart());
   for(const auto r : c10::irange(numranks)) {
@@ -666,7 +663,6 @@
     }
   }
   NCCL_CHECK(ncclGroupEnd());
-#endif
 #else
   AT_ERROR("all2all is only supported for NCCL lib version >= 2.7.0");
 #endif
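
Note: with the ROCm-specific ncclAllToAll branch removed, this path always takes the generic fallback that posts one ncclSend/ncclRecv pair per peer inside a single NCCL group; the body of that loop sits between the two hunks and is not visible above. Below is a minimal, self-contained sketch of that pattern, not the exact PyTorch code: the helper name all2all_via_send_recv and the surrounding setup are illustrative assumptions.

// Sketch of an equal-split all-to-all emulated with per-peer ncclSend/ncclRecv
// inside one NCCL group (requires NCCL >= 2.7). Error handling is omitted for
// brevity; real code should check every ncclResult_t, as NCCL_CHECK does in nccl.cpp.
#include <cuda_runtime.h>
#include <nccl.h>

void all2all_via_send_recv(const char* sendbuff, char* recvbuff,
                           size_t count,      // elements exchanged with each peer
                           size_t rankdiff,   // byte stride between per-peer chunks
                           ncclDataType_t type, ncclComm_t comm,
                           cudaStream_t stream) {
  int numranks = 0;
  ncclCommCount(comm, &numranks);
  ncclGroupStart();
  for (int r = 0; r < numranks; ++r) {
    // NCCL uses 0-byte messages for synchronization; skip empty chunks.
    if (count != 0) {
      ncclSend(sendbuff + r * rankdiff, count, type, r, comm, stream);
      ncclRecv(recvbuff + r * rankdiff, count, type, r, comm, stream);
    }
  }
  ncclGroupEnd();
}

Grouping the sends and receives lets NCCL schedule all pairwise exchanges concurrently rather than serializing them, which is why the fallback remains a reasonable substitute when the fused ncclAllToAll call is disabled.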