Skip to content

Commit b5cc4eb

Browse files
authored
Merge pull request #198 from iotamudelta/lookup_bag
Correct the warp size for current AMD GPUs.
2 parents 9d220e0 + cd29f3b commit b5cc4eb

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

aten/src/THCUNN/LookupTableBag.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@
1515
#include "THCHalfAutoNumerics.cuh"
1616
#include "THCTensorSort.cuh"
1717

18+
#if defined(__HIP_PLATFORM_HCC__)
19+
const int WARP_SIZE = 64;
20+
#else
1821
const int WARP_SIZE = 32;
22+
#endif
1923
const int MODE_SUM = 0;
2024
const int MODE_MEAN = 1;
2125

cmake/public/LoadHIP.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ ENDIF()
4747

4848
# ROCFFT_PATH
4949
IF(NOT DEFINED ENV{ROCFFT_PATH})
50-
SET(ROCBLAS_PATH ${ROCM_PATH}/rocfft)
50+
SET(ROCFFT_PATH ${ROCM_PATH}/rocfft)
5151
ELSE()
5252
SET(ROCFFT_PATH $ENV{ROCFFT_PATH})
5353
ENDIF()

0 commit comments

Comments
 (0)