Skip to content

Commit def7d70

Browse files
pruthvistony authored and jithunnair-amd committed
Set thread work size to 4 for elementwise kernels similar to cuda
1 parent d859221 commit def7d70

File tree

2 files changed

+0
-8
lines changed

2 files changed

+0
-8
lines changed

aten/src/ATen/native/cuda/DistributionTemplates.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -34,11 +34,7 @@ const uint32_t block_size_bound = 256;
34 34
const uint32_t grid_size_bound = 4;
35 35
// number of randoms given by distributions like curand_uniform4, curand_uniform2_double
36 36
// used in calculating philox offset.
37-
#if defined(USE_ROCM)
38-
const uint32_t curand4_engine_calls = 8;
39-
#else
40 37
const uint32_t curand4_engine_calls = 4;
41-
#endif
42 38

43 39
// utility function that calculates proper philox_offset
44 40
// for distributions utilizing TensorIterator. For distributions using

aten/src/ATen/native/cuda/thread_constants.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,5 @@ constexpr uint32_t num_threads() {
18 18
}
19 19
#endif
20 20

21-
#if defined(USE_ROCM)
22-
constexpr int thread_work_size() { return 8; }
23-
#else
24 21
constexpr int thread_work_size() { return 4; }
25-
#endif
26 22
constexpr int block_work_size() { return thread_work_size() * num_threads(); }

0 commit comments

Comments
 (0)