Skip to content

Commit 49c91b4

Browse files
FFFrog authored and pytorchmergebot committed
[Easy][Building] Fix the warning of int4mm.cu when building (pytorch#151427)
As the title states. The fix moves the `CDNA2_OR_LATER` definition and the ROCm/CUDA version guard above the `kWarpSize` declarations, so the constant is only declared inside the guarded region.

**Changes Before:**

```C++
[999/1526] Building CUDA object caffe2/CMakeFiles/torch_cuda.dir/__/aten/src/ATen/native/cuda/int4mm.cu.o
/root/Git.d/pytorch/pytorch/aten/src/ATen/native/cuda/int4mm.cu(142): warning #177-D: variable "at::native::kWarpSize" was declared but never referenced
  constexpr int32_t kWarpSize = 32;
                    ^

Remark: The warnings can be suppressed with "-diag-suppress <warning-number>"
```

Pull Request resolved: pytorch#151427
Approved by: https://github.com/Skylion007, https://github.com/malfet
1 parent a05cc9f commit 49c91b4

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

aten/src/ATen/native/cuda/int4mm.cu

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,14 @@ inline __host__ __device__ uint32_t getAlignmentRoundUp(const void* p) {
127127
return diff == 0 ? 0 : uint32_t(Align) - diff;
128128
}
129129

130+
#if defined (__gfx90a__) || defined(__gfx942__)
131+
#define CDNA2_OR_LATER 1
132+
#else
133+
#define CDNA2_OR_LATER 0
134+
#endif
135+
136+
#if (defined(USE_ROCM) && ROCM_VERSION >= 50700) || ((defined(CUDA_VERSION) && CUDA_VERSION >= 12000) && (!defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 800)))
137+
130138
#if defined(USE_ROCM)
131139
// TODO: Support RDNA
132140
constexpr int32_t kWarpSize = 64;
@@ -142,14 +150,6 @@ static bool isCDNA2orLater(int index) {
142150
constexpr int32_t kWarpSize = 32;
143151
#endif
144152

145-
#if defined (__gfx90a__) || defined(__gfx942__)
146-
#define CDNA2_OR_LATER 1
147-
#else
148-
#define CDNA2_OR_LATER 0
149-
#endif
150-
151-
#if (defined(USE_ROCM) && ROCM_VERSION >= 50700) || ((defined(CUDA_VERSION) && CUDA_VERSION >= 12000) && (!defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 800)))
152-
153153
// f16 vector types
154154
struct __align__(2) f16x1 {
155155
__half vals[1];

0 commit comments

Comments
 (0)