Skip to content

Commit f885c13

Browse files
author
Chao Liu
committed
tidy
1 parent 80120f0 commit f885c13

11 files changed

+90
-147
lines changed

composable_kernel/include/utility/amd_address_space.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ __device__ T* cast_pointer_to_generic_address_space(T CONSTANT* p)
2020
return (T*)p;
2121
}
2222

23+
template <typename T>
24+
__host__ __device__ T CONSTANT* cast_pointer_to_constant_address_space(T* p)
25+
{
26+
return (T CONSTANT*)p;
27+
}
28+
2329
} // namespace ck
2430

2531
#endif

composable_kernel/include/utility/print.hpp

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -11,59 +11,11 @@ namespace ck {
1111
template <typename T>
1212
__host__ __device__ void print_array(const char* s, T a)
1313
{
14-
using data_type = decltype(a.At(Number<0>{}));
1514
constexpr index_t nsize = a.Size();
1615

17-
#if 0
18-
if constexpr(is_same<data_type, uint32_t>{})
19-
{
20-
printf("%s size %u, {", s, nsize);
21-
static_for<0, nsize, 1>{}([&a](auto i) constexpr { printf("%u, ", uint32_t{a[i]}); });
22-
printf("}\n");
23-
}
24-
else if constexpr(is_same<data_type, int32_t>{})
25-
{
26-
printf("%s size %d, {", s, nsize);
27-
static_for<0, nsize, 1>{}([&a](auto i) constexpr { printf("%d, ", int32_t{a[i]}); });
28-
printf("}\n");
29-
}
30-
else if constexpr(is_same<data_type, bool>{})
31-
{
32-
printf("%s size %d, {", s, nsize);
33-
static_for<0, nsize, 1>{}([&a](auto i) constexpr { printf("%d, ", bool{a[i]}); });
34-
printf("}\n");
35-
}
36-
#else
3716
printf("%s size %d, {", s, nsize);
3817
static_for<0, nsize, 1>{}([&a](auto i) constexpr { printf("%d, ", int32_t{a[i]}); });
3918
printf("}\n");
40-
#endif
41-
}
42-
43-
template <typename T>
44-
__host__ __device__ void print_array_v2(const char* s, T a)
45-
{
46-
using data_type = decltype(a.At(Number<0>{}));
47-
constexpr index_t nsize = a.Size();
48-
49-
#if 0
50-
if constexpr(is_same<data_type, uint32_t>{})
51-
{
52-
printf("%s size %u, {", s, nsize);
53-
static_for<0, nsize, 1>{}([&a](auto i) constexpr { printf("[%u] %u, ", i.value, a[i]); });
54-
printf("}\n");
55-
}
56-
else if constexpr(is_same<data_type, int32_t>{})
57-
{
58-
printf("%s size %d, {", s, nsize);
59-
static_for<0, nsize, 1>{}([&a](auto i) constexpr { printf("[%d] %d, ", i.value, a[i]); });
60-
printf("}\n");
61-
}
62-
#else
63-
printf("%s size %d, {", s, nsize);
64-
static_for<0, nsize, 1>{}([&a](auto i) constexpr { printf("[%d] %d, ", i.value, a[i]); });
65-
printf("}\n");
66-
#endif
6719
}
6820

6921
} // namespace ck

host/driver_offline/include/device_dynamic_convolution_forward_implicit_gemm_v4r4r2_dlops_nhwc_kyxc_nhwk.hpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -257,9 +257,6 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4r2_dlops_nhwc_kyxc_nhw
257257
const auto K = out_n_ho_wo_k_lengths[I3];
258258
const auto C = wei_k_y_x_c_lengths[I3];
259259

260-
const auto Hi = in_n_hi_wi_c_lengths[I1];
261-
const auto Wi = in_n_hi_wi_c_lengths[I2];
262-
263260
const auto Ho = out_n_ho_wo_k_lengths[I1];
264261
const auto Wo = out_n_ho_wo_k_lengths[I2];
265262

host/driver_offline/include/driver_dynamic_contraction_dlops_v1r2.hpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,6 @@ driver_dynamic_contraction_dlops_v1r2(const FloatAB* p_a_grid,
194194
dim3(grid_size),
195195
dim3(BlockSize),
196196
0,
197-
0,
198197
p_a_grid,
199198
p_b_grid,
200199
p_c_grid,
@@ -221,7 +220,6 @@ driver_dynamic_contraction_dlops_v1r2(const FloatAB* p_a_grid,
221220
dim3(grid_size),
222221
dim3(BlockSize),
223222
0,
224-
0,
225223
p_a_grid,
226224
p_b_grid,
227225
p_c_grid,
@@ -248,7 +246,6 @@ driver_dynamic_contraction_dlops_v1r2(const FloatAB* p_a_grid,
248246
dim3(grid_size),
249247
dim3(BlockSize),
250248
0,
251-
0,
252249
p_a_grid,
253250
p_b_grid,
254251
p_c_grid,
@@ -275,7 +272,6 @@ driver_dynamic_contraction_dlops_v1r2(const FloatAB* p_a_grid,
275272
dim3(grid_size),
276273
dim3(BlockSize),
277274
0,
278-
0,
279275
p_a_grid,
280276
p_b_grid,
281277
p_c_grid,

host/driver_offline/include/driver_dynamic_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw.hpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,6 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_pad
244244
dim3(GridSize),
245245
dim3(BlockSize),
246246
0,
247-
0,
248247
wei_e_k_global_desc,
249248
p_wei_global,
250249
in_e_n_ho_wo_global_desc,
@@ -270,7 +269,6 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_pad
270269
dim3(GridSize),
271270
dim3(BlockSize),
272271
0,
273-
0,
274272
wei_e_k_global_desc,
275273
p_wei_global,
276274
in_e_n_ho_wo_global_desc,
@@ -296,7 +294,6 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_pad
296294
dim3(GridSize),
297295
dim3(BlockSize),
298296
0,
299-
0,
300297
wei_e_k_global_desc,
301298
p_wei_global,
302299
in_e_n_ho_wo_global_desc,
@@ -322,7 +319,6 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_pad
322319
dim3(GridSize),
323320
dim3(BlockSize),
324321
0,
325-
0,
326322
wei_e_k_global_desc,
327323
p_wei_global,
328324
in_e_n_ho_wo_global_desc,

host/driver_offline/include/driver_dynamic_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw_outpad.hpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,6 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_outp
257257
dim3(GridSize),
258258
dim3(BlockSize),
259259
0,
260-
0,
261260
wei_e_k_global_desc,
262261
p_wei_global,
263262
in_e_n_ho_wo_global_desc,
@@ -284,7 +283,6 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_outp
284283
dim3(GridSize),
285284
dim3(BlockSize),
286285
0,
287-
0,
288286
wei_e_k_global_desc,
289287
p_wei_global,
290288
in_e_n_ho_wo_global_desc,
@@ -311,7 +309,6 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_outp
311309
dim3(GridSize),
312310
dim3(BlockSize),
313311
0,
314-
0,
315312
wei_e_k_global_desc,
316313
p_wei_global,
317314
in_e_n_ho_wo_global_desc,
@@ -338,7 +335,6 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_outp
338335
dim3(GridSize),
339336
dim3(BlockSize),
340337
0,
341-
0,
342338
wei_e_k_global_desc,
343339
p_wei_global,
344340
in_e_n_ho_wo_global_desc,

host/driver_offline/include/driver_dynamic_gemm_dlops_v1r2.hpp

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,6 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
189189
dim3(grid_size),
190190
dim3(BlockSize),
191191
0,
192-
0,
193192
p_a_grid,
194193
p_b_grid,
195194
p_c_grid,
@@ -216,7 +215,6 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
216215
dim3(grid_size),
217216
dim3(BlockSize),
218217
0,
219-
0,
220218
p_a_grid,
221219
p_b_grid,
222220
p_c_grid,
@@ -243,7 +241,6 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
243241
dim3(grid_size),
244242
dim3(BlockSize),
245243
0,
246-
0,
247244
p_a_grid,
248245
p_b_grid,
249246
p_c_grid,
@@ -270,7 +267,6 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
270267
dim3(grid_size),
271268
dim3(BlockSize),
272269
0,
273-
0,
274270
p_a_grid,
275271
p_b_grid,
276272
p_c_grid,
@@ -315,14 +311,15 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
315311
dim3(grid_size),
316312
dim3(BlockSize),
317313
0,
318-
0,
319314
p_a_grid,
320315
p_b_grid,
321316
p_c_grid,
322-
(void CONSTANT*)a_k_m0_m1_grid_desc_dev_buf.GetDeviceBuffer(),
323-
(void CONSTANT*)b_k_n0_n1_grid_desc_dev_buf.GetDeviceBuffer(),
324-
(void CONSTANT*)c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf.GetDeviceBuffer(),
325-
(void CONSTANT*)c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf.GetDeviceBuffer());
317+
cast_pointer_to_constant_address_space(a_k_m0_m1_grid_desc_dev_buf.GetDeviceBuffer()),
318+
cast_pointer_to_constant_address_space(b_k_n0_n1_grid_desc_dev_buf.GetDeviceBuffer()),
319+
cast_pointer_to_constant_address_space(
320+
c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf.GetDeviceBuffer()),
321+
cast_pointer_to_constant_address_space(
322+
c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf.GetDeviceBuffer()));
326323
}
327324
else if(has_main_k_block_loop && !has_double_tail_k_block_loop)
328325
{
@@ -343,14 +340,15 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
343340
dim3(grid_size),
344341
dim3(BlockSize),
345342
0,
346-
0,
347343
p_a_grid,
348344
p_b_grid,
349345
p_c_grid,
350-
(void CONSTANT*)a_k_m0_m1_grid_desc_dev_buf.GetDeviceBuffer(),
351-
(void CONSTANT*)b_k_n0_n1_grid_desc_dev_buf.GetDeviceBuffer(),
352-
(void CONSTANT*)c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf.GetDeviceBuffer(),
353-
(void CONSTANT*)c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf.GetDeviceBuffer());
346+
cast_pointer_to_constant_address_space(a_k_m0_m1_grid_desc_dev_buf.GetDeviceBuffer()),
347+
cast_pointer_to_constant_address_space(b_k_n0_n1_grid_desc_dev_buf.GetDeviceBuffer()),
348+
cast_pointer_to_constant_address_space(
349+
c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf.GetDeviceBuffer()),
350+
cast_pointer_to_constant_address_space(
351+
c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf.GetDeviceBuffer()));
354352
}
355353
else if(!has_main_k_block_loop && has_double_tail_k_block_loop)
356354
{
@@ -371,14 +369,15 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
371369
dim3(grid_size),
372370
dim3(BlockSize),
373371
0,
374-
0,
375372
p_a_grid,
376373
p_b_grid,
377374
p_c_grid,
378-
(void CONSTANT*)a_k_m0_m1_grid_desc_dev_buf.GetDeviceBuffer(),
379-
(void CONSTANT*)b_k_n0_n1_grid_desc_dev_buf.GetDeviceBuffer(),
380-
(void CONSTANT*)c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf.GetDeviceBuffer(),
381-
(void CONSTANT*)c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf.GetDeviceBuffer());
375+
cast_pointer_to_constant_address_space(a_k_m0_m1_grid_desc_dev_buf.GetDeviceBuffer()),
376+
cast_pointer_to_constant_address_space(b_k_n0_n1_grid_desc_dev_buf.GetDeviceBuffer()),
377+
cast_pointer_to_constant_address_space(
378+
c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf.GetDeviceBuffer()),
379+
cast_pointer_to_constant_address_space(
380+
c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf.GetDeviceBuffer()));
382381
}
383382
else
384383
{
@@ -399,14 +398,15 @@ __host__ float driver_dynamic_gemm_dlops_v1r2(const FloatAB* p_a_grid,
399398
dim3(grid_size),
400399
dim3(BlockSize),
401400
0,
402-
0,
403401
p_a_grid,
404402
p_b_grid,
405403
p_c_grid,
406-
(void CONSTANT*)a_k_m0_m1_grid_desc_dev_buf.GetDeviceBuffer(),
407-
(void CONSTANT*)b_k_n0_n1_grid_desc_dev_buf.GetDeviceBuffer(),
408-
(void CONSTANT*)c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf.GetDeviceBuffer(),
409-
(void CONSTANT*)c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf.GetDeviceBuffer());
404+
cast_pointer_to_constant_address_space(a_k_m0_m1_grid_desc_dev_buf.GetDeviceBuffer()),
405+
cast_pointer_to_constant_address_space(b_k_n0_n1_grid_desc_dev_buf.GetDeviceBuffer()),
406+
cast_pointer_to_constant_address_space(
407+
c_m0_m10_m11_n0_n10_n11_grid_desc_dev_buf.GetDeviceBuffer()),
408+
cast_pointer_to_constant_address_space(
409+
c_blockid_to_m0_n0_block_cluster_adaptor_dev_buf.GetDeviceBuffer()));
410410
}
411411

412412
return ave_time;

0 commit comments

Comments
 (0)