Skip to content

Commit d184289

Browse files
author
Chao Liu
committed
tidy
1 parent f885c13 commit d184289

8 files changed

+21
-44
lines changed

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,10 @@ link_libraries(${OpenMP_pthread_LIBRARY})
3838
find_package(HIP REQUIRED)
3939
message(STATUS "Build with HIP ${hip_VERSION}")
4040

41+
## half
42+
#find_path(HALF_INCLUDE_DIR half.hpp)
43+
#message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}")
44+
4145
## tidy
4246
include(EnableCompilerWarnings)
4347
set(MIOPEN_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name)

composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2r3.hpp

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -203,9 +203,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3
203203
__host__ __device__ static constexpr auto
204204
MakeCM0M1M2NGridDescriptor(const CMNGridDesc& c_m_n_grid_desc)
205205
{
206-
const auto M = c_m_n_grid_desc.GetLength(I0);
207-
const auto N = c_m_n_grid_desc.GetLength(I1);
208-
209206
constexpr auto xdlops_gemm = XdlopsGemm<FloatAB, MPerWave, NPerWave, K1>{};
210207

211208
constexpr auto CLayout = xdlops_gemm.GetCLayout();
@@ -217,7 +214,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3
217214
constexpr index_t MWaves = MPerBlock / (MPerWave * MRepeat);
218215
constexpr index_t NWaves = NPerBlock / (NPerWave * NRepeat);
219216

220-
constexpr auto N0 = Number<CLayout.N1()>{};
221217
constexpr auto N1 = Number<CLayout.N0()>{};
222218

223219
const auto c_m0_m1_m2_n_grid_desc = transform_dynamic_tensor_descriptor(
@@ -277,8 +273,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3
277273
p_c_grid, c_m0_m1_m2_n_grid_desc.GetElementSpaceSize());
278274

279275
const auto K0 = a_k0_m_k1_grid_desc.GetLength(I0);
280-
const auto M = a_k0_m_k1_grid_desc.GetLength(I1);
281-
const auto N = b_k0_n_k1_grid_desc.GetLength(I1);
282276

283277
// divide block work by [M, N]
284278
const auto block_work_idx =

host/driver_offline/include/device_dynamic_convolution_backward_data_implicit_gemm_v4r1_xdlops_nhwc_kyxc_nhwk.hpp

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -35,11 +35,6 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1_xdlops_nhwc_kyx
3535
constexpr auto I1 = Number<1>{};
3636
constexpr auto I2 = Number<2>{};
3737
constexpr auto I3 = Number<3>{};
38-
constexpr auto I4 = Number<4>{};
39-
constexpr auto I5 = Number<5>{};
40-
constexpr auto I6 = Number<6>{};
41-
constexpr auto I7 = Number<7>{};
42-
constexpr auto I8 = Number<8>{};
4338

4439
DeviceMem in_n_hi_wi_c_device_buf(sizeof(TInWei) * in_n_hi_wi_c.mDesc.GetElementSpace());
4540
DeviceMem wei_k_y_x_c_device_buf(sizeof(TInWei) * wei_k_y_x_c.mDesc.GetElementSpace());
@@ -319,16 +314,13 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1_xdlops_nhwc_kyx
319314
const auto K = out_n_ho_wo_k_lengths[I3];
320315
const auto C = wei_k_y_x_c_lengths[I3];
321316

322-
const auto Hi = in_n_hi_wi_c_lengths[I1];
323-
const auto Wi = in_n_hi_wi_c_lengths[I2];
324-
325317
const auto Ho = out_n_ho_wo_k_lengths[I1];
326318
const auto Wo = out_n_ho_wo_k_lengths[I2];
327319

328320
const auto Y = wei_k_y_x_c_lengths[I1];
329321
const auto X = wei_k_y_x_c_lengths[I2];
330322

331-
float perf = (float)(std::size_t(2) * N * K * Ho * Wo * C * Y * X) /
323+
float perf = static_cast<float>((std::size_t(2) * N * K * Ho * Wo * C * Y * X)) /
332324
(std::size_t(1000) * 1000 * 1000) / ave_time;
333325

334326
std::cout << "Average time : " << ave_time << " ms, " << perf << " TFlop/s"

host/driver_offline/include/device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_kyxc_nhwk.hpp

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -35,11 +35,6 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_k
3535
constexpr auto I1 = Number<1>{};
3636
constexpr auto I2 = Number<2>{};
3737
constexpr auto I3 = Number<3>{};
38-
constexpr auto I4 = Number<4>{};
39-
constexpr auto I5 = Number<5>{};
40-
constexpr auto I6 = Number<6>{};
41-
constexpr auto I7 = Number<7>{};
42-
constexpr auto I8 = Number<8>{};
4338

4439
DeviceMem in_n_hi_wi_c_device_buf(sizeof(TInWei) * in_n_hi_wi_c.mDesc.GetElementSpace());
4540
DeviceMem wei_k_y_x_c_device_buf(sizeof(TInWei) * wei_k_y_x_c.mDesc.GetElementSpace());
@@ -304,7 +299,7 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_k
304299
const auto Y = wei_k_y_x_c_lengths[I1];
305300
const auto X = wei_k_y_x_c_lengths[I2];
306301

307-
float perf = (float)(std::size_t(2) * N * K * Ho * Wo * C * Y * X) /
302+
float perf = static_cast<float>((std::size_t(2) * N * K * Ho * Wo * C * Y * X)) /
308303
(std::size_t(1000) * 1000 * 1000) / ave_time;
309304

310305
std::cout << "Average time : " << ave_time << " ms, " << perf << " TFlop/s"

host/driver_offline/src/conv_bwd_driver_offline.cpp

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -277,8 +277,6 @@ int main(int argc, char* argv[])
277277
in_right_pads_dev);
278278
};
279279

280-
const auto nhwc_desc = f_make_for_device_nhwc();
281-
282280
#if USE_CONV_BWD_V4R1_XDL_NHWC
283281
if(algo == ConvBackwardDataAlgo::V4R1XDLNHWC)
284282
{

host/driver_offline/src/conv_fwd_driver_offline.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -20,12 +20,12 @@
2020
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp"
2121

2222
#define USE_DYNAMIC_MODE 1
23-
#define USE_CONV_FWD_V4R4_NCHW 1
24-
#define USE_CONV_FWD_V4R4R2_NHWC 1
25-
#define USE_CONV_FWD_V6R1_NCHW 1
23+
#define USE_CONV_FWD_V4R4_NCHW 0
24+
#define USE_CONV_FWD_V4R4R2_NHWC 0
25+
#define USE_CONV_FWD_V6R1_NCHW 0
2626
#define USE_CONV_FWD_V5R1_NCHW 0
2727
#define USE_CONV_FWD_V4R4R2_XDL_NCHW 0
28-
#define USE_CONV_FWD_V4R4R4_XDL_NHWC 0
28+
#define USE_CONV_FWD_V4R4R4_XDL_NHWC 1
2929

3030
enum ConvForwardAlgo
3131
{

host/host_tensor/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@ set(HOST_TENSOR_SOURCE
1010
## the library target
1111
add_library(host_tensor SHARED ${HOST_TENSOR_SOURCE})
1212

13+
#target_include_directories(host_tensor SYSTEM PUBLIC $<BUILD_INTERFACE:${HALF_INCLUDE_DIR}>)
14+
1315
target_link_libraries(host_tensor PRIVATE hip::device)
1416
target_link_libraries(host_tensor INTERFACE hip::host)
1517

host/host_tensor/include/host_conv_bwd_data.hpp

Lines changed: 9 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ void host_direct_convolution_backward_data(Tensor<TIn>& in,
1414
const ConvStrides& conv_strides,
1515
const ConvDilations& conv_dilations,
1616
const InLeftPads& in_left_pads,
17-
const InRightPads& in_right_pads,
17+
const InRightPads& /* in_right_pads */,
1818
const ConvTensorLayout layout = ConvTensorLayout::NCHW)
1919
{
2020
using namespace ck;
@@ -25,11 +25,6 @@ void host_direct_convolution_backward_data(Tensor<TIn>& in,
2525
constexpr auto I3 = Number<3>{};
2626

2727
auto f_nchw = [&](auto n, auto c, auto hi, auto wi) {
28-
std::size_t N = in.mDesc.GetLengths()[I0];
29-
std::size_t C = in.mDesc.GetLengths()[I1];
30-
std::size_t Hi = in.mDesc.GetLengths()[I2];
31-
std::size_t Wi = in.mDesc.GetLengths()[I3];
32-
3328
std::size_t K = wei.mDesc.GetLengths()[I0];
3429
std::size_t Y = wei.mDesc.GetLengths()[I2];
3530
std::size_t X = wei.mDesc.GetLengths()[I3];
@@ -74,11 +69,6 @@ void host_direct_convolution_backward_data(Tensor<TIn>& in,
7469
};
7570

7671
auto f_nhwc = [&](auto n, auto hi, auto wi, auto c) {
77-
std::size_t N = in.mDesc.GetLengths()[I0];
78-
std::size_t Hi = in.mDesc.GetLengths()[I1];
79-
std::size_t Wi = in.mDesc.GetLengths()[I2];
80-
std::size_t C = in.mDesc.GetLengths()[I3];
81-
8272
std::size_t K = wei.mDesc.GetLengths()[I0];
8373
std::size_t Y = wei.mDesc.GetLengths()[I1];
8474
std::size_t X = wei.mDesc.GetLengths()[I2];
@@ -122,22 +112,24 @@ void host_direct_convolution_backward_data(Tensor<TIn>& in,
122112
in(n, hi, wi, c) = v;
123113
};
124114

125-
switch(layout)
115+
if(layout == ConvTensorLayout::NCHW)
126116
{
127-
case ConvTensorLayout::NCHW:
128117
make_ParallelTensorFunctor(f_nchw,
129118
in.mDesc.GetLengths()[0],
130119
in.mDesc.GetLengths()[1],
131120
in.mDesc.GetLengths()[2],
132121
in.mDesc.GetLengths()[3])(std::thread::hardware_concurrency());
133-
break;
134-
case ConvTensorLayout::NHWC:
122+
}
123+
else if(layout == ConvTensorLayout::NHWC)
124+
{
135125
make_ParallelTensorFunctor(f_nhwc,
136126
in.mDesc.GetLengths()[0],
137127
in.mDesc.GetLengths()[1],
138128
in.mDesc.GetLengths()[2],
139129
in.mDesc.GetLengths()[3])(std::thread::hardware_concurrency());
140-
break;
141-
default: throw std::runtime_error("wrong! not supported layout");
130+
}
131+
else
132+
{
133+
throw std::runtime_error("wrong! not supported layout");
142134
}
143135
}

0 commit comments

Comments
 (0)