2
2
#define CONV_IGEMM_FWD_V6R1_DLOPS_NCHW_KCYX_NKHW_HPP
3
3
4
4
#include < numeric>
5
+ #include < sstream>
5
6
6
7
namespace ck {
7
8
namespace driver {
@@ -10,93 +11,97 @@ struct CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw
10
11
{
11
12
// Builds one string made of " -DCK_PARAM_<Name>=<value>" fragments, one fragment
// per compile parameter referenced below, and returns that string.
// NOTE(review): this span is a diff rendering rather than plain C++. Lines that
// start with "-" are the removed version (std::to_string results joined with
// operator+), lines that start with "+" are the replacement (values streamed
// into a std::stringstream), and the bare numbers appear to be old/new
// line-number residue from the diff view -- confirm against the repository file.
auto GetCompileParameterString () const
12
13
{
14
+ auto param = std::stringstream ();
15
+
13
16
// clang-format off
14
// Removed version: returns the concatenation of each literal with
// std::to_string(<member>).
- return
15
- " -DCK_PARAM_ABDataTypeEnum=" +
16
- std::to_string ( ABDataTypeEnum) +
17
- " -DCK_PARAM_AccDataTypeEnum=" +
18
- std::to_string ( AccDataTypeEnum) +
19
- " -DCK_PARAM_CDataTypeEnum=" +
20
- std::to_string ( CDataTypeEnum) +
21
- " -DCK_PARAM_BlockSize=" +
22
- std::to_string ( BlockSize) +
23
- " -DCK_PARAM_GN0=" +
24
- std::to_string ( GN0) +
25
- " -DCK_PARAM_GK1=" +
26
- std::to_string ( GK1) +
27
- " -DCK_PARAM_GM1PerBlockGM11=" +
28
- std::to_string ( GM1PerBlockGM11) +
29
- " -DCK_PARAM_GN1PerBlockGN11=" +
30
- std::to_string ( GN1PerBlockGN11) +
31
- " -DCK_PARAM_GK0PerBlock=" +
32
- std::to_string ( GK0PerBlock) +
33
- " -DCK_PARAM_BM1PerThreadBM11=" +
34
- std::to_string ( BM1PerThreadBM11) +
35
- " -DCK_PARAM_BN1PerThreadBN11=" +
36
- std::to_string ( BN1PerThreadBN11) +
37
- " -DCK_PARAM_BK0PerThread=" +
38
- std::to_string ( BK0PerThread) +
39
- " -DCK_PARAM_BM10BN10ThreadClusterBM10Xs=" +
40
- std::to_string ( BM10BN10ThreadClusterBM10Xs[0 ]) + " ," +
41
- std::to_string ( BM10BN10ThreadClusterBM10Xs[1 ]) +
42
- " -DCK_PARAM_BM10BN10ThreadClusterBN10Xs=" +
43
- std::to_string ( BM10BN10ThreadClusterBN10Xs[0 ]) + " ," +
44
- std::to_string ( BM10BN10ThreadClusterBN10Xs[1 ]) +
45
- " -DCK_PARAM_ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1=" +
46
- std::to_string ( ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[0 ]) + " ," +
47
- std::to_string ( ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[1 ]) + " ," +
48
- std::to_string ( ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[2 ]) + " ," +
49
- std::to_string ( ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[3 ]) + " ," +
50
- std::to_string ( ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[4 ]) +
51
- " -DCK_PARAM_ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1=" +
52
- std::to_string ( ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[0 ]) + " ," +
53
- std::to_string ( ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[1 ]) + " ," +
54
- std::to_string ( ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[2 ]) + " ," +
55
- std::to_string ( ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[3 ]) + " ," +
56
- std::to_string ( ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[4 ]) +
57
- " -DCK_PARAM_ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1=" +
58
- std::to_string ( ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[0 ]) + " ," +
59
- std::to_string ( ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[1 ]) + " ," +
60
- std::to_string ( ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[2 ]) + " ," +
61
- std::to_string ( ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[3 ]) + " ," +
62
- std::to_string ( ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[4 ]) +
63
- " -DCK_PARAM_ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1=" +
64
- std::to_string ( ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[0 ]) + " ," +
65
- std::to_string ( ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[1 ]) + " ," +
66
- std::to_string ( ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[2 ]) + " ," +
67
- std::to_string ( ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[3 ]) + " ," +
68
- std::to_string ( ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[4 ]) +
69
- " -DCK_PARAM_BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1=" +
70
- std::to_string ( BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[0 ]) + " ," +
71
- std::to_string ( BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[1 ]) + " ," +
72
- std::to_string ( BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[2 ]) + " ," +
73
- std::to_string ( BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[3 ]) + " ," +
74
- std::to_string ( BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[4 ]) +
75
- " -DCK_PARAM_BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1=" +
76
- std::to_string ( BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[0 ]) + " ," +
77
- std::to_string ( BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[1 ]) + " ," +
78
- std::to_string ( BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[2 ]) + " ," +
79
- std::to_string ( BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[3 ]) + " ," +
80
- std::to_string ( BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[4 ]) +
81
- " -DCK_PARAM_BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1=" +
82
- std::to_string ( BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[0 ]) + " ," +
83
- std::to_string ( BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[1 ]) + " ," +
84
- std::to_string ( BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[2 ]) + " ," +
85
- std::to_string ( BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[3 ]) + " ," +
86
- std::to_string ( BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[4 ]) +
87
- " -DCK_PARAM_BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1=" +
88
- std::to_string ( BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[0 ]) + " ," +
89
- std::to_string ( BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[1 ]) + " ," +
90
- std::to_string ( BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[2 ]) + " ," +
91
- std::to_string ( BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[3 ]) + " ," +
92
- std::to_string ( BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[4 ]) +
93
- " -DCK_PARAM_CThreadTransferDstScalarPerVector=" +
94
- std::to_string ( CThreadTransferDstScalarPerVector) +
95
- " -DCK_PARAM_HasMainKBlockLoop=" +
96
- std::to_string ( static_cast <int >(HasMainKBlockLoop)) +
97
- " -DCK_PARAM_HasDoubleTailKBlockLoop=" +
98
- std::to_string ( static_cast <int >(HasDoubleTailKBlockLoop) );
17
// Added version: writes the same literals and member values, in the same
// order, into param with operator<< instead of std::to_string and operator+.
+ param <<
18
+ " -DCK_PARAM_ABDataTypeEnum=" <<
19
+ ABDataTypeEnum <<
20
+ " -DCK_PARAM_AccDataTypeEnum=" <<
21
+ AccDataTypeEnum <<
22
+ " -DCK_PARAM_CDataTypeEnum=" <<
23
+ CDataTypeEnum <<
24
+ " -DCK_PARAM_BlockSize=" <<
25
+ BlockSize <<
26
+ " -DCK_PARAM_GN0=" <<
27
+ GN0 <<
28
+ " -DCK_PARAM_GK1=" <<
29
+ GK1 <<
30
+ " -DCK_PARAM_GM1PerBlockGM11="
31
+ << GM1PerBlockGM11 <<
32
+ " -DCK_PARAM_GN1PerBlockGN11=" <<
33
+ GN1PerBlockGN11 <<
34
+ " -DCK_PARAM_GK0PerBlock=" <<
35
+ GK0PerBlock <<
36
+ " -DCK_PARAM_BM1PerThreadBM11=" <<
37
+ BM1PerThreadBM11 <<
38
+ " -DCK_PARAM_BN1PerThreadBN11=" <<
39
+ BN1PerThreadBN11 <<
40
+ " -DCK_PARAM_BK0PerThread=" <<
41
+ BK0PerThread <<
42
// Indexed members are written element by element (indices 0-1 for the two
// ThreadCluster members, 0-4 for the block-transfer members), with the " ,"
// literal between consecutive elements.
// NOTE(review): the separator shows up as " ," in this view; the space may be
// an artifact of how the diff was rendered -- verify against the repository.
+ " -DCK_PARAM_BM10BN10ThreadClusterBM10Xs=" <<
43
+ BM10BN10ThreadClusterBM10Xs[0 ] << " ," <<
44
+ BM10BN10ThreadClusterBM10Xs[1 ] <<
45
+ " -DCK_PARAM_BM10BN10ThreadClusterBN10Xs=" <<
46
+ BM10BN10ThreadClusterBN10Xs[0 ] << " ," <<
47
+ BM10BN10ThreadClusterBN10Xs[1 ] <<
48
+ " -DCK_PARAM_ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1=" <<
49
+ ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[0 ] << " ," <<
50
+ ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[1 ] << " ," <<
51
+ ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[2 ] << " ," <<
52
+ ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[3 ] << " ," <<
53
+ ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[4 ] <<
54
+ " -DCK_PARAM_ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1=" <<
55
+ ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[0 ] << " ," <<
56
+ ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[1 ] << " ," <<
57
+ ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[2 ] << " ," <<
58
+ ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[3 ] << " ," <<
59
+ ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[4 ] <<
60
+ " -DCK_PARAM_ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1=" <<
61
+ ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[0 ] << " ," <<
62
+ ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[1 ] << " ," <<
63
+ ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[2 ] << " ," <<
64
+ ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[3 ] << " ," <<
65
+ ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[4 ] <<
66
+ " -DCK_PARAM_ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1=" <<
67
+ ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[0 ] << " ," <<
68
+ ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[1 ] << " ," <<
69
+ ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[2 ] << " ," <<
70
+ ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[3 ] << " ," <<
71
+ ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[4 ] <<
72
+ " -DCK_PARAM_BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1=" <<
73
+ BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[0 ] << " ," <<
74
+ BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[1 ] << " ," <<
75
+ BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[2 ] << " ," <<
76
+ BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[3 ] << " ," <<
77
+ BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[4 ] <<
78
+ " -DCK_PARAM_BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1=" <<
79
+ BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[0 ] << " ," <<
80
+ BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[1 ] << " ," <<
81
+ BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[2 ] << " ," <<
82
+ BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[3 ] << " ," <<
83
+ BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[4 ] <<
84
+ " -DCK_PARAM_BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1=" <<
85
+ BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[0 ] << " ," <<
86
+ BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[1 ] << " ," <<
87
+ BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[2 ] << " ," <<
88
+ BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[3 ] << " ," <<
89
+ BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[4 ] <<
90
+ " -DCK_PARAM_BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1=" <<
91
+ BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[0 ] << " ," <<
92
+ BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[1 ] << " ," <<
93
+ BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[2 ] << " ," <<
94
+ BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[3 ] << " ," <<
95
+ BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[4 ] <<
96
+ " -DCK_PARAM_CThreadTransferDstScalarPerVector=" <<
97
+ CThreadTransferDstScalarPerVector <<
98
// The two K-block-loop flags are converted with static_cast<int> before being
// written, matching the casts used in the removed version.
+ " -DCK_PARAM_HasMainKBlockLoop=" <<
99
+ static_cast <int >(HasMainKBlockLoop) <<
100
+ " -DCK_PARAM_HasDoubleTailKBlockLoop=" <<
101
+ static_cast <int >(HasDoubleTailKBlockLoop);
99
102
// clang-format on
103
+
104
// Return the text accumulated in param.
+ return param.str ();
100
105
}
101
106
102
107
ck::DataTypeEnum_t ABDataTypeEnum = ck::DataTypeEnum_t::Unknown;
0 commit comments