@@ -30,6 +30,7 @@ struct quantize_perf_params {
30
30
bool op_quantize_row_q_reference = false ;
31
31
bool op_quantize_row_q = false ;
32
32
bool op_dequantize_row_q = false ;
33
+ bool op_quantize_row_q_dot = false ;
33
34
bool op_vec_dot_q = false ;
34
35
};
35
36
@@ -147,6 +148,8 @@ int main(int argc, char * argv[]) {
147
148
params.op_quantize_row_q = true ;
148
149
} else if (op == " dequantize_row_q" ) {
149
150
params.op_dequantize_row_q = true ;
151
+ } else if (op == " quantize_row_q_dot" ) {
152
+ params.op_quantize_row_q_dot = true ;
150
153
} else if (op == " vec_dot_q" ) {
151
154
params.op_vec_dot_q = true ;
152
155
} else {
@@ -184,8 +187,8 @@ int main(int argc, char * argv[]) {
184
187
if (params.test_sizes .empty ()) {
185
188
params.test_sizes .push_back (L1_SIZE);
186
189
}
187
- if (!(params.op_quantize_row_q_reference || params.op_quantize_row_q || params.op_dequantize_row_q || params.op_vec_dot_q )) {
188
- params.op_quantize_row_q_reference = params.op_quantize_row_q = params.op_dequantize_row_q = params.op_vec_dot_q = true ;
190
+ if (!(params.op_quantize_row_q_reference || params.op_quantize_row_q || params.op_dequantize_row_q || params.op_quantize_row_q_dot || params. op_vec_dot_q )) {
191
+ params.op_quantize_row_q_reference = params.op_quantize_row_q = params.op_dequantize_row_q = params.op_quantize_row_q_dot = params. op_vec_dot_q = true ;
189
192
}
190
193
191
194
std::sort (params.test_sizes .begin (), params.test_sizes .end ());
@@ -268,6 +271,20 @@ int main(int argc, char * argv[]) {
268
271
printf (" \n " );
269
272
}
270
273
274
+ if (params.op_quantize_row_q_dot ) {
275
+ printf (" quantize_row_q_dot\n " );
276
+ for (size_t size : params.test_sizes ) {
277
+ printf (" %zu values (%.2f MB)\n " , size, 4 *size/(float )(1024 *1024 ));
278
+ auto quantize_fn = [&](void ) {
279
+ qfns.quantize_row_q_dot (test_data1, test_q1, size);
280
+ return test_q1[0 ];
281
+ };
282
+ size_t quantized_size = size / ggml_blck_size (type) * ggml_type_size (type);
283
+ benchmark_function (size, quantized_size, quantize_fn);
284
+ }
285
+ printf (" \n " );
286
+ }
287
+
271
288
if (params.op_vec_dot_q ) {
272
289
printf (" vec_dot_q\n " );
273
290
qfns.quantize_row_q (test_data1, test_q1, largest);
0 commit comments