From 6c96ac3d28d6273cf5f686c4caf2d090d47e36db Mon Sep 17 00:00:00 2001 From: Stephen Jia Date: Wed, 30 Apr 2025 11:39:24 -0700 Subject: [PATCH] [ET-VK][ez] Use standard quant naming scheme for quantized ops ## Context Use standard naming scheme for quantized operators introduced in the previous PR. For weight-only quantized linear operators, the names introduced are: `linear_qcsnw`: * q - quantized * c - per-channel / channelwise * s - symmetric * n - number of bits (qcs4w for 4-bit quant, qcs8w for 8-bit quant) * w - weight quantized `linear_qga4w`: * q - quantized * g - per-group / groupwise * a - affine * 4 - quantized to 4 bits * w - weight quantized ## Changes Rename instances of `q_8w_linear` to `linear_qcs8w` or `linear_qcsnw`. Rename instances of `q_4w_linear` to `linear_qga4w`. Rename cpp files to match the new naming convention. Differential Revision: [D73941992](https://our.internmc.facebook.com/intern/diff/D73941992/) [ghstack-poisoned] --- .../{q_8w_linear.glsl => linear_qcsnw.glsl} | 0 .../{q_8w_linear.yaml => linear_qcsnw.yaml} | 6 ++-- ...inear_coop.glsl => linear_qcsnw_coop.glsl} | 0 ...inear_coop.yaml => linear_qcsnw_coop.yaml} | 8 ++--- ...ear_tiled.glsl => linear_qcsnw_tiled.glsl} | 0 ...ear_tiled.yaml => linear_qcsnw_tiled.yaml} | 8 ++--- ...inear_coop.glsl => linear_qga4w_coop.glsl} | 0 ...inear_coop.yaml => linear_qga4w_coop.yaml} | 8 ++--- ...ear_tiled.glsl => linear_qga4w_tiled.glsl} | 0 ...ear_tiled.yaml => linear_qga4w_tiled.yaml} | 8 ++--- ...inearInt8.cpp => QuantizedLinearQCSNW.cpp} | 22 +++++++------- ...pwiseInt4.cpp => QuantizedLinearQGANW.cpp} | 14 ++++----- .../test/op_tests/linear_weight_int4_test.cpp | 30 +++++++++---------- 13 files changed, 52 insertions(+), 52 deletions(-) rename backends/vulkan/runtime/graph/ops/glsl/{q_8w_linear.glsl => linear_qcsnw.glsl} (100%) rename backends/vulkan/runtime/graph/ops/glsl/{q_8w_linear.yaml => linear_qcsnw.yaml} (84%) rename backends/vulkan/runtime/graph/ops/glsl/{q_8w_linear_coop.glsl => 
linear_qcsnw_coop.glsl} (100%) rename backends/vulkan/runtime/graph/ops/glsl/{q_8w_linear_coop.yaml => linear_qcsnw_coop.yaml} (74%) rename backends/vulkan/runtime/graph/ops/glsl/{q_8w_linear_tiled.glsl => linear_qcsnw_tiled.glsl} (100%) rename backends/vulkan/runtime/graph/ops/glsl/{q_8w_linear_tiled.yaml => linear_qcsnw_tiled.yaml} (75%) rename backends/vulkan/runtime/graph/ops/glsl/{q_4w_linear_coop.glsl => linear_qga4w_coop.glsl} (100%) rename backends/vulkan/runtime/graph/ops/glsl/{q_4w_linear_coop.yaml => linear_qga4w_coop.yaml} (72%) rename backends/vulkan/runtime/graph/ops/glsl/{q_4w_linear_tiled.glsl => linear_qga4w_tiled.glsl} (100%) rename backends/vulkan/runtime/graph/ops/glsl/{q_4w_linear_tiled.yaml => linear_qga4w_tiled.yaml} (72%) rename backends/vulkan/runtime/graph/ops/impl/{QuantizedLinearInt8.cpp => QuantizedLinearQCSNW.cpp} (95%) rename backends/vulkan/runtime/graph/ops/impl/{QuantizedLinearGroupwiseInt4.cpp => QuantizedLinearQGANW.cpp} (96%) diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw.glsl similarity index 100% rename from backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.glsl rename to backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw.glsl diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.yaml b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw.yaml similarity index 84% rename from backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.yaml rename to backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw.yaml index a7fdcf1c9fd..800007406f0 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw.yaml @@ -4,7 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-q_8w_linear: +linear_qcsnw: parameter_names_with_default_values: DTYPE: float STORAGE: texture3d @@ -18,6 +18,6 @@ q_8w_linear: - VALUE: texture3d - VALUE: buffer shader_variants: - - NAME: q_8w_linear_W_packed_W_packed - - NAME: q_8w_linear_W_packed_H_packed + - NAME: linear_qcs8w_W_packed_W_packed + - NAME: linear_qcs8w_W_packed_H_packed MAT2_PACKING: H_packed diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_coop.glsl b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_coop.glsl similarity index 100% rename from backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_coop.glsl rename to backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_coop.glsl diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_coop.yaml b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_coop.yaml similarity index 74% rename from backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_coop.yaml rename to backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_coop.yaml index 5daf28132e6..e0477a3a3d1 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_coop.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_coop.yaml @@ -4,7 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-q_8w_linear_coop: +linear_qcsnw_coop: parameter_names_with_default_values: DTYPE: float IN_STORAGE: texture3d @@ -17,11 +17,11 @@ q_8w_linear_coop: - VALUE: 1 SUFFIX: o4x1 shader_variants: - - NAME: q_8w_linear_coop_texture3d_texture3d_texture2d_texture2d_float - - NAME: q_8w_linear_coop_buffer_buffer_texture2d_texture2d_float + - NAME: linear_qcs8w_coop_texture3d_texture3d_texture2d_texture2d_float + - NAME: linear_qcs8w_coop_buffer_buffer_texture2d_texture2d_float IN_STORAGE: buffer OUT_STORAGE: buffer - - NAME: q_8w_linear_coop_buffer_buffer_buffer_buffer_float + - NAME: linear_qcs8w_coop_buffer_buffer_buffer_buffer_float IN_STORAGE: buffer OUT_STORAGE: buffer WEIGHT_STORAGE: buffer diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_tiled.glsl b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.glsl similarity index 100% rename from backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_tiled.glsl rename to backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.glsl diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_tiled.yaml b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.yaml similarity index 75% rename from backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_tiled.yaml rename to backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.yaml index 941836b48c4..f9f0134d995 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/q_8w_linear_tiled.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.yaml @@ -4,7 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-q_8w_linear_tiled: +linear_qcsnw_tiled: parameter_names_with_default_values: DTYPE: float IN_STORAGE: texture3d @@ -21,11 +21,11 @@ q_8w_linear_tiled: - VALUE: 4 SUFFIX: o4x4 shader_variants: - - NAME: q_8w_linear_tiled_texture3d_texture3d_texture2d_texture2d_float - - NAME: q_8w_linear_tiled_buffer_buffer_texture2d_texture2d_float + - NAME: linear_qcs8w_tiled_texture3d_texture3d_texture2d_texture2d_float + - NAME: linear_qcs8w_tiled_buffer_buffer_texture2d_texture2d_float IN_STORAGE: buffer OUT_STORAGE: buffer - - NAME: q_8w_linear_tiled_buffer_buffer_buffer_buffer_float + - NAME: linear_qcs8w_tiled_buffer_buffer_buffer_buffer_float IN_STORAGE: buffer OUT_STORAGE: buffer WEIGHT_STORAGE: buffer diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear_coop.glsl b/backends/vulkan/runtime/graph/ops/glsl/linear_qga4w_coop.glsl similarity index 100% rename from backends/vulkan/runtime/graph/ops/glsl/q_4w_linear_coop.glsl rename to backends/vulkan/runtime/graph/ops/glsl/linear_qga4w_coop.glsl diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear_coop.yaml b/backends/vulkan/runtime/graph/ops/glsl/linear_qga4w_coop.yaml similarity index 72% rename from backends/vulkan/runtime/graph/ops/glsl/q_4w_linear_coop.yaml rename to backends/vulkan/runtime/graph/ops/glsl/linear_qga4w_coop.yaml index 504cc4ab3b1..25ffe94f430 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear_coop.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/linear_qga4w_coop.yaml @@ -4,7 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-q_4w_linear_coop: +linear_qga4w_coop: parameter_names_with_default_values: DTYPE: float OUT_STORAGE: texture3d @@ -13,11 +13,11 @@ q_4w_linear_coop: PARAMS_STORAGE: buffer TILE_ROWS: 1 shader_variants: - - NAME: q_4w_linear_coop_texture3d_texture3d_texture2d_float - - NAME: q_4w_linear_coop_buffer_buffer_texture2d_float + - NAME: linear_qga4w_coop_texture3d_texture3d_texture2d_float + - NAME: linear_qga4w_coop_buffer_buffer_texture2d_float OUT_STORAGE: buffer IN_STORAGE: buffer - - NAME: q_4w_linear_coop_buffer_buffer_buffer_float + - NAME: linear_qga4w_coop_buffer_buffer_buffer_float OUT_STORAGE: buffer IN_STORAGE: buffer WEIGHT_STORAGE: buffer diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear_tiled.glsl b/backends/vulkan/runtime/graph/ops/glsl/linear_qga4w_tiled.glsl similarity index 100% rename from backends/vulkan/runtime/graph/ops/glsl/q_4w_linear_tiled.glsl rename to backends/vulkan/runtime/graph/ops/glsl/linear_qga4w_tiled.glsl diff --git a/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear_tiled.yaml b/backends/vulkan/runtime/graph/ops/glsl/linear_qga4w_tiled.yaml similarity index 72% rename from backends/vulkan/runtime/graph/ops/glsl/q_4w_linear_tiled.yaml rename to backends/vulkan/runtime/graph/ops/glsl/linear_qga4w_tiled.yaml index 865a46629df..8475c7d48a3 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/q_4w_linear_tiled.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/linear_qga4w_tiled.yaml @@ -4,7 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-q_4w_linear_tiled: +linear_qga4w_tiled: parameter_names_with_default_values: DTYPE: float OUT_STORAGE: texture3d @@ -13,11 +13,11 @@ q_4w_linear_tiled: PARAMS_STORAGE: buffer TILE_ROWS: 3 shader_variants: - - NAME: q_4w_linear_tiled_texture3d_texture3d_texture2d_float - - NAME: q_4w_linear_tiled_buffer_buffer_texture2d_float + - NAME: linear_qga4w_tiled_texture3d_texture3d_texture2d_float + - NAME: linear_qga4w_tiled_buffer_buffer_texture2d_float OUT_STORAGE: buffer IN_STORAGE: buffer - - NAME: q_4w_linear_tiled_buffer_buffer_buffer_float + - NAME: linear_qga4w_tiled_buffer_buffer_buffer_float OUT_STORAGE: buffer IN_STORAGE: buffer WEIGHT_STORAGE: buffer diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearInt8.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQCSNW.cpp similarity index 95% rename from backends/vulkan/runtime/graph/ops/impl/QuantizedLinearInt8.cpp rename to backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQCSNW.cpp index d7156ebef90..7269b75ae6e 100644 --- a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearInt8.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQCSNW.cpp @@ -15,7 +15,7 @@ namespace vkcompute { -void check_q_8w_linear_args( +void check_linear_qcsnw_args( const ComputeGraph& graph, const ValueRef mat1, const ValueRef qmat2_data, @@ -37,7 +37,7 @@ void check_q_8w_linear_args( utils::val_at(-1, scales_sizes) == utils::val_at(-2, qmat2_sizes)); } -void resize_q_8w_linear_node( +void resize_linear_qcs8w_node( ComputeGraph* graph, const std::vector& args, const std::vector& extra_args) { @@ -64,7 +64,7 @@ void resize_q_8w_linear_node( out->virtual_resize(new_out_sizes); } -void add_q_8w_linear_node( +void add_linear_qcs8w_node( ComputeGraph& graph, const ValueRef mat1, const ValueRef q_mat2_data, @@ -91,7 +91,7 @@ void add_q_8w_linear_node( ValueRef scales = prepack_standard( graph, scales_data, graph.storage_type_of(out), utils::kWidthPacked); - std::string kernel_name = "q_8w_linear"; + 
std::string kernel_name = "linear_qcs8w"; kernel_name.reserve(kShaderNameReserve); add_packed_dim_suffix(kernel_name, graph.packed_dim_of(mat1_W_packed)); add_packed_dim_suffix(kernel_name, graph.packed_dim_of(q_mat2)); @@ -131,7 +131,7 @@ void add_q_8w_linear_node( // Specialization Constants {}, // Resizing Logic - resize_q_8w_linear_node, + resize_linear_qcs8w_node, {}, pcs)); if (!graph.is_buffer_storage(out) && @@ -140,7 +140,7 @@ void add_q_8w_linear_node( } } -void add_q_8w_linear_tiled_node( +void add_linear_qcs8w_tiled_node( ComputeGraph& graph, const bool use_coop_algorithm, const ValueRef mat1, @@ -170,7 +170,7 @@ void add_q_8w_linear_tiled_node( prepack_standard(graph, scales_data, scales_storage, utils::kWidthPacked); std::string kernel_name = - use_coop_algorithm ? "q_8w_linear_coop" : "q_8w_linear_tiled"; + use_coop_algorithm ? "linear_qcs8w_coop" : "linear_qcs8w_tiled"; kernel_name.reserve(kShaderNameReserve); add_storage_type_suffix(kernel_name, graph.storage_type_of(out)); add_storage_type_suffix(kernel_name, graph.storage_type_of(mat1)); @@ -218,7 +218,7 @@ void add_q_8w_linear_tiled_node( // Specialization Constants {}, // Resizing Logic - resize_q_8w_linear_node, + resize_linear_qcs8w_node, {}, // Push Constants {{graph.sizes_pc_of(out), graph.sizes_pc_of(mat1)}})); @@ -280,13 +280,13 @@ bool can_use_coop_impl(ComputeGraph& graph, const ValueRef mat1) { void weight_int8pack_mm( ComputeGraph& graph, const std::vector& args) { - check_q_8w_linear_args(graph, args[0], args[1], args[2], args[3]); + check_linear_qcsnw_args(graph, args[0], args[1], args[2], args[3]); if (can_use_tiled_impl(graph, args[0], args[1], args[2], args[3])) { bool use_coop_algorithm = can_use_coop_impl(graph, args[0]); - return add_q_8w_linear_tiled_node( + return add_linear_qcs8w_tiled_node( graph, use_coop_algorithm, args[0], args[1], args[2], args[3]); } - return add_q_8w_linear_node(graph, args[0], args[1], args[2], args[3]); + return add_linear_qcs8w_node(graph, 
args[0], args[1], args[2], args[3]); } REGISTER_OPERATORS { diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearGroupwiseInt4.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQGANW.cpp similarity index 96% rename from backends/vulkan/runtime/graph/ops/impl/QuantizedLinearGroupwiseInt4.cpp rename to backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQGANW.cpp index 4b33dd9b806..ec718bea7da 100644 --- a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearGroupwiseInt4.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQGANW.cpp @@ -15,7 +15,7 @@ namespace vkcompute { -void check_q_4w_linear_args( +void check_linear_qga4w_args( ComputeGraph& graph, const ValueRef mat1, const ValueRef mat2_data, @@ -43,7 +43,7 @@ void check_q_4w_linear_args( VK_CHECK_COND(graph.has_standard_axis_map(out)); } -void resize_q_4w_linear_node( +void resize_linear_qga4w_node( ComputeGraph* graph, const std::vector& args, const std::vector& extra_args) { @@ -118,14 +118,14 @@ ValueRef prepack_int4_linear_weight_transposed_interleaved( return qmat2; } -void add_q_4w_linear_node( +void add_linear_qga4w_node( ComputeGraph& graph, const ValueRef mat1, const ValueRef mat2_data, const ValueRef group_size, const ValueRef scales_and_zeros_data, const ValueRef out) { - check_q_4w_linear_args( + check_linear_qga4w_args( graph, mat1, mat2_data, group_size, scales_and_zeros_data, out); const uint32_t group_size_val = graph.extract_scalar(group_size); @@ -143,7 +143,7 @@ void add_q_4w_linear_node( ValueRef scales_and_zeros = prepack_standard_hw_transposed( graph, scales_and_zeros_data, utils::kBuffer, utils::kWidthPacked); - std::string kernel_name = "q_4w_linear"; + std::string kernel_name = "linear_qga4w"; if (use_coop_algorithm) { kernel_name += "_coop"; } else { @@ -176,7 +176,7 @@ void add_q_4w_linear_node( // Specialization Constants {SV(group_size_val)}, // Resizing Logic - resize_q_4w_linear_node, + resize_linear_qga4w_node, {}, // Push Constants 
{graph.sizes_pc_of(out), @@ -187,7 +187,7 @@ void add_q_4w_linear_node( void linear_weight_int4( ComputeGraph& graph, const std::vector& args) { - return add_q_4w_linear_node( + return add_linear_qga4w_node( graph, args[0], // mat1 args[1], // mat2 diff --git a/backends/vulkan/test/op_tests/linear_weight_int4_test.cpp b/backends/vulkan/test/op_tests/linear_weight_int4_test.cpp index e617f5b5249..5d08ee57859 100644 --- a/backends/vulkan/test/op_tests/linear_weight_int4_test.cpp +++ b/backends/vulkan/test/op_tests/linear_weight_int4_test.cpp @@ -20,7 +20,7 @@ // Reference Implementations // -at::Tensor linear_weight_int4_reference_impl( +at::Tensor linear_qga4w_reference_impl( const at::Tensor& x, const at::Tensor& weights_4x2, const int64_t groupsize, @@ -101,7 +101,7 @@ at::Tensor dequantize_and_linear( // Test functions // -void test_reference_linear_int4( +void test_reference_linear_qga4w( const int B, const int M, const int K, @@ -119,7 +119,7 @@ void test_reference_linear_int4( at::Tensor scales_and_zeros = at::rand({k_groups, N, 2}, at::device(at::kCPU).dtype(at::kFloat)); - at::Tensor out = linear_weight_int4_reference_impl( + at::Tensor out = linear_qga4w_reference_impl( x, at::_convert_weight_to_int4pack_for_cpu(weights_int, group_size), group_size, @@ -152,7 +152,7 @@ vkcompute::vkapi::ScalarType from_at_scalartype(c10::ScalarType at_scalartype) { } } -void test_vulkan_linear_int4_impl( +void test_vulkan_linear_qga4w_impl( const int B, const int M, const int K, @@ -174,7 +174,7 @@ void test_vulkan_linear_int4_impl( at::rand({k_groups, N, 2}, at::device(at::kCPU).dtype(at::kFloat)); at::Tensor weights_int = unpack_weights_4x2(weights_4x2); - at::Tensor out_ref = linear_weight_int4_reference_impl( + at::Tensor out_ref = linear_qga4w_reference_impl( x, at::_convert_weight_to_int4pack_for_cpu(weights_int, group_size), group_size, @@ -237,14 +237,14 @@ void test_vulkan_linear_int4_impl( ASSERT_TRUE(at::allclose(vk_out, out_ref, 1e-4, 1e-4)); } -void 
test_vulkan_linear_int4( +void test_vulkan_linear_qga4w( const int B, const int M, const int K, const int N, const int group_size = 32, const int inner_k_tiles = 8) { - test_vulkan_linear_int4_impl( + test_vulkan_linear_qga4w_impl( B, M, K, @@ -254,7 +254,7 @@ void test_vulkan_linear_int4( vkcompute::utils::kBuffer, vkcompute::utils::kBuffer); - test_vulkan_linear_int4_impl( + test_vulkan_linear_qga4w_impl( B, M, K, @@ -265,30 +265,30 @@ void test_vulkan_linear_int4( vkcompute::utils::kTexture3D); } -TEST(VulkanInt4LinearTest, test_reference_impl) { - test_reference_linear_int4( +TEST(VulkanLinearQGA4WTest, test_reference_impl) { + test_reference_linear_qga4w( /*B = */ 1, /*M = */ 4, /*K = */ 128, /*N = */ 32); } -TEST(VulkanInt4LinearTest, test_vulkan_impl_small_m) { - test_vulkan_linear_int4( +TEST(VulkanLinearQGA4WTest, test_vulkan_impl_small_m) { + test_vulkan_linear_qga4w( /*B = */ 1, /*M = */ 4, /*K = */ 128, /*N = */ 32); - test_vulkan_linear_int4( + test_vulkan_linear_qga4w( /*B = */ 1, /*M = */ 1, /*K = */ 256, /*N = */ 256); } -TEST(VulkanInt4LinearTest, test_vulkan_impl_gemm) { - test_vulkan_linear_int4( +TEST(VulkanLinearQGA4WTest, test_vulkan_impl_gemm) { + test_vulkan_linear_qga4w( /*B = */ 1, /*M = */ 256, /*K = */ 256,