@@ -134,10 +134,10 @@ void dispatchMoeGemmFinalDispatchTmaWarpSpecialized(TmaWarpSpecializedGroupedGem
     if constexpr (Arch::kMinComputeCapability >= 100 && Arch::kMinComputeCapability < 120)
     {
         bool const dynamic_cga = gemm_config.dynamic_cluster_shape != cutlass_extensions::ClusterShape::Undefined;
-        auto cluster_shape = enum_to_shape_tuple(gemm_config.dynamic_cluster_shape);
+        auto cluster_shape = cutlass_extensions::enum_to_shape_tuple(gemm_config.dynamic_cluster_shape);
         auto cluster_shape_cute = cute::Shape<int32_t, int32_t, cute::_1>{
             std::get<0>(cluster_shape), std::get<1>(cluster_shape), cute::_1{}};
-        auto cluster_shape_fallback = enum_to_shape_tuple(gemm_config.fallback_cluster_shape);
+        auto cluster_shape_fallback = cutlass_extensions::enum_to_shape_tuple(gemm_config.fallback_cluster_shape);
         auto cluster_shape_cute_fallback = cute::Shape<int32_t, int32_t, cute::_1>{
             std::get<0>(cluster_shape_fallback), std::get<1>(cluster_shape_fallback), cute::_1{}};
         if constexpr (!std::is_same_v<T, __nv_fp4_e2m1> && !std::is_same_v<WeightType, __nv_fp4_e2m1>)
@@ -161,8 +161,8 @@ void dispatchMoeGemmFinalDispatchTmaWarpSpecialized(TmaWarpSpecializedGroupedGem
         }};
         bool const tma_epilogue
             = gemm_config.epilogue_schedule == cutlass_extensions::EpilogueScheduleType::TMA;
-        return func_map[tma_epilogue][dynamic_cga](hopper_input, num_experts, multi_processor_count, stream,
-            occupancy, workspace_size, cluster_shape_cute, cluster_shape_cute_fallback);
+        func_map[tma_epilogue][dynamic_cga](hopper_input, num_experts, multi_processor_count, stream, occupancy,
+            workspace_size, cluster_shape_cute, cluster_shape_cute_fallback);
     }
     else
     {