Skip to content

[mlir][gpu] Update attribute definitions in gpu::LaunchOp #152106

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -804,8 +804,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
Optional<Index>:$clusterSizeY,
Optional<Index>:$clusterSizeZ,
Optional<I32>:$dynamicSharedMemorySize,
OptionalAttr<SymbolRefAttr>:$kernelFunc,
OptionalAttr<SymbolRefAttr>:$kernelModule)>,
OptionalAttr<FlatSymbolRefAttr>:$module,
OptionalAttr<FlatSymbolRefAttr>:$function)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "GPU kernel launch operation";

Expand Down Expand Up @@ -839,7 +839,7 @@ def GPU_LaunchOp : GPU_Op<"launch", [
- a variadic number of Workgroup memory attributions.
- a variadic number of Private memory attributions.

The `kernelFunc` and `kernelModule` attributes are optional and specifies
The `function` and `module` attributes are optional and specify
the kernel name and a module in which the kernel should be outlined.

Syntax:
Expand All @@ -850,6 +850,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
`blocks` `(` ssa-id-list `)` `in` ssa-reassignment
`threads` `(` ssa-id-list `)` `in` ssa-reassignment
(`dynamic_shared_memory_size` ssa-use)?
(`module(` symbol-ref-id `)`)?
(`function(` symbol-ref-id `)`)?
memory-attribution
region attr-dict?
ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)`
Expand Down Expand Up @@ -907,6 +909,14 @@ def GPU_LaunchOp : GPU_Op<"launch", [
// sizes are immediately usable inside body region.
"some_op"(%cx, %bx, %tx) : (index, index, index) -> ()
}

// Launch with module and function attributes.
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %0, %sz_by = %1, %sz_bz = %2)
threads(%tx, %ty, %tz) in (%sz_tx = %3, %sz_ty = %4, %sz_tz = %5)
module(@kernel_module) function(@kernel_func) {
"some_op"(%bx, %tx) : (index, index) -> ()
%42 = load %val1[%bx] : memref<?xf32, 1>
}
```

Rationale: using operation/block arguments gives analyses a clear way of
Expand All @@ -931,7 +941,9 @@ def GPU_LaunchOp : GPU_Op<"launch", [
CArg<"TypeRange", "{}">:$privateAttributions,
CArg<"Value", "nullptr">:$clusterSizeX,
CArg<"Value", "nullptr">:$clusterSizeY,
CArg<"Value", "nullptr">:$clusterSizeZ)>
CArg<"Value", "nullptr">:$clusterSizeZ,
CArg<"FlatSymbolRefAttr", "nullptr">:$module,
CArg<"FlatSymbolRefAttr", "nullptr">:$function)>,
];

let extraClassDeclaration = [{
Expand Down
51 changes: 49 additions & 2 deletions mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,8 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
Type asyncTokenType, ValueRange asyncDependencies,
TypeRange workgroupAttributions,
TypeRange privateAttributions, Value clusterSizeX,
Value clusterSizeY, Value clusterSizeZ) {
Value clusterSizeY, Value clusterSizeZ,
FlatSymbolRefAttr module, FlatSymbolRefAttr function) {
OpBuilder::InsertionGuard g(builder);

// Add a WorkGroup attribution attribute. This attribute is required to
Expand All @@ -781,6 +782,12 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
if (dynamicSharedMemorySize)
result.addOperands(dynamicSharedMemorySize);

// Add optional module and function attributes.
if (module)
result.addAttribute(getModuleAttrName(result.name), module);
if (function)
result.addAttribute(getFunctionAttrName(result.name), function);

// Create a kernel body region with kNumConfigRegionAttributes + N memory
// attributions, where the first kNumConfigRegionAttributes arguments have
// `index` type and the rest have the same types as the data operands.
Expand Down Expand Up @@ -944,6 +951,21 @@ void LaunchOp::print(OpAsmPrinter &p) {
p << ' ' << getDynamicSharedMemorySizeKeyword() << ' '
<< getDynamicSharedMemorySize();

// Print optional module attribute.
StringRef moduleAttrName = getModuleAttrName();
if (auto module = getModule()) {
p << ' ' << moduleAttrName << '(';
p.printSymbolName(*module);
p << ')';
}
// Print optional function attribute.
StringRef functionAttrName = getFunctionAttrName();
if (auto function = getFunction()) {
p << ' ' << functionAttrName << '(';
p.printSymbolName(*function);
p << ')';
}

printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());
printAttributions(p, getPrivateKeyword(), getPrivateAttributions());

Expand All @@ -952,7 +974,8 @@ void LaunchOp::print(OpAsmPrinter &p) {
p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
p.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{
LaunchOp::getOperandSegmentSizeAttr(),
getNumWorkgroupAttributionsAttrName()});
getNumWorkgroupAttributionsAttrName(),
moduleAttrName, functionAttrName});
}

// Parse the size assignment blocks for blocks and threads. These have the form
Expand Down Expand Up @@ -990,6 +1013,9 @@ parseSizeAssignment(OpAsmParser &parser,
/// `clusters` `(` ssa-id-list `)` `in` ssa-reassignment (Optional)
/// `blocks` `(` ssa-id-list `)` `in` ssa-reassignment
/// `threads` `(` ssa-id-list `)` `in` ssa-reassignment
/// (`dynamic_shared_memory_size` ssa-use)?
/// (`module(` symbol-ref-id `)`)?
/// (`function(` symbol-ref-id `)`)?
/// memory-attribution
/// region attr-dict?
/// ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)`
Expand Down Expand Up @@ -1060,6 +1086,27 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
return failure();
}

// Parse optional module attribute.
StringRef moduleAttrName = getModuleAttrName(result.name);
if (succeeded(parser.parseOptionalKeyword(moduleAttrName))) {
FlatSymbolRefAttr moduleSymbol;
if (parser.parseLParen() ||
parser.parseAttribute(moduleSymbol, Type(), moduleAttrName,
result.attributes) ||
parser.parseRParen())
return failure();
}
// Parse optional function attribute.
StringRef functionAttrName = getFunctionAttrName(result.name);
if (succeeded(parser.parseOptionalKeyword(functionAttrName))) {
FlatSymbolRefAttr funcSymbol;
if (parser.parseLParen() ||
parser.parseAttribute(funcSymbol, Type(), functionAttrName,
result.attributes) ||
parser.parseRParen())
return failure();
}

// Create the region arguments, it has kNumConfigRegionAttributes arguments
// that correspond to block/thread identifiers and grid/block sizes, all
// having `index` type, a variadic number of WorkGroup Attributions and
Expand Down
9 changes: 4 additions & 5 deletions mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,8 +356,8 @@ class GpuKernelOutliningPass
auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
SetVector<Value> operands;
std::string kernelFnName;
if (op.getKernelFunc()) {
kernelFnName = op.getKernelFunc()->getRootReference().str();
if (op.getFunction()) {
kernelFnName = op.getFunction()->str();
} else {
kernelFnName =
Twine(op->getParentOfType<SymbolOpInterface>().getName(),
Expand Down Expand Up @@ -403,9 +403,8 @@ class GpuKernelOutliningPass
OpBuilder builder(context);
std::string kernelModuleName;
gpu::GPUModuleOp kernelModule;
if (gpuLaunchOp.getKernelModule()) {
kernelModuleName =
gpuLaunchOp.getKernelModule()->getRootReference().str();
if (gpuLaunchOp.getModule()) {
kernelModuleName = gpuLaunchOp.getModule()->str();
kernelModule =
parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName);
} else {
Expand Down
12 changes: 12 additions & 0 deletions mlir/test/Dialect/GPU/ops.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ module attributes {gpu.container_module} {
return
}

// Verifies that gpu.launch prints its optional `module(...)` and
// `function(...)` clauses right after the thread configuration.
// CHECK-LABEL:func @launch_with_module_func_attr(%{{.*}}: index)
func.func @launch_with_module_func_attr(%sz : index) {
// CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) module(@test_module) function(@test_kernel_func)
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz)
module(@test_module) function(@test_kernel_func) {
// CHECK: gpu.terminator
gpu.terminator
}
return
}

// CHECK-LABEL:func @args(%{{.*}}: index, %{{.*}}: index, %{{.*}}: f32, %{{.*}}: memref<?xf32, 1>) {
func.func @args(%blk : index, %thrd : index, %float : f32, %data : memref<?xf32,1>) {
// CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}})
Expand Down
30 changes: 17 additions & 13 deletions mlir/test/Dialect/GPU/outlining.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ func.func @launch_cluster() {
// CHECK-NEXT: = memref.load %[[KERNEL_ARG1]][%[[TID]]] : memref<?xf32, 1>

// -----
// This test tests the two optional attributes kernelModule and kernelFunc for gpu.launch
// This test tests the two optional attributes `module` and `function` for gpu.launch
// CHECK-LABEL: func.func @testKernelAttributes()
// CHECK: gpu.launch_func @test_module::@test_kernel_func blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
// CHECK: gpu.module @test_module
Expand All @@ -523,15 +523,16 @@ func.func @testKernelAttributes() {
%bDimZ = arith.constant 8 : index

gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ)
module(@test_module) function(@test_kernel_func) {
"some_op"(%bx, %tx) : (index, index) -> ()
gpu.terminator
} {kernelModule = @test_module, kernelFunc = @test_kernel_func}
}
return
}

// -----
// This test tests the two optional attributes kernelModule and kernelFunc for gpu.launch, when kernelModule already exists.
// This test tests the two optional attributes `module` and `function` for gpu.launch, when the named module already exists.

// CHECK-LABEL: gpu.module @existing_module
// CHECK: gpu.func @test_kernel_func()
Expand All @@ -556,15 +557,16 @@ func.func @testExistingModule() {
%bDimZ = arith.constant 8 : index

gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ)
module(@existing_module) function(@test_kernel_func) {
"some_op"(%bx, %tx) : (index, index) -> ()
gpu.terminator
} {kernelModule = @existing_module, kernelFunc = @test_kernel_func}
}
return
}

// -----
// This test tests the optional attribute kernelModule for gpu.launch.
// This test tests the optional attribute `module` for gpu.launch.
// CHECK-LABEL: func.func @testKernelModuleOnly()
// CHECK: gpu.launch_func @test_module::@testKernelModuleOnly_kernel blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
// CHECK: gpu.module @test_module
Expand All @@ -578,15 +580,16 @@ func.func @testKernelModuleOnly() {
%bDimZ = arith.constant 8 : index

gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ)
module(@test_module) {
"some_op"(%bx, %tx) : (index, index) -> ()
gpu.terminator
} {kernelModule = @test_module}
}
return
}

// -----
// This test tests the optional attribute kernelFunc for gpu.launch.
// This test tests the optional attribute `function` for gpu.launch.
// CHECK-LABEL: func.func @testKernelFuncOnly()
// CHECK: gpu.launch_func @test_kernel_func::@test_kernel_func blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])

Expand All @@ -601,15 +604,16 @@ func.func @testKernelFuncOnly() {
%bDimZ = arith.constant 8 : index

gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ)
function(@test_kernel_func) {
"some_op"(%bx, %tx) : (index, index) -> ()
gpu.terminator
} {kernelFunc = @test_kernel_func}
}
return
}

// -----
// This test tests gpu.launch when optional attributes kernelModule and kernelFunc are not specified.
// This test tests gpu.launch when optional attributes `module` and `function` are not specified.
// CHECK-LABEL: func.func @testNoAttributes()
// CHECK: gpu.launch_func @testNoAttributes_kernel::@testNoAttributes_kernel blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])

Expand Down