diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td index 2ed7d3810b918..f946bb731e2ca 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -804,8 +804,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [ Optional:$clusterSizeY, Optional:$clusterSizeZ, Optional:$dynamicSharedMemorySize, - OptionalAttr:$kernelFunc, - OptionalAttr:$kernelModule)>, + OptionalAttr:$module, + OptionalAttr:$function)>, Results<(outs Optional:$asyncToken)> { let summary = "GPU kernel launch operation"; @@ -839,7 +839,7 @@ def GPU_LaunchOp : GPU_Op<"launch", [ - a variadic number of Workgroup memory attributions. - a variadic number of Private memory attributions. - The `kernelFunc` and `kernelModule` attributes are optional and specifies + The `function` and `module` attributes are optional and specify the kernel name and a module in which the kernel should be outlined. Syntax: @@ -850,6 +850,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [ `blocks` `(` ssa-id-list `)` `in` ssa-reassignment `threads` `(` ssa-id-list `)` `in` ssa-reassignment (dynamic_shared_memory_size ssa-use)? + (`module(` symbol-ref-id `)`)? + (`function(` symbol-ref-id `)`)? memory-attribution region attr-dict? ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)` @@ -907,6 +909,14 @@ def GPU_LaunchOp : GPU_Op<"launch", [ // sizes are immediately usable inside body region. "some_op"(%cx, %bx, %tx) : (index, index, index) -> () } + + // Launch with module and function attributes. 
+ gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %0, %sz_by = %1, %sz_bz = %2) + threads(%tx, %ty, %tz) in (%sz_tx = %3, %sz_ty = %4, %sz_tz = %5) + module(@kernel_module) function(@kernel_func) { + "some_op"(%bx, %tx) : (index, index) -> () + %42 = load %val1[%bx] : memref + } ``` Rationale: using operation/block arguments gives analyses a clear way of @@ -931,7 +941,9 @@ def GPU_LaunchOp : GPU_Op<"launch", [ CArg<"TypeRange", "{}">:$privateAttributions, CArg<"Value", "nullptr">:$clusterSizeX, CArg<"Value", "nullptr">:$clusterSizeY, - CArg<"Value", "nullptr">:$clusterSizeZ)> + CArg<"Value", "nullptr">:$clusterSizeZ, + CArg<"FlatSymbolRefAttr", "nullptr">:$module, + CArg<"FlatSymbolRefAttr", "nullptr">:$function)>, ]; let extraClassDeclaration = [{ diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 5a72ef17db7f0..2503ccb6a2cfe 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -756,7 +756,8 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result, Type asyncTokenType, ValueRange asyncDependencies, TypeRange workgroupAttributions, TypeRange privateAttributions, Value clusterSizeX, - Value clusterSizeY, Value clusterSizeZ) { + Value clusterSizeY, Value clusterSizeZ, + FlatSymbolRefAttr module, FlatSymbolRefAttr function) { OpBuilder::InsertionGuard g(builder); // Add a WorkGroup attribution attribute. This attribute is required to @@ -781,6 +782,12 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result, if (dynamicSharedMemorySize) result.addOperands(dynamicSharedMemorySize); + // Add optional module and function attributes. 
+ if (module) + result.addAttribute(getModuleAttrName(result.name), module); + if (function) + result.addAttribute(getFunctionAttrName(result.name), function); + // Create a kernel body region with kNumConfigRegionAttributes + N memory // attributions, where the first kNumConfigRegionAttributes arguments have // `index` type and the rest have the same types as the data operands. @@ -944,6 +951,21 @@ void LaunchOp::print(OpAsmPrinter &p) { p << ' ' << getDynamicSharedMemorySizeKeyword() << ' ' << getDynamicSharedMemorySize(); + // Print optional module attribute. + StringRef moduleAttrName = getModuleAttrName(); + if (auto module = getModule()) { + p << ' ' << moduleAttrName << '('; + p.printSymbolName(*module); + p << ')'; + } + // Print optional function attribute. + StringRef functionAttrName = getFunctionAttrName(); + if (auto function = getFunction()) { + p << ' ' << functionAttrName << '('; + p.printSymbolName(*function); + p << ')'; + } + printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions()); printAttributions(p, getPrivateKeyword(), getPrivateAttributions()); @@ -952,7 +974,8 @@ void LaunchOp::print(OpAsmPrinter &p) { p.printRegion(getBody(), /*printEntryBlockArgs=*/false); p.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{ LaunchOp::getOperandSegmentSizeAttr(), - getNumWorkgroupAttributionsAttrName()}); + getNumWorkgroupAttributionsAttrName(), + moduleAttrName, functionAttrName}); } // Parse the size assignment blocks for blocks and threads. These have the form @@ -990,6 +1013,9 @@ parseSizeAssignment(OpAsmParser &parser, /// `clusters` `(` ssa-id-list `)` `in` ssa-reassignment (Optional) /// `blocks` `(` ssa-id-list `)` `in` ssa-reassignment /// `threads` `(` ssa-id-list `)` `in` ssa-reassignment +/// (`dynamic_shared_memory_size` ssa-use)? +/// (`module(` symbol-ref-id `)`)? +/// (`function(` symbol-ref-id `)`)? /// memory-attribution /// region attr-dict? 
/// ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)` @@ -1060,6 +1086,27 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) { return failure(); } + // Parse optional module attribute. + StringRef moduleAttrName = getModuleAttrName(result.name); + if (succeeded(parser.parseOptionalKeyword(moduleAttrName))) { + FlatSymbolRefAttr moduleSymbol; + if (parser.parseLParen() || + parser.parseAttribute(moduleSymbol, Type(), moduleAttrName, + result.attributes) || + parser.parseRParen()) + return failure(); + } + // Parse optional function attribute. + StringRef functionAttrName = getFunctionAttrName(result.name); + if (succeeded(parser.parseOptionalKeyword(functionAttrName))) { + FlatSymbolRefAttr funcSymbol; + if (parser.parseLParen() || + parser.parseAttribute(funcSymbol, Type(), functionAttrName, + result.attributes) || + parser.parseRParen()) + return failure(); + } + // Create the region arguments, it has kNumConfigRegionAttributes arguments // that correspond to block/thread identifiers and grid/block sizes, all // having `index` type, a variadic number of WorkGroup Attributions and diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp index 99f5c5b0cf139..d4978ca768747 100644 --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -356,8 +356,8 @@ class GpuKernelOutliningPass auto funcWalkResult = func.walk([&](gpu::LaunchOp op) { SetVector operands; std::string kernelFnName; - if (op.getKernelFunc()) { - kernelFnName = op.getKernelFunc()->getRootReference().str(); + if (op.getFunction()) { + kernelFnName = op.getFunction()->str(); } else { kernelFnName = Twine(op->getParentOfType().getName(), @@ -403,9 +403,8 @@ class GpuKernelOutliningPass OpBuilder builder(context); std::string kernelModuleName; gpu::GPUModuleOp kernelModule; - if (gpuLaunchOp.getKernelModule()) { - kernelModuleName = - 
gpuLaunchOp.getKernelModule()->getRootReference().str(); + if (gpuLaunchOp.getModule()) { + kernelModuleName = gpuLaunchOp.getModule()->str(); kernelModule = parentSymbolTable.lookup(kernelModuleName); } else { diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir index ee1fdfa4d02f0..9cc0bf8f41d5a 100644 --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -17,6 +17,18 @@ module attributes {gpu.container_module} { return } + // CHECK-LABEL:func @launch_with_module_func_attr(%{{.*}}: index) + func.func @launch_with_module_func_attr(%sz : index) { + // CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) module(@test_module) function(@test_kernel_func) + gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz) + threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) + module(@test_module) function(@test_kernel_func) { + // CHECK: gpu.terminator + gpu.terminator + } + return + } + // CHECK-LABEL:func @args(%{{.*}}: index, %{{.*}}: index, %{{.*}}: f32, %{{.*}}: memref) { func.func @args(%blk : index, %thrd : index, %float : f32, %data : memref) { // CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir index d48fa054432d1..04901182a80f5 100644 --- a/mlir/test/Dialect/GPU/outlining.mlir +++ b/mlir/test/Dialect/GPU/outlining.mlir @@ -509,7 +509,7 @@ func.func @launch_cluster() { // CHECK-NEXT: = memref.load %[[KERNEL_ARG1]][%[[TID]]] : memref // ----- -// This test tests the two optional attributes kernelModule and kernelFunc for gpu.launch +// This test tests the two optional attributes 
`module` and `function` for gpu.launch // CHECK-LABEL: func.func @testKernelAttributes() // CHECK: gpu.launch_func @test_module::@test_kernel_func blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]]) // CHECK: gpu.module @test_module @@ -523,15 +523,16 @@ func.func @testKernelAttributes() { %bDimZ = arith.constant 8 : index gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ) - threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) { + threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) + module(@test_module) function(@test_kernel_func) { "some_op"(%bx, %tx) : (index, index) -> () gpu.terminator - } {kernelModule = @test_module, kernelFunc = @test_kernel_func} + } return } // ----- -// This test tests the two optional attributes kernelModule and kernelFunc for gpu.launch, when kernelModule already exists. +// This test tests the two optional attributes `module` and `function` for gpu.launch, when the module already exists. // CHECK-LABEL: gpu.module @existing_module // CHECK: gpu.func @test_kernel_func() @@ -556,15 +557,16 @@ func.func @testExistingModule() { %bDimZ = arith.constant 8 : index gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ) - threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) { + threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) + module(@existing_module) function(@test_kernel_func) { "some_op"(%bx, %tx) : (index, index) -> () gpu.terminator - } {kernelModule = @existing_module, kernelFunc = @test_kernel_func} + } return } // ----- -// This test tests the optional attribute kernelModule for gpu.launch. +// This test tests the optional attribute `module` for gpu.launch. 
// CHECK-LABEL: func.func @testKernelModuleOnly() // CHECK: gpu.launch_func @test_module::@testKernelModuleOnly_kernel blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]]) // CHECK: gpu.module @test_module @@ -578,15 +580,16 @@ func.func @testKernelModuleOnly() { %bDimZ = arith.constant 8 : index gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ) - threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) { + threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) + module(@test_module) { "some_op"(%bx, %tx) : (index, index) -> () gpu.terminator - } {kernelModule = @test_module} + } return } // ----- -// This test tests the optional attribute kernelFunc for gpu.launch. +// This test tests the optional attribute `function` for gpu.launch. // CHECK-LABEL: func.func @testKernelFuncOnly() // CHECK: gpu.launch_func @test_kernel_func::@test_kernel_func blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]]) @@ -601,15 +604,16 @@ func.func @testKernelFuncOnly() { %bDimZ = arith.constant 8 : index gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ) - threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) { + threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) + function(@test_kernel_func) { "some_op"(%bx, %tx) : (index, index) -> () gpu.terminator - } {kernelFunc = @test_kernel_func} + } return } // ----- -// This test tests gpu.launch when optional attributes kernelModule and kernelFunc are not specified. +// This test tests gpu.launch when optional attributes `module` and `function` are not specified. 
// CHECK-LABEL: func.func @testNoAttributes() // CHECK: gpu.launch_func @testNoAttributes_kernel::@testNoAttributes_kernel blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])