-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[MLIR] Pass count of parameters & gpu binary size to runtime wrappers #66154
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-execution-engine ChangesThis PR is a breakdown of the big PR #65539 which enables intel gpu integration. In this PR we pass count of parameters and size of gpu binary to runtime wrappers since the SyclRuntimeWrappers (which will come in subsequent PR) requires the spirv size for compilation and also the number of parameters to iterate over the params. -- Full diff: https://github.com//pull/66154.diff — 4 Files Affected:
diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp index eddf3e9a47d0bc8..d710f7652b507cb 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp @@ -101,7 +101,7 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern<OpTy> { FunctionCallBuilder moduleLoadCallBuilder = { "mgpuModuleLoad", llvmPointerType /* void *module */, - {llvmPointerType /* void *cubin */}}; + {llvmPointerType /* void *cubin */, llvmInt64Type /* size_t size */}}; FunctionCallBuilder moduleUnloadCallBuilder = { "mgpuModuleUnload", llvmVoidType, {llvmPointerType /* void *module */}}; FunctionCallBuilder moduleGetFunctionCallBuilder = { @@ -125,7 +125,8 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern<OpTy> { llvmInt32Type, /* unsigned int sharedMemBytes */ llvmPointerType, /* void *hstream */ llvmPointerPointerType, /* void **kernelParams */ - llvmPointerPointerType /* void **extra */ + llvmPointerPointerType, /* void **extra */ + llvmInt64Type /* size_t paramsCount */ }}; FunctionCallBuilder streamCreateCallBuilder = { "mgpuStreamCreate", llvmPointerType /* void *stream */, {}}; @@ -1134,7 +1135,23 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite( loc, rewriter, nameBuffer.str(), binaryAttr.getValue(), LLVM::Linkage::Internal, getTypeConverter()->useOpaquePointers()); - auto module = moduleLoadCallBuilder.create(loc, rewriter, data); + // Pass the binary size. SPIRV requires binary size. 
+ auto gpuBlob = binaryAttr.getValue(); + auto gpuBlobSize = rewriter.create<mlir::LLVM::ConstantOp>( + loc, llvmInt64Type, + mlir::IntegerAttr::get(llvmInt64Type, + static_cast<int64_t>(gpuBlob.size()))); + + auto module = + moduleLoadCallBuilder.create(loc, rewriter, {data, gpuBlobSize}); + + // Pass the count of the parameters to runtime wrappers + auto paramsCount = rewriter.create<mlir::LLVM::ConstantOp>( + loc, llvmInt64Type, + mlir::IntegerAttr::get( + llvmInt64Type, + static_cast<int64_t>(launchOp.getNumKernelOperands()))); + // Get the function from the module. The name corresponds to the name of // the kernel function. auto kernelName = generateKernelNameConstant( @@ -1158,7 +1175,7 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite( {function.getResult(), adaptor.getGridSizeX(), adaptor.getGridSizeY(), adaptor.getGridSizeZ(), adaptor.getBlockSizeX(), adaptor.getBlockSizeY(), adaptor.getBlockSizeZ(), dynamicSharedMemorySize, stream, kernelParams, - /*extra=*/nullpointer}); + /*extra=*/nullpointer, paramsCount}); if (launchOp.getAsyncToken()) { // Async launch: make dependent ops use the same stream. 
diff --git a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp index 1dba677ebe66365..8a53d99c778a63a 100644 --- a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp +++ b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp @@ -119,7 +119,8 @@ static bool cusparseLt_initiated = false; #endif // MLIR_ENABLE_CUDA_CUSPARSELT #endif // MLIR_ENABLE_CUDA_CUSPARSE -extern "C" MLIR_CUDA_WRAPPERS_EXPORT CUmodule mgpuModuleLoad(void *data) { +extern "C" MLIR_CUDA_WRAPPERS_EXPORT CUmodule +mgpuModuleLoad(void *data, size_t /*gpuBlobSize*/) { ScopedContext scopedContext; CUmodule module = nullptr; CUDA_REPORT_IF_ERROR(cuModuleLoadData(&module, data)); @@ -144,7 +145,7 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuLaunchKernel(CUfunction function, intptr_t gridX, intptr_t gridY, intptr_t gridZ, intptr_t blockX, intptr_t blockY, intptr_t blockZ, int32_t smem, CUstream stream, void **params, - void **extra) { + void **extra, size_t /*paramsCount*/) { ScopedContext scopedContext; int32_t maxShmem = 0; CUdevice device = getDefaultCuDevice(); diff --git a/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp index bd3868a8e196f6f..998ff5b8b829f88 100644 --- a/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp +++ b/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp @@ -32,7 +32,7 @@ thread_local static int32_t defaultDevice = 0; -extern "C" hipModule_t mgpuModuleLoad(void *data) { +extern "C" hipModule_t mgpuModuleLoad(void *data, size_t /*gpuBlobSize*/) { hipModule_t module = nullptr; HIP_REPORT_IF_ERROR(hipModuleLoadData(&module, data)); return module; @@ -57,7 +57,7 @@ extern "C" void mgpuLaunchKernel(hipFunction_t function, intptr_t gridX, intptr_t blockX, intptr_t blockY, intptr_t blockZ, int32_t smem, hipStream_t stream, void **params, - void **extra) { + void **extra, size_t /*paramsCount*/) { HIP_REPORT_IF_ERROR(hipModuleLaunchKernel(function, gridX, gridY, gridZ, blockX, blockY, 
blockZ, smem, stream, params, extra)); diff --git a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir index 2cdc4e8dbb1ad67..b4efe0714aab9aa 100644 --- a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir +++ b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir @@ -34,8 +34,9 @@ module attributes {gpu.container_module} { // CHECK: [[ADDRESSOF:%.*]] = llvm.mlir.addressof @[[GLOBAL]] // CHECK: [[BINARY:%.*]] = llvm.getelementptr [[ADDRESSOF]]{{\[}}0, 0] // CHECK-SAME: -> !llvm.ptr - - // CHECK: [[MODULE:%.*]] = llvm.call @mgpuModuleLoad([[BINARY]]) + // CHECK: [[BINARYSIZE:%.*]] = llvm.mlir.constant + // CHECK: [[MODULE:%.*]] = llvm.call @mgpuModuleLoad([[BINARY]], [[BINARYSIZE]]) + // CHECK: [[PARAMSCOUNT:%.*]] = llvm.mlir.constant // CHECK: [[FUNC:%.*]] = llvm.call @mgpuModuleGetFunction([[MODULE]], {{.*}}) // CHECK: [[STREAM:%.*]] = llvm.call @mgpuStreamCreate @@ -56,7 +57,7 @@ module attributes {gpu.container_module} { // CHECK: llvm.call @mgpuLaunchKernel([[FUNC]], [[C8]], [[C8]], [[C8]], // CHECK-SAME: [[C8]], [[C8]], [[C8]], [[C256]], [[STREAM]], - // CHECK-SAME: [[PARAMS]], [[EXTRA_PARAMS]]) + // CHECK-SAME: [[PARAMS]], [[EXTRA_PARAMS]], [[PARAMSCOUNT]]) // CHECK: llvm.call @mgpuStreamSynchronize // CHECK: llvm.call @mgpuStreamDestroy // CHECK: llvm.call @mgpuModuleUnload |
Ping for review. |
@joker-eph @grypp ping for review |
joker-eph
approved these changes
Sep 25, 2023
@grypp @joker-eph can one of you guys help me merge this? |
legrosbuffle
pushed a commit
to legrosbuffle/llvm-project
that referenced
this pull request
Sep 29, 2023
…llvm#66154) This PR is a breakdown of the big PR llvm#65539 which enables intel gpu integration. In this PR we pass count of parameters and size of gpu binary to runtime wrappers since the SyclRuntimeWrappers (which will come in subsequent PR) requires the spirv size for compilation and also the number of parameters to iterate over the params.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
This PR is a breakdown of the big PR #65539 which enables intel gpu integration. In this PR we pass count of parameters and size of gpu binary to runtime wrappers since the SyclRuntimeWrappers (which will come in subsequent PR) requires the spirv size for compilation and also the number of parameters to iterate over the params.