From 389fbe7f92cf9572f91859d69672cf09d9a23d17 Mon Sep 17 00:00:00 2001 From: wangpc Date: Tue, 17 Oct 2023 11:27:27 +0800 Subject: [PATCH 1/5] [OpenCL][RISCV] Support SPIR_KERNEL calling convention X86 supports this calling convention, but I could not find any special handling for it there, so I think we can simply handle it via CC_RISCV. This should fix #69197. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 + llvm/test/CodeGen/RISCV/spir-kernel-cc.ll | 86 +++++++++++++++++++++ 2 files changed, 88 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/spir-kernel-cc.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index e8f001e491cdc..c508a1abfccfe 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/IRBuilder.h" @@ -17000,6 +17001,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( report_fatal_error("Unsupported calling convention"); case CallingConv::C: case CallingConv::Fast: + case CallingConv::SPIR_KERNEL: break; case CallingConv::GHC: if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx()) diff --git a/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll b/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll new file mode 100644 index 0000000000000..24f5c54021e3a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+f,+d < %s | FileCheck %s -check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+f,+d < %s | FileCheck %s -check-prefix=RV64 + +; Check the SPIR_KERNEL call convention work + +declare dso_local i64 @_Z13get_global_idj(i32 noundef 
signext) + +define dso_local spir_kernel void @foo(ptr nocapture noundef readonly align 4 %a, ptr nocapture noundef readonly align 4 %b, ptr nocapture noundef writeonly align 4 %c) { +; RV32-LABEL: foo: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 0(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset s1, -12 +; RV32-NEXT: .cfi_offset s2, -16 +; RV32-NEXT: mv s0, a2 +; RV32-NEXT: mv s1, a1 +; RV32-NEXT: mv s2, a0 +; RV32-NEXT: li a0, 0 +; RV32-NEXT: call _Z13get_global_idj +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s2, s2, a0 +; RV32-NEXT: flw fa5, 0(s2) +; RV32-NEXT: add s1, s1, a0 +; RV32-NEXT: flw fa4, 0(s1) +; RV32-NEXT: fadd.s fa5, fa5, fa4 +; RV32-NEXT: add a0, s0, a0 +; RV32-NEXT: fsw fa5, 0(a0) +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: foo: +; RV64: # %bb.0: # %entry +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset s1, -24 +; RV64-NEXT: .cfi_offset s2, -32 +; RV64-NEXT: mv s0, a2 +; RV64-NEXT: mv s1, a1 +; RV64-NEXT: mv s2, a0 +; RV64-NEXT: li a0, 0 +; RV64-NEXT: call _Z13get_global_idj +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add s2, s2, a0 +; RV64-NEXT: flw fa5, 0(s2) +; RV64-NEXT: add s1, s1, a0 +; RV64-NEXT: flw fa4, 
0(s1) +; RV64-NEXT: fadd.s fa5, fa5, fa4 +; RV64-NEXT: add a0, s0, a0 +; RV64-NEXT: fsw fa5, 0(a0) +; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret +entry: + %call = tail call i64 @_Z13get_global_idj(i32 noundef signext 0) + %sext = shl i64 %call, 32 + %idxprom = ashr exact i64 %sext, 32 + %arrayidx = getelementptr inbounds float, ptr %a, i64 %idxprom + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %idxprom + %1 = load float, ptr %arrayidx2, align 4 + %add = fadd float %0, %1 + %arrayidx4 = getelementptr inbounds float, ptr %c, i64 %idxprom + store float %add, ptr %arrayidx4, align 4 + ret void +} \ No newline at end of file From 96555640388b6ef5b1aa6b1b3ff4c5c974a8ea1a Mon Sep 17 00:00:00 2001 From: wangpc Date: Tue, 17 Oct 2023 11:42:39 +0800 Subject: [PATCH 2/5] fixup! [OpenCL][RISCV] Support SPIR_KERNEL calling convention Remove unnecessary include --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c508a1abfccfe..30dc9cb561d06 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -30,7 +30,6 @@ #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/IRBuilder.h" From 280e142bae0d8b318e29532174ef5dfb802588f1 Mon Sep 17 00:00:00 2001 From: wangpc Date: Tue, 17 Oct 2023 11:46:03 +0800 Subject: [PATCH 3/5] fixup! 
[OpenCL][RISCV] Support SPIR_KERNEL calling convention Add new line --- llvm/test/CodeGen/RISCV/spir-kernel-cc.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll b/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll index 24f5c54021e3a..437d1ff159143 100644 --- a/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll +++ b/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll @@ -83,4 +83,4 @@ entry: %arrayidx4 = getelementptr inbounds float, ptr %c, i64 %idxprom store float %add, ptr %arrayidx4, align 4 ret void -} \ No newline at end of file +} From ce143a05f6c7fbf809f9c5b5cd23020cc2f65129 Mon Sep 17 00:00:00 2001 From: wangpc Date: Wed, 18 Oct 2023 18:01:58 +0800 Subject: [PATCH 4/5] fixup! [OpenCL][RISCV] Support SPIR_KERNEL calling convention Simplify test --- llvm/test/CodeGen/RISCV/spir-kernel-cc.ll | 78 ++--------------------- 1 file changed, 4 insertions(+), 74 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll b/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll index 437d1ff159143..8731798c76c19 100644 --- a/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll +++ b/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll @@ -2,85 +2,15 @@ ; RUN: llc -mtriple=riscv32 -mattr=+f,+d < %s | FileCheck %s -check-prefix=RV32 ; RUN: llc -mtriple=riscv64 -mattr=+f,+d < %s | FileCheck %s -check-prefix=RV64 -; Check the SPIR_KERNEL call convention work +; Check the SPIR_KERNEL call convention works. 
-declare dso_local i64 @_Z13get_global_idj(i32 noundef signext) - -define dso_local spir_kernel void @foo(ptr nocapture noundef readonly align 4 %a, ptr nocapture noundef readonly align 4 %b, ptr nocapture noundef writeonly align 4 %c) { +define dso_local spir_kernel void @foo() { ; RV32-LABEL: foo: -; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: mv s0, a2 -; RV32-NEXT: mv s1, a1 -; RV32-NEXT: mv s2, a0 -; RV32-NEXT: li a0, 0 -; RV32-NEXT: call _Z13get_global_idj -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add s2, s2, a0 -; RV32-NEXT: flw fa5, 0(s2) -; RV32-NEXT: add s1, s1, a0 -; RV32-NEXT: flw fa4, 0(s1) -; RV32-NEXT: fadd.s fa5, fa5, fa4 -; RV32-NEXT: add a0, s0, a0 -; RV32-NEXT: fsw fa5, 0(a0) -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 0(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 +; RV32: # %bb.0: ; RV32-NEXT: ret ; ; RV64-LABEL: foo: -; RV64: # %bb.0: # %entry -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s1, -24 -; RV64-NEXT: .cfi_offset s2, -32 -; RV64-NEXT: mv s0, a2 -; RV64-NEXT: mv s1, a1 -; RV64-NEXT: mv s2, a0 -; RV64-NEXT: li a0, 0 -; RV64-NEXT: call _Z13get_global_idj -; RV64-NEXT: sext.w a0, a0 -; RV64-NEXT: slli a0, 
a0, 2 -; RV64-NEXT: add s2, s2, a0 -; RV64-NEXT: flw fa5, 0(s2) -; RV64-NEXT: add s1, s1, a0 -; RV64-NEXT: flw fa4, 0(s1) -; RV64-NEXT: fadd.s fa5, fa5, fa4 -; RV64-NEXT: add a0, s0, a0 -; RV64-NEXT: fsw fa5, 0(a0) -; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 0(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 32 +; RV64: # %bb.0: ; RV64-NEXT: ret -entry: - %call = tail call i64 @_Z13get_global_idj(i32 noundef signext 0) - %sext = shl i64 %call, 32 - %idxprom = ashr exact i64 %sext, 32 - %arrayidx = getelementptr inbounds float, ptr %a, i64 %idxprom - %0 = load float, ptr %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %idxprom - %1 = load float, ptr %arrayidx2, align 4 - %add = fadd float %0, %1 - %arrayidx4 = getelementptr inbounds float, ptr %c, i64 %idxprom - store float %add, ptr %arrayidx4, align 4 ret void } From 3b209ac7bed6affd3d0920c913851ac3e042b54d Mon Sep 17 00:00:00 2001 From: wangpc Date: Wed, 18 Oct 2023 18:03:24 +0800 Subject: [PATCH 5/5] fixup! [OpenCL][RISCV] Support SPIR_KERNEL calling convention Remove -mattr --- llvm/test/CodeGen/RISCV/spir-kernel-cc.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll b/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll index 8731798c76c19..283f397373566 100644 --- a/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll +++ b/llvm/test/CodeGen/RISCV/spir-kernel-cc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+f,+d < %s | FileCheck %s -check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+f,+d < %s | FileCheck %s -check-prefix=RV64 +; RUN: llc -mtriple=riscv32 < %s | FileCheck %s -check-prefix=RV32 +; RUN: llc -mtriple=riscv64 < %s | FileCheck %s -check-prefix=RV64 ; Check the SPIR_KERNEL call convention works.