Skip to content

[SPIR-V] Implement support of the SPV_INTEL_split_barrier SPIRV extension #112359

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/docs/SPIRVUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ list of supported SPIR-V extensions, sorted alphabetically by their extension na
- Adds decorations that can be applied to global (module scope) variables to help code generation for FPGA devices.
* - ``SPV_INTEL_optnone``
- Adds OptNoneINTEL value for Function Control mask that indicates a request to not optimize the function.
* - ``SPV_INTEL_split_barrier``
- Adds SPIR-V instructions to split a control barrier into two separate operations: the first indicates that an invocation has "arrived" at the barrier but should continue executing, and the second indicates that an invocation should "wait" for other invocations to arrive at the barrier before executing further.
* - ``SPV_INTEL_subgroups``
- Allows work items in a subgroup to share data without the use of local memory and work group barriers, and to utilize specialized hardware to load and store blocks of data from images or buffers.
* - ``SPV_INTEL_usm_storage_classes``
Expand Down
27 changes: 21 additions & 6 deletions llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,18 @@ static bool buildAtomicFlagInst(const SPIRV::IncomingCall *Call,
static bool buildBarrierInst(const SPIRV::IncomingCall *Call, unsigned Opcode,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
const SPIRV::DemangledBuiltin *Builtin = Call->Builtin;
const auto *ST =
static_cast<const SPIRVSubtarget *>(&MIRBuilder.getMF().getSubtarget());
if ((Opcode == SPIRV::OpControlBarrierArriveINTEL ||
Opcode == SPIRV::OpControlBarrierWaitINTEL) &&
!ST->canUseExtension(SPIRV::Extension::SPV_INTEL_split_barrier)) {
std::string DiagMsg = std::string(Builtin->Name) +
": the builtin requires the following SPIR-V "
"extension: SPV_INTEL_split_barrier";
report_fatal_error(DiagMsg.c_str(), false);
}

if (Call->isSpirvOp())
return buildOpFromWrapper(MIRBuilder, Opcode, Call, Register(0));

Expand All @@ -896,13 +908,16 @@ static bool buildBarrierInst(const SPIRV::IncomingCall *Call, unsigned Opcode,
if (MemFlags & SPIRV::CLK_IMAGE_MEM_FENCE)
MemSemantics |= SPIRV::MemorySemantics::ImageMemory;

if (Opcode == SPIRV::OpMemoryBarrier) {
std::memory_order MemOrder =
static_cast<std::memory_order>(getIConstVal(Call->Arguments[1], MRI));
MemSemantics = getSPIRVMemSemantics(MemOrder) | MemSemantics;
} else {
if (Opcode == SPIRV::OpMemoryBarrier)
MemSemantics = getSPIRVMemSemantics(static_cast<std::memory_order>(
getIConstVal(Call->Arguments[1], MRI))) |
MemSemantics;
else if (Opcode == SPIRV::OpControlBarrierArriveINTEL)
MemSemantics |= SPIRV::MemorySemantics::Release;
else if (Opcode == SPIRV::OpControlBarrierWaitINTEL)
MemSemantics |= SPIRV::MemorySemantics::Acquire;
else
MemSemantics |= SPIRV::MemorySemantics::SequentiallyConsistent;
}

Register MemSemanticsReg =
MemFlags == MemSemantics
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVBuiltins.td
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,12 @@ defm : DemangledNativeBuiltin<"barrier", OpenCL_std, Barrier, 1, 3, OpControlBar
defm : DemangledNativeBuiltin<"work_group_barrier", OpenCL_std, Barrier, 1, 3, OpControlBarrier>;
defm : DemangledNativeBuiltin<"__spirv_ControlBarrier", OpenCL_std, Barrier, 3, 3, OpControlBarrier>;

// cl_intel_split_work_group_barrier
defm : DemangledNativeBuiltin<"intel_work_group_barrier_arrive", OpenCL_std, Barrier, 1, 2, OpControlBarrierArriveINTEL>;
defm : DemangledNativeBuiltin<"__spirv_ControlBarrierArriveINTEL", OpenCL_std, Barrier, 3, 3, OpControlBarrierArriveINTEL>;
defm : DemangledNativeBuiltin<"intel_work_group_barrier_wait", OpenCL_std, Barrier, 1, 2, OpControlBarrierWaitINTEL>;
defm : DemangledNativeBuiltin<"__spirv_ControlBarrierWaitINTEL", OpenCL_std, Barrier, 3, 3, OpControlBarrierWaitINTEL>;

// Kernel enqueue builtin records:
defm : DemangledNativeBuiltin<"__enqueue_kernel_basic", OpenCL_std, Enqueue, 5, 5, OpEnqueueKernel>;
defm : DemangledNativeBuiltin<"__enqueue_kernel_basic_events", OpenCL_std, Enqueue, 8, 8, OpEnqueueKernel>;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ static const std::map<std::string, SPIRV::Extension::Extension>
{"SPV_INTEL_optnone", SPIRV::Extension::Extension::SPV_INTEL_optnone},
{"SPV_INTEL_usm_storage_classes",
SPIRV::Extension::Extension::SPV_INTEL_usm_storage_classes},
{"SPV_INTEL_split_barrier",
SPIRV::Extension::Extension::SPV_INTEL_split_barrier},
{"SPV_INTEL_subgroups",
SPIRV::Extension::Extension::SPV_INTEL_subgroups},
{"SPV_KHR_uniform_group_instructions",
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,12 @@ def OpNamedBarrierInitialize: UnOp<"OpNamedBarrierInitialize", 328>;
def OpMemoryNamedBarrier: Op<329, (outs), (ins ID:$barr, ID:$mem, ID:$sem),
"OpMemoryNamedBarrier $barr $mem $sem">;

// SPV_INTEL_split_barrier
def OpControlBarrierArriveINTEL: Op<6142, (outs), (ins ID:$exec, ID:$mem, ID:$sem),
"OpControlBarrierArriveINTEL $exec $mem $sem">;
def OpControlBarrierWaitINTEL: Op<6143, (outs), (ins ID:$exec, ID:$mem, ID:$sem),
"OpControlBarrierWaitINTEL $exec $mem $sem">;

// 3.42.21. Group and Subgroup Instructions

def OpGroupAsyncCopy: Op<259, (outs ID:$res), (ins TYPE:$ty, ID:$scope,
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1211,6 +1211,13 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addExtension(SPIRV::Extension::SPV_EXT_arithmetic_fence);
Reqs.addCapability(SPIRV::Capability::ArithmeticFenceEXT);
break;
case SPIRV::OpControlBarrierArriveINTEL:
case SPIRV::OpControlBarrierWaitINTEL:
if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_split_barrier)) {
Reqs.addExtension(SPIRV::Extension::SPV_INTEL_split_barrier);
Reqs.addCapability(SPIRV::Capability::SplitBarrierINTEL);
}
break;
default:
break;
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,7 @@ defm GlobalVariableFPGADecorationsINTEL : CapabilityOperand<6189, 0, 0, [SPV_INT
defm CacheControlsINTEL : CapabilityOperand<6441, 0, 0, [SPV_INTEL_cache_controls], []>;
defm CooperativeMatrixKHR : CapabilityOperand<6022, 0, 0, [SPV_KHR_cooperative_matrix], []>;
defm ArithmeticFenceEXT : CapabilityOperand<6144, 0, 0, [SPV_EXT_arithmetic_fence], []>;
defm SplitBarrierINTEL : CapabilityOperand<6141, 0, 0, [SPV_INTEL_split_barrier], []>;

//===----------------------------------------------------------------------===//
// Multiclass used to define SourceLanguage enum values and at the same time
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
; Adapted from Khronos Translator test suite: test/CodeGen/SPIRV/extensions/SPV_INTEL_split_barrier/

;; kernel void test(global uint* dst)
;; {
;; intel_work_group_barrier_arrive(CLK_LOCAL_MEM_FENCE);
;; intel_work_group_barrier_wait(CLK_LOCAL_MEM_FENCE);
;; intel_work_group_barrier_arrive(CLK_GLOBAL_MEM_FENCE);
;; intel_work_group_barrier_wait(CLK_GLOBAL_MEM_FENCE);
;;
;; intel_work_group_barrier_arrive(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
;; intel_work_group_barrier_wait(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
;;}

; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR

; CHECK-ERROR: LLVM ERROR: intel_work_group_barrier_arrive: the builtin requires the following SPIR-V extension: SPV_INTEL_split_barrier

; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_split_barrier %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_split_barrier %s -o - -filetype=obj | spirv-val %}

; CHECK: Capability SplitBarrierINTEL
; CHECK: Extension "SPV_INTEL_split_barrier"
; CHECK: %[[#UINT:]] = OpTypeInt 32 0
;
; Scopes:
; CHECK-DAG: %[[#SCOPE_WORK_GROUP:]] = OpConstant %[[#UINT]] 2{{$}}
;
; Memory Semantics:
; 0x2 Acquire + 0x100 WorkgroupMemory
; CHECK-DAG: %[[#ACQUIRE_LOCAL:]] = OpConstant %[[#UINT]] 258
; 0x4 Release + 0x100 WorkgroupMemory
; CHECK-DAG: %[[#RELEASE_LOCAL:]] = OpConstant %[[#UINT]] 260
; 0x2 Acquire + 0x200 CrossWorkgroupMemory
; CHECK-DAG: %[[#ACQUIRE_GLOBAL:]] = OpConstant %[[#UINT]] 514
; 0x4 Release + 0x200 CrossWorkgroupMemory
; CHECK-DAG: %[[#RELEASE_GLOBAL:]] = OpConstant %[[#UINT]] 516
; 0x2 Acquire + 0x100 WorkgroupMemory + 0x200 CrossWorkgroupMemory
; CHECK-DAG: %[[#ACQUIRE_LOCAL_GLOBAL:]] = OpConstant %[[#UINT]] 770
; 0x4 Release + 0x100 WorkgroupMemory + 0x200 CrossWorkgroupMemory
; CHECK-DAG: %[[#RELEASE_LOCAL_GLOBAL:]] = OpConstant %[[#UINT]] 772
;
; CHECK: OpControlBarrierArriveINTEL %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#RELEASE_LOCAL]]
; CHECK: OpControlBarrierWaitINTEL %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#ACQUIRE_LOCAL]]
; CHECK: OpControlBarrierArriveINTEL %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#RELEASE_GLOBAL]]
; CHECK: OpControlBarrierWaitINTEL %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#ACQUIRE_GLOBAL]]
;
; CHECK: OpControlBarrierArriveINTEL %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#RELEASE_LOCAL_GLOBAL]]
; CHECK: OpControlBarrierWaitINTEL %[[#SCOPE_WORK_GROUP]] %[[#SCOPE_WORK_GROUP]] %[[#ACQUIRE_LOCAL_GLOBAL]]

; Function Attrs: convergent norecurse nounwind
define dso_local spir_kernel void @test(ptr addrspace(1) nocapture noundef readnone align 4 %0) local_unnamed_addr #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
tail call spir_func void @_Z31intel_work_group_barrier_arrivej(i32 noundef 1) #2
tail call spir_func void @_Z29intel_work_group_barrier_waitj(i32 noundef 1) #2
tail call spir_func void @_Z31intel_work_group_barrier_arrivej(i32 noundef 2) #2
tail call spir_func void @_Z29intel_work_group_barrier_waitj(i32 noundef 2) #2
tail call spir_func void @_Z31intel_work_group_barrier_arrivej(i32 noundef 3) #2
tail call spir_func void @_Z29intel_work_group_barrier_waitj(i32 noundef 3) #2
ret void
}

; Function Attrs: convergent
declare dso_local spir_func void @_Z31intel_work_group_barrier_arrivej(i32 noundef) local_unnamed_addr #1

; Function Attrs: convergent
declare dso_local spir_func void @_Z29intel_work_group_barrier_waitj(i32 noundef) local_unnamed_addr #1

attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #2 = { convergent nounwind }

!llvm.module.flags = !{!0, !1}
!opencl.ocl.version = !{!2}
!opencl.spir.version = !{!2}
!llvm.ident = !{!3}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"frame-pointer", i32 2}
!2 = !{i32 1, i32 2}
!3 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project 861386dbd6ff0d91636b7c674c2abb2eccd9d3f2)"}
!4 = !{i32 1}
!5 = !{!"none"}
!6 = !{!"uint*"}
!7 = !{!""}
Loading
Loading