Skip to content

[WIP] Support relocation for descriptor offsets. #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1986,4 +1986,10 @@ def int_amdgcn_fdiv_fast : Intrinsic<
[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable]
>;

// Represent a relocation constant.
def int_amdgcn_reloc_constant : Intrinsic<
[llvm_i32_ty], [llvm_metadata_ty],
[IntrNoMem, IntrSpeculatable]
>;
}
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1607,6 +1607,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}

if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) {
return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
}
llvm_unreachable("Can't get register for value!");
}

Expand Down
21 changes: 17 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1760,11 +1760,23 @@ bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDValue &Offset, bool &Imm) const {

// FIXME: Handle non-constant offsets.
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
if (!C)
if (!C) {
if (ByteOffsetNode.getValueType().isScalarInteger() &&
ByteOffsetNode.getValueType().getSizeInBits() == 32) {
Offset = ByteOffsetNode;
Imm = false;
return true;
}
if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
Offset = ByteOffsetNode.getOperand(0);
Imm = false;
return true;
}
}
return false;
}

SDLoc SL(ByteOffsetNode);
GCNSubtarget::Generation Gen = Subtarget->getGeneration();
Expand Down Expand Up @@ -1829,7 +1841,8 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
// wraparound, because s_load instructions perform the addition in 64 bits.
if ((Addr.getValueType() != MVT::i32 ||
Addr->getFlags().hasNoUnsignedWrap()) &&
CurDAG->isBaseWithConstantOffset(Addr)) {
(CurDAG->isBaseWithConstantOffset(Addr) ||
Addr.getOpcode() == ISD::ADD)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);

Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6046,6 +6046,16 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(1, SL, MVT::i32));
return DAG.getSetCC(SL, MVT::i1, SrcHi, Aperture, ISD::SETEQ);
}
case Intrinsic::amdgcn_reloc_constant: {
Module *M = const_cast<Module *>(MF.getFunction().getParent());
const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString();
auto RelocSymbol = cast<GlobalVariable>(
M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext())));
SDValue GA = DAG.getTargetGlobalAddress(RelocSymbol, DL, MVT::i32, 0,
SIInstrInfo::MO_ABS32_LO);
return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
}
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
Expand Down
62 changes: 62 additions & 0 deletions llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
; Test that DAG->DAG ISel is able to pick up the S_LOAD_DWORDX4_SGPR instruction that fetches the offset
; from a register.

; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s

; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0, 0 :: (invariant load 16 from %ir.13, addrspace 4)

define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 {
.entry:
%5 = call i64 @llvm.amdgcn.s.getpc() #3
%6 = bitcast i64 %5 to <2 x i32>
%7 = insertelement <2 x i32> %6, i32 %resNode0, i32 0
%8 = bitcast <2 x i32> %7 to i64
%9 = inttoptr i64 %8 to [4294967295 x i8] addrspace(4)*
%10 = call i32 @llvm.amdgcn.reloc.constant(metadata !4)
%11 = zext i32 %10 to i64
%12 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %9, i64 0, i64 %11
%13 = bitcast i8 addrspace(4)* %12 to <4 x i32> addrspace(4)*, !amdgpu.uniform !5
%14 = load <4 x i32>, <4 x i32> addrspace(4)* %13, align 16, !invariant.load !5
%15 = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %14, i32 0, i32 0)
call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %15, <4 x i32> %14, i32 0, i32 0, i32 0)
ret void
}

declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0
; Function Attrs: nounwind writeonly
declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #1

; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.reloc.constant(metadata) #3

; Function Attrs: nounwind readnone speculatable
declare i64 @llvm.amdgcn.s.getpc() #3

; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg) #1

attributes #0 = { argmemonly nounwind willreturn }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "amdgpu-unroll-threshold"="700" }
attributes #3 = { nounwind readnone speculatable }
attributes #4 = { nounwind writeonly }

!llpc.compute.mode = !{!0}
!llpc.options = !{!1}
!llpc.options.CS = !{!2}
!llpc.user.data.nodes = !{!3, !4, !5, !6}
!amdgpu.pal.metadata.msgpack = !{!7}

!0 = !{i32 2, i32 3, i32 1}
!1 = !{i32 245227952, i32 996822128, i32 2024708198, i32 497230408}
!2 = !{i32 1381820427, i32 1742110173, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64}
!3 = !{!"DescriptorTableVaPtr", i32 0, i32 1, i32 1}
!4 = !{!"DescriptorBuffer", i32 4, i32 8, i32 0, i32 0}
!5 = !{!"DescriptorTableVaPtr", i32 1, i32 1, i32 1}
!6 = !{!"DescriptorBuffer", i32 4, i32 8, i32 1, i32 0}
!7 = !{!"\82\B0amdpal.pipelines\91\88\A4.api\A6Vulkan\B0.hardware_stages\81\A3.cs\82\AB.sgpr_limith\AB.vgpr_limit\CD\01\00\B7.internal_pipeline_hash\92\CF;jLp\0E\9D\E1\B0\CF\1D\A3\22Hx\AE\98f\AA.registers\88\CD.\07\02\CD.\08\03\CD.\09\01\CD.\12\CE\00,\00\00\CD.\13\CD\0F\88\CD.@\CE\10\00\00\00\CD.B\00\CD.C\01\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CFg\D6}\DDR\\\E8\0B\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\02\AEamdpal.version\92\02\03"}
!8 = !{i32 5}
!9 = !{!"doff_0_0_b"}
!10 = !{}
!11 = !{!"doff_1_0_b"}
30 changes: 30 additions & 0 deletions llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -filetype=obj -o %t.o < %s && llvm-readobj -relocations %t.o | FileCheck --check-prefix=ELF %s
; GCN-LABEL: {{^}}ps_main:
; GCN: v_mov_b32_{{.*}} v[[relocreg:[0-9]+]], doff_0_0_b@abs32@lo
; GCN-NEXT: exp {{.*}} v[[relocreg]], {{.*}}
; GCN-NEXT: s_endpgm
; GCN-NEXT: .Lfunc_end

; ELF: Relocations [
; ELF-NEXT: Section (3) .rel.text {
; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32 doff_0_0_b {{.*}}

define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
%rc = call i32 @llvm.amdgcn.reloc.constant(metadata !1)
%rcf = bitcast i32 %rc to float
call void @llvm.amdgcn.exp.f32(i32 immarg 40, i32 immarg 15, float %rcf, float undef, float undef, float undef, i1 immarg false, i1 immarg false) #0
ret void
}

; Function Attrs: inaccessiblememonly nounwind
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #1

; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.reloc.constant(metadata) #2

attributes #0 = { nounwind }
attributes #1 = { inaccessiblememonly nounwind }
attributes #2 = { nounwind readnone speculatable }

!1 = !{!"doff_0_0_b"}