Skip to content

Commit 8d29aa2

Browse files
committed
Only strip invariant.load from special pointers
Other backends (in this case NVPTX) require that `invariant.load` metadata is maintained to generate non-coherent loads. Currently, we unconditionally strip that metadata from all loads, since our other uses of it may have become invalid. x-ref: llvm/llvm-project#112834 JuliaGPU/CUDA.jl#2531
1 parent 20162ea commit 8d29aa2

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

src/llvm-late-gc-lowering.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1979,8 +1979,10 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
19791979
// strip all constant alias information, as it might depend on the gc having
19801980
// preserved a gc root, which stops being true after this pass (#32215)
19811981
// similar to RewriteStatepointsForGC::stripNonValidData, but less aggressive
1982-
if (I->getMetadata(LLVMContext::MD_invariant_load))
1983-
I->setMetadata(LLVMContext::MD_invariant_load, NULL);
1982+
if (auto *LI = dyn_cast<Load>(I)){
1983+
if (isSpecialPtr(LI->getPointerOperand()->getType()) && LI->getMetadata(LLVMContext::MD_invariant_load))
1984+
LI->setMetadata(LLVMContext::MD_invariant_load, NULL);
1985+
}
19841986
if (MDNode *TBAA = I->getMetadata(LLVMContext::MD_tbaa)) {
19851987
if (TBAA->getNumOperands() == 4 && isTBAA(TBAA, {"jtbaa_const", "jtbaa_memoryptr", "jtbaa_memorylen", "tbaa_memoryown"})) {
19861988
MDNode *MutableTBAA = createMutableTBAAAccessTag(TBAA);

test/llvmpasses/late-lower-gc.ll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,22 @@ top:
9090
ret void
9191
}
9292

93+
; Confirm that `invariant.load` metadata on loads from non-special pointers survives
94+
define void @gc_keep_invariant(float addrspace(1)* %0) {
top:
; Regression test: the pass should only strip `!invariant.load` from loads
; through special pointers; a load through the addrspace(1) argument must keep it.
; CHECK-LABEL: @gc_keep_invariant
    %pgcstack = call {}*** @julia.get_pgcstack()
    %1 = bitcast {}*** %pgcstack to {}**
    ; The current task is derived from the pgcstack (%1), not from the float argument (%0).
    %current_task = getelementptr inbounds {}*, {}** %1, i64 -12

; CHECK: %current_task = getelementptr inbounds ptr, ptr %1, i64 -12
    ; No allocation happens in this function, so no ptls lookup is expected between
    ; the GEP above and the load below.
    %2 = load float, float addrspace(1)* %0, align 4, !invariant.load !{}
; CHECK-NEXT: %2 = load float, ptr addrspace(1) %0, align 4, !invariant.load
    ret void
}
108+
93109
define i32 @callee_root({} addrspace(10)* %v0, {} addrspace(10)* %v1) {
94110
top:
95111
; CHECK-LABEL: @callee_root

0 commit comments

Comments
 (0)