Skip to content

Commit f3afdc4

Browse files
authored
AtomicExpand: Fix creating invalid ptrmask for fat pointers (llvm#94955)
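The llvm.ptrmask intrinsic requires its integer mask operand to have the index width of the pointer's address space, not the full pointer width. createMaskInstrs therefore has to build the mask type with DL.getIndexType rather than DL.getIntPtrType; otherwise partword atomic expansion on fat pointers (e.g. AMDGPU buffer fat pointers, addrspace(7)) creates an invalid ptrmask call.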
1 parent 457bedf commit f3afdc4

File tree

4 files changed

+363
-1
lines changed

4 files changed

+363
-1
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -765,7 +765,7 @@ static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
765765
assert(ValueSize < MinWordSize);
766766

767767
PointerType *PtrTy = cast<PointerType>(Addr->getType());
768-
IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
768+
IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
769769
Value *PtrLSB;
770770

771771
if (AddrAlign < MinWordSize) {

llvm/test/CodeGen/AMDGPU/ptrmask.ll

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,160 @@ define amdgpu_ps ptr addrspace(3) @s_ptrmask_local_variable_i32(ptr addrspace(3)
6565
ret ptr addrspace(3) %masked
6666
}
6767

68+
define ptr addrspace(7) @v_ptrmask_buffer_fat_ptr_variable_i32(ptr addrspace(7) %ptr, i32 %mask) {
69+
; GCN-LABEL: v_ptrmask_buffer_fat_ptr_variable_i32:
70+
; GCN: ; %bb.0:
71+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72+
; GCN-NEXT: v_and_b32_e32 v4, v4, v5
73+
; GCN-NEXT: s_setpc_b64 s[30:31]
74+
;
75+
; GFX10PLUS-LABEL: v_ptrmask_buffer_fat_ptr_variable_i32:
76+
; GFX10PLUS: ; %bb.0:
77+
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78+
; GFX10PLUS-NEXT: v_and_b32_e32 v4, v4, v5
79+
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
80+
%masked = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) %ptr, i32 %mask)
81+
ret ptr addrspace(7) %masked
82+
}
83+
84+
define ptr addrspace(7) @v_ptrmask_buffer_fat_ptr_i32_neg8(ptr addrspace(7) %ptr) {
85+
; GCN-LABEL: v_ptrmask_buffer_fat_ptr_i32_neg8:
86+
; GCN: ; %bb.0:
87+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88+
; GCN-NEXT: v_and_b32_e32 v4, -8, v4
89+
; GCN-NEXT: s_setpc_b64 s[30:31]
90+
;
91+
; GFX10PLUS-LABEL: v_ptrmask_buffer_fat_ptr_i32_neg8:
92+
; GFX10PLUS: ; %bb.0:
93+
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94+
; GFX10PLUS-NEXT: v_and_b32_e32 v4, -8, v4
95+
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
96+
%masked = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) %ptr, i32 -8)
97+
ret ptr addrspace(7) %masked
98+
}
99+
100+
define amdgpu_ps ptr addrspace(7) @s_ptrmask_buffer_fat_ptr_variable_i32(ptr addrspace(7) inreg %ptr, i32 inreg %mask) {
101+
; GCN-LABEL: s_ptrmask_buffer_fat_ptr_variable_i32:
102+
; GCN: ; %bb.0:
103+
; GCN-NEXT: s_mov_b32 s8, s4
104+
; GCN-NEXT: s_mov_b32 s1, s3
105+
; GCN-NEXT: s_mov_b32 s0, s2
106+
; GCN-NEXT: s_and_b32 s4, s6, s7
107+
; GCN-NEXT: s_mov_b32 s2, s8
108+
; GCN-NEXT: s_mov_b32 s3, s5
109+
; GCN-NEXT: ; return to shader part epilog
110+
;
111+
; GFX10PLUS-LABEL: s_ptrmask_buffer_fat_ptr_variable_i32:
112+
; GFX10PLUS: ; %bb.0:
113+
; GFX10PLUS-NEXT: s_mov_b32 s8, s4
114+
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
115+
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
116+
; GFX10PLUS-NEXT: s_and_b32 s4, s6, s7
117+
; GFX10PLUS-NEXT: s_mov_b32 s2, s8
118+
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
119+
; GFX10PLUS-NEXT: ; return to shader part epilog
120+
%masked = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) %ptr, i32 %mask)
121+
ret ptr addrspace(7) %masked
122+
}
123+
124+
define amdgpu_ps ptr addrspace(7) @s_ptrmask_buffer_fat_ptr_i32_neg8(ptr addrspace(7) inreg %ptr) {
125+
; GCN-LABEL: s_ptrmask_buffer_fat_ptr_i32_neg8:
126+
; GCN: ; %bb.0:
127+
; GCN-NEXT: s_mov_b32 s7, s4
128+
; GCN-NEXT: s_mov_b32 s1, s3
129+
; GCN-NEXT: s_mov_b32 s0, s2
130+
; GCN-NEXT: s_and_b32 s4, s6, -8
131+
; GCN-NEXT: s_mov_b32 s2, s7
132+
; GCN-NEXT: s_mov_b32 s3, s5
133+
; GCN-NEXT: ; return to shader part epilog
134+
;
135+
; GFX10PLUS-LABEL: s_ptrmask_buffer_fat_ptr_i32_neg8:
136+
; GFX10PLUS: ; %bb.0:
137+
; GFX10PLUS-NEXT: s_mov_b32 s7, s4
138+
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
139+
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
140+
; GFX10PLUS-NEXT: s_and_b32 s4, s6, -8
141+
; GFX10PLUS-NEXT: s_mov_b32 s2, s7
142+
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
143+
; GFX10PLUS-NEXT: ; return to shader part epilog
144+
%masked = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) %ptr, i32 -8)
145+
ret ptr addrspace(7) %masked
146+
}
147+
148+
define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128(ptr addrspace(8) %ptr, i128 %mask) {
149+
; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128:
150+
; GCN: ; %bb.0:
151+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152+
; GCN-NEXT: v_and_b32_e32 v1, v1, v5
153+
; GCN-NEXT: v_and_b32_e32 v0, v0, v4
154+
; GCN-NEXT: v_and_b32_e32 v3, v3, v7
155+
; GCN-NEXT: v_and_b32_e32 v2, v2, v6
156+
; GCN-NEXT: s_setpc_b64 s[30:31]
157+
;
158+
; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128:
159+
; GFX10PLUS: ; %bb.0:
160+
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161+
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v4
162+
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v5
163+
; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v6
164+
; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v7
165+
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
166+
%masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 %mask)
167+
ret ptr addrspace(8) %masked
168+
}
169+
170+
define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128_neg8(ptr addrspace(8) %ptr) {
171+
; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8:
172+
; GCN: ; %bb.0:
173+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174+
; GCN-NEXT: v_and_b32_e32 v0, -8, v0
175+
; GCN-NEXT: s_setpc_b64 s[30:31]
176+
;
177+
; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8:
178+
; GFX10PLUS: ; %bb.0:
179+
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180+
; GFX10PLUS-NEXT: v_and_b32_e32 v0, -8, v0
181+
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
182+
%masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 -8)
183+
ret ptr addrspace(8) %masked
184+
}
185+
186+
define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128(ptr addrspace(8) inreg %ptr, i128 inreg %mask) {
187+
; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128:
188+
; GCN: ; %bb.0:
189+
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
190+
; GCN-NEXT: s_and_b64 s[2:3], s[4:5], s[8:9]
191+
; GCN-NEXT: ; return to shader part epilog
192+
;
193+
; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128:
194+
; GFX10PLUS: ; %bb.0:
195+
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
196+
; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[4:5], s[8:9]
197+
; GFX10PLUS-NEXT: ; return to shader part epilog
198+
%masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 %mask)
199+
ret ptr addrspace(8) %masked
200+
}
201+
202+
define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128_neg8(ptr addrspace(8) inreg %ptr) {
203+
; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8:
204+
; GCN: ; %bb.0:
205+
; GCN-NEXT: s_mov_b32 s1, s3
206+
; GCN-NEXT: s_and_b32 s0, s2, -8
207+
; GCN-NEXT: s_mov_b32 s2, s4
208+
; GCN-NEXT: s_mov_b32 s3, s5
209+
; GCN-NEXT: ; return to shader part epilog
210+
;
211+
; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8:
212+
; GFX10PLUS: ; %bb.0:
213+
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
214+
; GFX10PLUS-NEXT: s_and_b32 s0, s2, -8
215+
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
216+
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
217+
; GFX10PLUS-NEXT: ; return to shader part epilog
218+
%masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 -8)
219+
ret ptr addrspace(8) %masked
220+
}
221+
68222
declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) #0
69223
declare ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1), i64) #0
70224

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1262,6 +1262,110 @@ define bfloat @test_atomicrmw_xchg_bf16_global_agent_align4(ptr addrspace(1) %pt
12621262
ret bfloat %res
12631263
}
12641264

1265+
define i16 @test_atomicrmw_xchg_i16_buffer_fat_agent(ptr addrspace(7) %ptr, i16 %value) {
1266+
; CHECK-LABEL: @test_atomicrmw_xchg_i16_buffer_fat_agent(
1267+
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) [[PTR:%.*]], i32 -4)
1268+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(7) [[PTR]] to i32
1269+
; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1270+
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1271+
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]]
1272+
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1273+
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
1274+
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
1275+
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(7) [[ALIGNEDADDR]], align 4
1276+
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1277+
; CHECK: atomicrmw.start:
1278+
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1279+
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1280+
; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]]
1281+
; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg ptr addrspace(7) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] syncscope("agent") seq_cst seq_cst, align 4
1282+
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
1283+
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
1284+
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1285+
; CHECK: atomicrmw.end:
1286+
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1287+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
1288+
; CHECK-NEXT: ret i16 [[EXTRACTED]]
1289+
;
1290+
%res = atomicrmw xchg ptr addrspace(7) %ptr, i16 %value syncscope("agent") seq_cst
1291+
ret i16 %res
1292+
}
1293+
1294+
define i16 @test_atomicrmw_xchg_i16_buffer_fat_agent_align4(ptr addrspace(7) %ptr, i16 %value) {
1295+
; CHECK-LABEL: @test_atomicrmw_xchg_i16_buffer_fat_agent_align4(
1296+
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
1297+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(7) [[PTR:%.*]], align 4
1298+
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1299+
; CHECK: atomicrmw.start:
1300+
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1301+
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[LOADED]], -65536
1302+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP1]]
1303+
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(7) [[PTR]], i32 [[LOADED]], i32 [[TMP4]] syncscope("agent") seq_cst seq_cst, align 4
1304+
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
1305+
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
1306+
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1307+
; CHECK: atomicrmw.end:
1308+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
1309+
; CHECK-NEXT: ret i16 [[EXTRACTED]]
1310+
;
1311+
%res = atomicrmw xchg ptr addrspace(7) %ptr, i16 %value syncscope("agent") seq_cst, align 4
1312+
ret i16 %res
1313+
}
1314+
1315+
define i16 @test_atomicrmw_add_i16_buffer_fat_agent(ptr addrspace(7) %ptr, i16 %value) {
1316+
; CHECK-LABEL: @test_atomicrmw_add_i16_buffer_fat_agent(
1317+
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) [[PTR:%.*]], i32 -4)
1318+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(7) [[PTR]] to i32
1319+
; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1320+
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1321+
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]]
1322+
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1323+
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
1324+
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
1325+
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(7) [[ALIGNEDADDR]], align 4
1326+
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1327+
; CHECK: atomicrmw.start:
1328+
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1329+
; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
1330+
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
1331+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1332+
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
1333+
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(7) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
1334+
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1335+
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1336+
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1337+
; CHECK: atomicrmw.end:
1338+
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1339+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
1340+
; CHECK-NEXT: ret i16 [[EXTRACTED]]
1341+
;
1342+
%res = atomicrmw add ptr addrspace(7) %ptr, i16 %value syncscope("agent") seq_cst
1343+
ret i16 %res
1344+
}
1345+
1346+
define i16 @test_atomicrmw_add_i16_buffer_fat_agent_align4(ptr addrspace(7) %ptr, i16 %value) {
1347+
; CHECK-LABEL: @test_atomicrmw_add_i16_buffer_fat_agent_align4(
1348+
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
1349+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(7) [[PTR:%.*]], align 4
1350+
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1351+
; CHECK: atomicrmw.start:
1352+
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1353+
; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[TMP1]]
1354+
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[NEW]], 65535
1355+
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536
1356+
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
1357+
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(7) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4
1358+
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1359+
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1360+
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1361+
; CHECK: atomicrmw.end:
1362+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
1363+
; CHECK-NEXT: ret i16 [[EXTRACTED]]
1364+
;
1365+
%res = atomicrmw add ptr addrspace(7) %ptr, i16 %value syncscope("agent") seq_cst, align 4
1366+
ret i16 %res
1367+
}
1368+
12651369
!0 = !{}
12661370
!1 = !{!"foo", !"bar"}
12671371
!2 = !{!3}

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,3 +1608,107 @@ define i8 @test_atomicrmw_dec_i8_flat_agent_align4(ptr %ptr, i8 %value) {
16081608
%res = atomicrmw udec_wrap ptr %ptr, i8 %value syncscope("agent") seq_cst, align 4
16091609
ret i8 %res
16101610
}
1611+
1612+
define i8 @test_atomicrmw_xchg_i8_buffer_fat_agent(ptr addrspace(7) %ptr, i8 %value) {
1613+
; CHECK-LABEL: @test_atomicrmw_xchg_i8_buffer_fat_agent(
1614+
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) [[PTR:%.*]], i32 -4)
1615+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(7) [[PTR]] to i32
1616+
; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1617+
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1618+
; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[TMP2]]
1619+
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1620+
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
1621+
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
1622+
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(7) [[ALIGNEDADDR]], align 4
1623+
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1624+
; CHECK: atomicrmw.start:
1625+
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1626+
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1627+
; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]]
1628+
; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg ptr addrspace(7) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] syncscope("agent") seq_cst seq_cst, align 4
1629+
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
1630+
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
1631+
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1632+
; CHECK: atomicrmw.end:
1633+
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1634+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1635+
; CHECK-NEXT: ret i8 [[EXTRACTED]]
1636+
;
1637+
%res = atomicrmw xchg ptr addrspace(7) %ptr, i8 %value syncscope("agent") seq_cst
1638+
ret i8 %res
1639+
}
1640+
1641+
define i8 @test_atomicrmw_xchg_i8_buffer_fat_agent_align4(ptr addrspace(7) %ptr, i8 %value) {
1642+
; CHECK-LABEL: @test_atomicrmw_xchg_i8_buffer_fat_agent_align4(
1643+
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[VALUE:%.*]] to i32
1644+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(7) [[PTR:%.*]], align 4
1645+
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1646+
; CHECK: atomicrmw.start:
1647+
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1648+
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[LOADED]], -256
1649+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP1]]
1650+
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(7) [[PTR]], i32 [[LOADED]], i32 [[TMP4]] syncscope("agent") seq_cst seq_cst, align 4
1651+
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
1652+
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
1653+
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1654+
; CHECK: atomicrmw.end:
1655+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i8
1656+
; CHECK-NEXT: ret i8 [[EXTRACTED]]
1657+
;
1658+
%res = atomicrmw xchg ptr addrspace(7) %ptr, i8 %value syncscope("agent") seq_cst, align 4
1659+
ret i8 %res
1660+
}
1661+
1662+
define i8 @test_atomicrmw_add_i8_buffer_fat_agent(ptr addrspace(7) %ptr, i8 %value) {
1663+
; CHECK-LABEL: @test_atomicrmw_add_i8_buffer_fat_agent(
1664+
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) [[PTR:%.*]], i32 -4)
1665+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(7) [[PTR]] to i32
1666+
; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1667+
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1668+
; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[TMP2]]
1669+
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1670+
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
1671+
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
1672+
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(7) [[ALIGNEDADDR]], align 4
1673+
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1674+
; CHECK: atomicrmw.start:
1675+
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1676+
; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
1677+
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
1678+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1679+
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
1680+
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(7) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
1681+
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1682+
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1683+
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1684+
; CHECK: atomicrmw.end:
1685+
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1686+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1687+
; CHECK-NEXT: ret i8 [[EXTRACTED]]
1688+
;
1689+
%res = atomicrmw add ptr addrspace(7) %ptr, i8 %value syncscope("agent") seq_cst
1690+
ret i8 %res
1691+
}
1692+
1693+
define i8 @test_atomicrmw_add_i8_buffer_fat_agent_align4(ptr addrspace(7) %ptr, i8 %value) {
1694+
; CHECK-LABEL: @test_atomicrmw_add_i8_buffer_fat_agent_align4(
1695+
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[VALUE:%.*]] to i32
1696+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(7) [[PTR:%.*]], align 4
1697+
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
1698+
; CHECK: atomicrmw.start:
1699+
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1700+
; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[TMP1]]
1701+
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[NEW]], 255
1702+
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -256
1703+
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
1704+
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(7) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4
1705+
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1706+
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1707+
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1708+
; CHECK: atomicrmw.end:
1709+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i8
1710+
; CHECK-NEXT: ret i8 [[EXTRACTED]]
1711+
;
1712+
%res = atomicrmw add ptr addrspace(7) %ptr, i8 %value syncscope("agent") seq_cst, align 4
1713+
ret i8 %res
1714+
}

0 commit comments

Comments
 (0)