Skip to content

Commit 0dc5d2e

Browse files
authored
[AMDGPU] Add gfx1250 memory legalizer tests run lines. NFC (#160586)
1 parent 83331cc commit 0dc5d2e

File tree

3 files changed

+163
-0
lines changed

3 files changed

+163
-0
lines changed

llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
33
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s
4+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
45

56
define amdgpu_kernel void @private_last_use_load_0(ptr addrspace(5) %in, ptr addrspace(1) %out) {
67
; GFX12-LABEL: private_last_use_load_0:
@@ -13,6 +14,17 @@ define amdgpu_kernel void @private_last_use_load_0(ptr addrspace(5) %in, ptr add
1314
; GFX12-NEXT: s_wait_loadcnt 0x0
1415
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1516
; GFX12-NEXT: s_endpgm
17+
;
18+
; GFX1250-LABEL: private_last_use_load_0:
19+
; GFX1250: ; %bb.0: ; %entry
20+
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
21+
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
22+
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
23+
; GFX1250-NEXT: s_wait_kmcnt 0x0
24+
; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_LU
25+
; GFX1250-NEXT: s_wait_loadcnt 0x0
26+
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
27+
; GFX1250-NEXT: s_endpgm
1628
entry:
1729
%val = load i32, ptr addrspace(5) %in, align 4, !amdgpu.last.use !{}
1830
store i32 %val, ptr addrspace(1) %out
@@ -36,6 +48,20 @@ define amdgpu_kernel void @private_last_use_load_1(ptr addrspace(5) %in, ptr add
3648
; GFX12-NEXT: s_wait_loadcnt 0x0
3749
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
3850
; GFX12-NEXT: s_endpgm
51+
;
52+
; GFX1250-LABEL: private_last_use_load_1:
53+
; GFX1250: ; %bb.0: ; %entry
54+
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
55+
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
56+
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
57+
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
58+
; GFX1250-NEXT: s_mov_b32 s3, 0x3ff
59+
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s3
60+
; GFX1250-NEXT: s_wait_kmcnt 0x0
61+
; GFX1250-NEXT: scratch_load_b32 v1, v1, s2 scale_offset th:TH_LOAD_LU
62+
; GFX1250-NEXT: s_wait_loadcnt 0x0
63+
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
64+
; GFX1250-NEXT: s_endpgm
3965
entry:
4066
%tid = call i32 @llvm.amdgcn.workitem.id.x()
4167
%val.gep = getelementptr inbounds i32, ptr addrspace(5) %in, i32 %tid
@@ -57,6 +83,17 @@ define amdgpu_kernel void @private_last_use_and_volatile_load(ptr addrspace(5) %
5783
; GFX12-NEXT: s_wait_loadcnt 0x0
5884
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
5985
; GFX12-NEXT: s_endpgm
86+
;
87+
; GFX1250-LABEL: private_last_use_and_volatile_load:
88+
; GFX1250: ; %bb.0: ; %entry
89+
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
90+
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
91+
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
92+
; GFX1250-NEXT: s_wait_kmcnt 0x0
93+
; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_BYPASS scope:SCOPE_SYS
94+
; GFX1250-NEXT: s_wait_loadcnt 0x0
95+
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
96+
; GFX1250-NEXT: s_endpgm
6097
entry:
6198
%val = load volatile i32, ptr addrspace(5) %in, align 4, !amdgpu.last.use !{}
6299
store i32 %val, ptr addrspace(1) %out
@@ -74,6 +111,17 @@ define amdgpu_kernel void @private_last_use_and_nontemporal_load(ptr addrspace(5
74111
; GFX12-NEXT: s_wait_loadcnt 0x0
75112
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
76113
; GFX12-NEXT: s_endpgm
114+
;
115+
; GFX1250-LABEL: private_last_use_and_nontemporal_load:
116+
; GFX1250: ; %bb.0: ; %entry
117+
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
118+
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
119+
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
120+
; GFX1250-NEXT: s_wait_kmcnt 0x0
121+
; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_LU
122+
; GFX1250-NEXT: s_wait_loadcnt 0x0
123+
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
124+
; GFX1250-NEXT: s_endpgm
77125
entry:
78126
%val = load i32, ptr addrspace(5) %in, align 4, !amdgpu.last.use !{}, !nontemporal !0
79127
store i32 %val, ptr addrspace(1) %out

llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
1313
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
1414
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
15+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
1516

1617
define amdgpu_kernel void @private_nontemporal_load_0(
1718
; GFX6-LABEL: private_nontemporal_load_0:
@@ -201,6 +202,17 @@ define amdgpu_kernel void @private_nontemporal_load_0(
201202
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
202203
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
203204
; GFX12-CU-NEXT: s_endpgm
205+
;
206+
; GFX1250-LABEL: private_nontemporal_load_0:
207+
; GFX1250: ; %bb.0: ; %entry
208+
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
209+
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
210+
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
211+
; GFX1250-NEXT: s_wait_kmcnt 0x0
212+
; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_NT
213+
; GFX1250-NEXT: s_wait_loadcnt 0x0
214+
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
215+
; GFX1250-NEXT: s_endpgm
204216
ptr addrspace(5) %in, ptr addrspace(1) %out) {
205217
entry:
206218
%val = load i32, ptr addrspace(5) %in, align 4, !nontemporal !0
@@ -450,6 +462,20 @@ define amdgpu_kernel void @private_nontemporal_load_1(
450462
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
451463
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
452464
; GFX12-CU-NEXT: s_endpgm
465+
;
466+
; GFX1250-LABEL: private_nontemporal_load_1:
467+
; GFX1250: ; %bb.0: ; %entry
468+
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
469+
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
470+
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
471+
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
472+
; GFX1250-NEXT: s_mov_b32 s3, 0x3ff
473+
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s3
474+
; GFX1250-NEXT: s_wait_kmcnt 0x0
475+
; GFX1250-NEXT: scratch_load_b32 v1, v1, s2 scale_offset th:TH_LOAD_NT
476+
; GFX1250-NEXT: s_wait_loadcnt 0x0
477+
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
478+
; GFX1250-NEXT: s_endpgm
453479
ptr addrspace(5) %in, ptr addrspace(1) %out) {
454480
entry:
455481
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -627,6 +653,17 @@ define amdgpu_kernel void @private_nontemporal_store_0(
627653
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
628654
; GFX12-CU-NEXT: scratch_store_b32 off, v0, s0 th:TH_STORE_NT
629655
; GFX12-CU-NEXT: s_endpgm
656+
;
657+
; GFX1250-LABEL: private_nontemporal_store_0:
658+
; GFX1250: ; %bb.0: ; %entry
659+
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
660+
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
661+
; GFX1250-NEXT: s_wait_kmcnt 0x0
662+
; GFX1250-NEXT: s_load_b32 s1, s[2:3], 0x0
663+
; GFX1250-NEXT: s_wait_kmcnt 0x0
664+
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
665+
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 th:TH_STORE_NT
666+
; GFX1250-NEXT: s_endpgm
630667
ptr addrspace(1) %in, ptr addrspace(5) %out) {
631668
entry:
632669
%val = load i32, ptr addrspace(1) %in, align 4
@@ -846,6 +883,20 @@ define amdgpu_kernel void @private_nontemporal_store_1(
846883
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
847884
; GFX12-CU-NEXT: scratch_store_b32 v1, v0, s0 th:TH_STORE_NT
848885
; GFX12-CU-NEXT: s_endpgm
886+
;
887+
; GFX1250-LABEL: private_nontemporal_store_1:
888+
; GFX1250: ; %bb.0: ; %entry
889+
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
890+
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
891+
; GFX1250-NEXT: s_wait_kmcnt 0x0
892+
; GFX1250-NEXT: s_load_b32 s1, s[2:3], 0x0
893+
; GFX1250-NEXT: s_wait_xcnt 0x0
894+
; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
895+
; GFX1250-NEXT: v_and_b32_e64 v1, v0, s2
896+
; GFX1250-NEXT: s_wait_kmcnt 0x0
897+
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
898+
; GFX1250-NEXT: scratch_store_b32 v1, v0, s0 scale_offset th:TH_STORE_NT
899+
; GFX1250-NEXT: s_endpgm
849900
ptr addrspace(1) %in, ptr addrspace(5) %out) {
850901
entry:
851902
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1047,6 +1098,17 @@ define amdgpu_kernel void @private_nontemporal_volatile_load(
10471098
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
10481099
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
10491100
; GFX12-CU-NEXT: s_endpgm
1101+
;
1102+
; GFX1250-LABEL: private_nontemporal_volatile_load:
1103+
; GFX1250: ; %bb.0: ; %entry
1104+
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
1105+
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
1106+
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
1107+
; GFX1250-NEXT: s_wait_kmcnt 0x0
1108+
; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_NT scope:SCOPE_SYS
1109+
; GFX1250-NEXT: s_wait_loadcnt 0x0
1110+
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
1111+
; GFX1250-NEXT: s_endpgm
10501112
ptr addrspace(5) %in, ptr addrspace(1) %out) {
10511113
entry:
10521114
%val = load volatile i32, ptr addrspace(5) %in, align 4, !nontemporal !0

llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
99
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
1010
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
11+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
1112

1213
define amdgpu_kernel void @private_volatile_load_0(
1314
; GFX6-LABEL: private_volatile_load_0:
@@ -155,6 +156,17 @@ define amdgpu_kernel void @private_volatile_load_0(
155156
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
156157
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
157158
; GFX12-CU-NEXT: s_endpgm
159+
;
160+
; GFX1250-LABEL: private_volatile_load_0:
161+
; GFX1250: ; %bb.0: ; %entry
162+
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
163+
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
164+
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
165+
; GFX1250-NEXT: s_wait_kmcnt 0x0
166+
; GFX1250-NEXT: scratch_load_b32 v1, off, s2 scope:SCOPE_SYS
167+
; GFX1250-NEXT: s_wait_loadcnt 0x0
168+
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
169+
; GFX1250-NEXT: s_endpgm
158170
ptr addrspace(5) %in, ptr addrspace(1) %out) {
159171
entry:
160172
%val = load volatile i32, ptr addrspace(5) %in, align 4
@@ -340,6 +352,20 @@ define amdgpu_kernel void @private_volatile_load_1(
340352
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
341353
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
342354
; GFX12-CU-NEXT: s_endpgm
355+
;
356+
; GFX1250-LABEL: private_volatile_load_1:
357+
; GFX1250: ; %bb.0: ; %entry
358+
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
359+
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
360+
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
361+
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
362+
; GFX1250-NEXT: s_mov_b32 s3, 0x3ff
363+
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s3
364+
; GFX1250-NEXT: s_wait_kmcnt 0x0
365+
; GFX1250-NEXT: scratch_load_b32 v1, v1, s2 scale_offset scope:SCOPE_SYS
366+
; GFX1250-NEXT: s_wait_loadcnt 0x0
367+
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
368+
; GFX1250-NEXT: s_endpgm
343369
ptr addrspace(5) %in, ptr addrspace(1) %out) {
344370
entry:
345371
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -490,6 +516,18 @@ define amdgpu_kernel void @private_volatile_store_0(
490516
; GFX12-CU-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
491517
; GFX12-CU-NEXT: s_wait_storecnt 0x0
492518
; GFX12-CU-NEXT: s_endpgm
519+
;
520+
; GFX1250-LABEL: private_volatile_store_0:
521+
; GFX1250: ; %bb.0: ; %entry
522+
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
523+
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
524+
; GFX1250-NEXT: s_wait_kmcnt 0x0
525+
; GFX1250-NEXT: s_load_b32 s1, s[2:3], 0x0
526+
; GFX1250-NEXT: s_wait_kmcnt 0x0
527+
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
528+
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
529+
; GFX1250-NEXT: s_wait_storecnt 0x0
530+
; GFX1250-NEXT: s_endpgm
493531
ptr addrspace(1) %in, ptr addrspace(5) %out) {
494532
entry:
495533
%val = load i32, ptr addrspace(1) %in, align 4
@@ -664,6 +702,21 @@ define amdgpu_kernel void @private_volatile_store_1(
664702
; GFX12-CU-NEXT: scratch_store_b32 v1, v0, s0 scope:SCOPE_SYS
665703
; GFX12-CU-NEXT: s_wait_storecnt 0x0
666704
; GFX12-CU-NEXT: s_endpgm
705+
;
706+
; GFX1250-LABEL: private_volatile_store_1:
707+
; GFX1250: ; %bb.0: ; %entry
708+
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
709+
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
710+
; GFX1250-NEXT: s_wait_kmcnt 0x0
711+
; GFX1250-NEXT: s_load_b32 s1, s[2:3], 0x0
712+
; GFX1250-NEXT: s_wait_xcnt 0x0
713+
; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
714+
; GFX1250-NEXT: v_and_b32_e64 v1, v0, s2
715+
; GFX1250-NEXT: s_wait_kmcnt 0x0
716+
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
717+
; GFX1250-NEXT: scratch_store_b32 v1, v0, s0 scale_offset scope:SCOPE_SYS
718+
; GFX1250-NEXT: s_wait_storecnt 0x0
719+
; GFX1250-NEXT: s_endpgm
667720
ptr addrspace(1) %in, ptr addrspace(5) %out) {
668721
entry:
669722
%tid = call i32 @llvm.amdgcn.workitem.id.x()

0 commit comments

Comments
 (0)