@@ -65,6 +65,160 @@ define amdgpu_ps ptr addrspace(3) @s_ptrmask_local_variable_i32(ptr addrspace(3)
65
65
ret ptr addrspace (3 ) %masked
66
66
}
67
67
68
; llvm.ptrmask on an addrspace(7) pointer with a variable i32 mask, default
; calling convention. Both check prefixes expect exactly one v_and_b32
; (v4 & v5 -> v4) between the entry s_waitcnt and the s_setpc_b64 return.
+ define ptr addrspace (7 ) @v_ptrmask_buffer_fat_ptr_variable_i32 (ptr addrspace (7 ) %ptr , i32 %mask ) {
69
+ ; GCN-LABEL: v_ptrmask_buffer_fat_ptr_variable_i32:
70
+ ; GCN: ; %bb.0:
71
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72
+ ; GCN-NEXT: v_and_b32_e32 v4, v4, v5
73
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
74
+ ;
75
+ ; GFX10PLUS-LABEL: v_ptrmask_buffer_fat_ptr_variable_i32:
76
+ ; GFX10PLUS: ; %bb.0:
77
+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v4, v4, v5
79
+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
80
+ %masked = call ptr addrspace (7 ) @llvm.ptrmask.p7.i32 (ptr addrspace (7 ) %ptr , i32 %mask )
81
+ ret ptr addrspace (7 ) %masked
82
+ }
83
+
84
; llvm.ptrmask on an addrspace(7) pointer with the constant i32 mask -8,
; default calling convention. Both check prefixes expect the constant to be
; used directly as an operand of a single v_and_b32 on v4.
+ define ptr addrspace (7 ) @v_ptrmask_buffer_fat_ptr_i32_neg8 (ptr addrspace (7 ) %ptr ) {
85
+ ; GCN-LABEL: v_ptrmask_buffer_fat_ptr_i32_neg8:
86
+ ; GCN: ; %bb.0:
87
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88
+ ; GCN-NEXT: v_and_b32_e32 v4, -8, v4
89
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
90
+ ;
91
+ ; GFX10PLUS-LABEL: v_ptrmask_buffer_fat_ptr_i32_neg8:
92
+ ; GFX10PLUS: ; %bb.0:
93
+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v4, -8, v4
95
+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
96
+ %masked = call ptr addrspace (7 ) @llvm.ptrmask.p7.i32 (ptr addrspace (7 ) %ptr , i32 -8 )
97
+ ret ptr addrspace (7 ) %masked
98
+ }
99
+
100
; amdgpu_ps variant of the variable-mask addrspace(7) test: %ptr and %mask are
; both inreg. The expected output does the masking with one scalar
; s_and_b32 (s6 & s7 -> s4); the surrounding s_mov_b32 lines copy s2..s5 down
; into s0..s3, using s8 as a temporary for the old s4 value.
+ define amdgpu_ps ptr addrspace (7 ) @s_ptrmask_buffer_fat_ptr_variable_i32 (ptr addrspace (7 ) inreg %ptr , i32 inreg %mask ) {
101
+ ; GCN-LABEL: s_ptrmask_buffer_fat_ptr_variable_i32:
102
+ ; GCN: ; %bb.0:
103
+ ; GCN-NEXT: s_mov_b32 s8, s4
104
+ ; GCN-NEXT: s_mov_b32 s1, s3
105
+ ; GCN-NEXT: s_mov_b32 s0, s2
106
+ ; GCN-NEXT: s_and_b32 s4, s6, s7
107
+ ; GCN-NEXT: s_mov_b32 s2, s8
108
+ ; GCN-NEXT: s_mov_b32 s3, s5
109
+ ; GCN-NEXT: ; return to shader part epilog
110
+ ;
111
+ ; GFX10PLUS-LABEL: s_ptrmask_buffer_fat_ptr_variable_i32:
112
+ ; GFX10PLUS: ; %bb.0:
113
+ ; GFX10PLUS-NEXT: s_mov_b32 s8, s4
114
+ ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
115
+ ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
116
+ ; GFX10PLUS-NEXT: s_and_b32 s4, s6, s7
117
+ ; GFX10PLUS-NEXT: s_mov_b32 s2, s8
118
+ ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
119
+ ; GFX10PLUS-NEXT: ; return to shader part epilog
120
+ %masked = call ptr addrspace (7 ) @llvm.ptrmask.p7.i32 (ptr addrspace (7 ) %ptr , i32 %mask )
121
+ ret ptr addrspace (7 ) %masked
122
+ }
123
+
124
; amdgpu_ps variant of the constant-mask addrspace(7) test: %ptr is inreg and
; the mask is the literal i32 -8. The expected output uses one scalar
; s_and_b32 (s6 & -8 -> s4); the s_mov_b32 lines copy s2..s5 down into s0..s3,
; using s7 as a temporary for the old s4 value.
+ define amdgpu_ps ptr addrspace (7 ) @s_ptrmask_buffer_fat_ptr_i32_neg8 (ptr addrspace (7 ) inreg %ptr ) {
125
+ ; GCN-LABEL: s_ptrmask_buffer_fat_ptr_i32_neg8:
126
+ ; GCN: ; %bb.0:
127
+ ; GCN-NEXT: s_mov_b32 s7, s4
128
+ ; GCN-NEXT: s_mov_b32 s1, s3
129
+ ; GCN-NEXT: s_mov_b32 s0, s2
130
+ ; GCN-NEXT: s_and_b32 s4, s6, -8
131
+ ; GCN-NEXT: s_mov_b32 s2, s7
132
+ ; GCN-NEXT: s_mov_b32 s3, s5
133
+ ; GCN-NEXT: ; return to shader part epilog
134
+ ;
135
+ ; GFX10PLUS-LABEL: s_ptrmask_buffer_fat_ptr_i32_neg8:
136
+ ; GFX10PLUS: ; %bb.0:
137
+ ; GFX10PLUS-NEXT: s_mov_b32 s7, s4
138
+ ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
139
+ ; GFX10PLUS-NEXT: s_mov_b32 s0, s2
140
+ ; GFX10PLUS-NEXT: s_and_b32 s4, s6, -8
141
+ ; GFX10PLUS-NEXT: s_mov_b32 s2, s7
142
+ ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
143
+ ; GFX10PLUS-NEXT: ; return to shader part epilog
144
+ %masked = call ptr addrspace (7 ) @llvm.ptrmask.p7.i32 (ptr addrspace (7 ) %ptr , i32 -8 )
145
+ ret ptr addrspace (7 ) %masked
146
+ }
147
+
148
; llvm.ptrmask on an addrspace(8) pointer with a variable i128 mask, default
; calling convention. The expected output is four 32-bit v_and_b32
; instructions pairing v0..v3 with v4..v7. The GCN and GFX10PLUS blocks
; contain the same four ANDs and differ only in the order they are emitted.
+ define ptr addrspace (8 ) @v_ptrmask_buffer_resource_variable_i128 (ptr addrspace (8 ) %ptr , i128 %mask ) {
149
+ ; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128:
150
+ ; GCN: ; %bb.0:
151
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152
+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v5
153
+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v4
154
+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v7
155
+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v6
156
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
157
+ ;
158
+ ; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128:
159
+ ; GFX10PLUS: ; %bb.0:
160
+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v4
162
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v5
163
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v6
164
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v7
165
+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
166
+ %masked = call ptr addrspace (8 ) @llvm.ptrmask.p8.i128 (ptr addrspace (8 ) %ptr , i128 %mask )
167
+ ret ptr addrspace (8 ) %masked
168
+ }
169
+
170
; llvm.ptrmask on an addrspace(8) pointer with the constant i128 mask -8,
; default calling convention. i128 -8 has every bit above bit 2 set, and the
; expected output contains only one v_and_b32 (-8 & v0 -> v0); v1..v3 are not
; touched in the checks.
; NOTE(review): the name contains "variable" although the mask is a constant;
; the addrspace(7) counterpart is named ..._i32_neg8 without it. Confirm
; whether the name is intentional before relying on it.
+ define ptr addrspace (8 ) @v_ptrmask_buffer_resource_variable_i128_neg8 (ptr addrspace (8 ) %ptr ) {
171
+ ; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8:
172
+ ; GCN: ; %bb.0:
173
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174
+ ; GCN-NEXT: v_and_b32_e32 v0, -8, v0
175
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
176
+ ;
177
+ ; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8:
178
+ ; GFX10PLUS: ; %bb.0:
179
+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v0, -8, v0
181
+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
182
+ %masked = call ptr addrspace (8 ) @llvm.ptrmask.p8.i128 (ptr addrspace (8 ) %ptr , i128 -8 )
183
+ ret ptr addrspace (8 ) %masked
184
+ }
185
+
186
; amdgpu_ps variant of the variable-mask addrspace(8) test: %ptr and %mask are
; both inreg. The expected output performs the 128-bit mask as two 64-bit
; scalar ANDs: s[2:3] & s[6:7] -> s[0:1] and s[4:5] & s[8:9] -> s[2:3].
; No separate s_mov copies appear in the checks.
+ define amdgpu_ps ptr addrspace (8 ) @s_ptrmask_buffer_resource_variable_i128 (ptr addrspace (8 ) inreg %ptr , i128 inreg %mask ) {
187
+ ; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128:
188
+ ; GCN: ; %bb.0:
189
+ ; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
190
+ ; GCN-NEXT: s_and_b64 s[2:3], s[4:5], s[8:9]
191
+ ; GCN-NEXT: ; return to shader part epilog
192
+ ;
193
+ ; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128:
194
+ ; GFX10PLUS: ; %bb.0:
195
+ ; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
196
+ ; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[4:5], s[8:9]
197
+ ; GFX10PLUS-NEXT: ; return to shader part epilog
198
+ %masked = call ptr addrspace (8 ) @llvm.ptrmask.p8.i128 (ptr addrspace (8 ) %ptr , i128 %mask )
199
+ ret ptr addrspace (8 ) %masked
200
+ }
201
+
202
; amdgpu_ps variant of the constant-mask addrspace(8) test: %ptr is inreg and
; the mask is the literal i128 -8. The expected output applies one scalar
; s_and_b32 (s2 & -8 -> s0) and moves the other three registers unchanged
; with s_mov_b32 (s3 -> s1, s4 -> s2, s5 -> s3).
; NOTE(review): the name contains "variable" although the mask is a constant;
; confirm whether that is intentional.
+ define amdgpu_ps ptr addrspace (8 ) @s_ptrmask_buffer_resource_variable_i128_neg8 (ptr addrspace (8 ) inreg %ptr ) {
203
+ ; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8:
204
+ ; GCN: ; %bb.0:
205
+ ; GCN-NEXT: s_mov_b32 s1, s3
206
+ ; GCN-NEXT: s_and_b32 s0, s2, -8
207
+ ; GCN-NEXT: s_mov_b32 s2, s4
208
+ ; GCN-NEXT: s_mov_b32 s3, s5
209
+ ; GCN-NEXT: ; return to shader part epilog
210
+ ;
211
+ ; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8:
212
+ ; GFX10PLUS: ; %bb.0:
213
+ ; GFX10PLUS-NEXT: s_mov_b32 s1, s3
214
+ ; GFX10PLUS-NEXT: s_and_b32 s0, s2, -8
215
+ ; GFX10PLUS-NEXT: s_mov_b32 s2, s4
216
+ ; GFX10PLUS-NEXT: s_mov_b32 s3, s5
217
+ ; GFX10PLUS-NEXT: ; return to shader part epilog
218
+ %masked = call ptr addrspace (8 ) @llvm.ptrmask.p8.i128 (ptr addrspace (8 ) %ptr , i128 -8 )
219
+ ret ptr addrspace (8 ) %masked
220
+ }
221
+
68
222
declare ptr addrspace (3 ) @llvm.ptrmask.p3.i32 (ptr addrspace (3 ), i32 ) #0
69
223
declare ptr addrspace (1 ) @llvm.ptrmask.p1.i64 (ptr addrspace (1 ), i64 ) #0
70
224
0 commit comments