@@ -138,67 +138,67 @@ define void @store_i64_seq_cst(ptr %mem) {
 ; Atomic CmpXchg
 define i8 @cas_strong_i8_sc_sc(ptr %mem) {
 ; PPC32-LABEL: cas_strong_i8_sc_sc:
-; PPC32:       # %bb.0:
+; PPC32:       # %bb.0: # %cmpxchg.start
 ; PPC32-NEXT:    rlwinm r5, r3, 0, 0, 29
 ; PPC32-NEXT:    lwarx r4, 0, r5
-; PPC32-NEXT:    not r3, r3
+; PPC32-NEXT:    not r3, r3
 ; PPC32-NEXT:    rlwinm r3, r3, 3, 27, 28
 ; PPC32-NEXT:    srw r6, r4, r3
 ; PPC32-NEXT:    andi. r6, r6, 255
-; PPC32-NEXT:    bne cr0, .LBB8_4
-; PPC32-NEXT:  # %bb.1: # %cmpxchg.fencedstore
+; PPC32-NEXT:    bne cr0, .LBB8_4
+; PPC32-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC32-NEXT:    li r6, 255
 ; PPC32-NEXT:    li r7, 1
 ; PPC32-NEXT:    slw r6, r6, r3
-; PPC32-NEXT:    not r6, r6
+; PPC32-NEXT:    not r6, r6
 ; PPC32-NEXT:    slw r7, r7, r3
 ; PPC32-NEXT:    sync
-; PPC32-NEXT:  .LBB8_2: # %cmpxchg.trystore
-; PPC32-NEXT:    # =>This Inner Loop Header: Depth=1
+; PPC32-NEXT:  .LBB8_2: # %cmpxchg.trystore
+; PPC32-NEXT:    # =>This Inner Loop Header: Depth=1
 ; PPC32-NEXT:    and r8, r4, r6
 ; PPC32-NEXT:    or r8, r8, r7
 ; PPC32-NEXT:    stwcx. r8, 0, r5
-; PPC32-NEXT:    beq cr0, .LBB8_4
-; PPC32-NEXT:  # %bb.3: # %cmpxchg.releasedload
-; PPC32-NEXT:    # in Loop: Header=BB8_2 Depth=1
+; PPC32-NEXT:    beq cr0, .LBB8_4
+; PPC32-NEXT:  # %bb.3: # %cmpxchg.releasedload
+; PPC32-NEXT:    # in Loop: Header=BB8_2 Depth=1
 ; PPC32-NEXT:    lwarx r4, 0, r5
 ; PPC32-NEXT:    srw r8, r4, r3
 ; PPC32-NEXT:    andi. r8, r8, 255
-; PPC32-NEXT:    beq cr0, .LBB8_2
-; PPC32-NEXT:  .LBB8_4: # %cmpxchg.nostore
+; PPC32-NEXT:    beq cr0, .LBB8_2
+; PPC32-NEXT:  .LBB8_4: # %cmpxchg.nostore
 ; PPC32-NEXT:    srw r3, r4, r3
 ; PPC32-NEXT:    lwsync
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: cas_strong_i8_sc_sc:
-; PPC64:       # %bb.0:
+; PPC64:       # %bb.0: # %cmpxchg.start
 ; PPC64-NEXT:    rldicr r5, r3, 0, 61
-; PPC64-NEXT:    not r3, r3
+; PPC64-NEXT:    not r3, r3
 ; PPC64-NEXT:    lwarx r4, 0, r5
 ; PPC64-NEXT:    rlwinm r3, r3, 3, 27, 28
 ; PPC64-NEXT:    srw r6, r4, r3
 ; PPC64-NEXT:    andi. r6, r6, 255
-; PPC64-NEXT:    bne cr0, .LBB8_4
-; PPC64-NEXT:  # %bb.1: # %cmpxchg.fencedstore
+; PPC64-NEXT:    bne cr0, .LBB8_4
+; PPC64-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64-NEXT:    li r6, 255
 ; PPC64-NEXT:    li r7, 1
 ; PPC64-NEXT:    slw r6, r6, r3
-; PPC64-NEXT:    not r6, r6
+; PPC64-NEXT:    not r6, r6
 ; PPC64-NEXT:    slw r7, r7, r3
 ; PPC64-NEXT:    sync
-; PPC64-NEXT:  .LBB8_2: # %cmpxchg.trystore
-; PPC64-NEXT:    # =>This Inner Loop Header: Depth=1
+; PPC64-NEXT:  .LBB8_2: # %cmpxchg.trystore
+; PPC64-NEXT:    # =>This Inner Loop Header: Depth=1
 ; PPC64-NEXT:    and r8, r4, r6
 ; PPC64-NEXT:    or r8, r8, r7
 ; PPC64-NEXT:    stwcx. r8, 0, r5
-; PPC64-NEXT:    beq cr0, .LBB8_4
-; PPC64-NEXT:  # %bb.3: # %cmpxchg.releasedload
-; PPC64-NEXT:    # in Loop: Header=BB8_2 Depth=1
+; PPC64-NEXT:    beq cr0, .LBB8_4
+; PPC64-NEXT:  # %bb.3: # %cmpxchg.releasedload
+; PPC64-NEXT:    # in Loop: Header=BB8_2 Depth=1
 ; PPC64-NEXT:    lwarx r4, 0, r5
 ; PPC64-NEXT:    srw r8, r4, r3
 ; PPC64-NEXT:    andi. r8, r8, 255
-; PPC64-NEXT:    beq cr0, .LBB8_2
-; PPC64-NEXT:  .LBB8_4: # %cmpxchg.nostore
+; PPC64-NEXT:    beq cr0, .LBB8_2
+; PPC64-NEXT:  .LBB8_4: # %cmpxchg.nostore
 ; PPC64-NEXT:    srw r3, r4, r3
 ; PPC64-NEXT:    lwsync
 ; PPC64-NEXT:    blr
@@ -208,24 +208,24 @@ define i8 @cas_strong_i8_sc_sc(ptr %mem) {
 }
 define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
 ; PPC32-LABEL: cas_weak_i16_acquire_acquire:
-; PPC32:       # %bb.0:
+; PPC32:       # %bb.0: # %cmpxchg.start
 ; PPC32-NEXT:    rlwinm r4, r3, 0, 0, 29
 ; PPC32-NEXT:    lwarx r5, 0, r4
-; PPC32-NEXT:    clrlwi r3, r3, 30
+; PPC32-NEXT:    clrlwi r3, r3, 30
 ; PPC32-NEXT:    xori r3, r3, 2
 ; PPC32-NEXT:    slwi r6, r3, 3
 ; PPC32-NEXT:    srw r3, r5, r6
 ; PPC32-NEXT:    andi. r7, r3, 65535
-; PPC32-NEXT:    beq cr0, .LBB9_2
-; PPC32-NEXT:  # %bb.1: # %cmpxchg.failure
+; PPC32-NEXT:    beq cr0, .LBB9_2
+; PPC32-NEXT:  # %bb.1: # %cmpxchg.failure
 ; PPC32-NEXT:    lwsync
 ; PPC32-NEXT:    blr
-; PPC32-NEXT:  .LBB9_2: # %cmpxchg.fencedstore
+; PPC32-NEXT:  .LBB9_2: # %cmpxchg.fencedstore
 ; PPC32-NEXT:    lis r7, 0
 ; PPC32-NEXT:    ori r7, r7, 65535
 ; PPC32-NEXT:    slw r7, r7, r6
 ; PPC32-NEXT:    li r8, 1
-; PPC32-NEXT:    not r7, r7
+; PPC32-NEXT:    not r7, r7
 ; PPC32-NEXT:    slw r6, r8, r6
 ; PPC32-NEXT:    and r5, r5, r7
 ; PPC32-NEXT:    or r5, r5, r6
@@ -234,24 +234,24 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: cas_weak_i16_acquire_acquire:
-; PPC64:       # %bb.0:
-; PPC64-NEXT:    rldicr r4, r3, 0, 61
-; PPC64-NEXT:    clrlwi r3, r3, 30
+; PPC64:       # %bb.0: # %cmpxchg.start
+; PPC64-NEXT:    rldicr r4, r3, 0, 61
+; PPC64-NEXT:    clrlwi r3, r3, 30
 ; PPC64-NEXT:    lwarx r5, 0, r4
 ; PPC64-NEXT:    xori r3, r3, 2
 ; PPC64-NEXT:    slwi r6, r3, 3
 ; PPC64-NEXT:    srw r3, r5, r6
 ; PPC64-NEXT:    andi. r7, r3, 65535
-; PPC64-NEXT:    beq cr0, .LBB9_2
-; PPC64-NEXT:  # %bb.1: # %cmpxchg.failure
+; PPC64-NEXT:    beq cr0, .LBB9_2
+; PPC64-NEXT:  # %bb.1: # %cmpxchg.failure
 ; PPC64-NEXT:    lwsync
 ; PPC64-NEXT:    blr
-; PPC64-NEXT:  .LBB9_2: # %cmpxchg.fencedstore
+; PPC64-NEXT:  .LBB9_2: # %cmpxchg.fencedstore
 ; PPC64-NEXT:    lis r7, 0
 ; PPC64-NEXT:    ori r7, r7, 65535
 ; PPC64-NEXT:    slw r7, r7, r6
 ; PPC64-NEXT:    li r8, 1
-; PPC64-NEXT:    not r7, r7
+; PPC64-NEXT:    not r7, r7
 ; PPC64-NEXT:    slw r6, r8, r6
 ; PPC64-NEXT:    and r5, r5, r7
 ; PPC64-NEXT:    or r5, r5, r6
@@ -264,24 +264,24 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
 }
 define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) {
 ; CHECK-LABEL: cas_strong_i32_acqrel_acquire:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    mr r4, r3
+; CHECK:       # %bb.0: # %cmpxchg.start
+; CHECK-NEXT:    mr r4, r3
 ; CHECK-NEXT:    lwarx r3, 0, r3
-; CHECK-NEXT:    cmplwi r3, 0
-; CHECK-NEXT:    bne cr0, .LBB10_4
-; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore
+; CHECK-NEXT:    cmplwi r3, 0
+; CHECK-NEXT:    bne cr0, .LBB10_4
+; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; CHECK-NEXT:    li r5, 1
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:  .LBB10_2: # %cmpxchg.trystore
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:  .LBB10_2: # %cmpxchg.trystore
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    stwcx. r5, 0, r4
-; CHECK-NEXT:    beq cr0, .LBB10_4
-; CHECK-NEXT:  # %bb.3: # %cmpxchg.releasedload
-; CHECK-NEXT:    # in Loop: Header=BB10_2 Depth=1
+; CHECK-NEXT:    beq cr0, .LBB10_4
+; CHECK-NEXT:  # %bb.3: # %cmpxchg.releasedload
+; CHECK-NEXT:    # in Loop: Header=BB10_2 Depth=1
 ; CHECK-NEXT:    lwarx r3, 0, r4
-; CHECK-NEXT:    cmplwi r3, 0
-; CHECK-NEXT:    beq cr0, .LBB10_2
-; CHECK-NEXT:  .LBB10_4: # %cmpxchg.nostore
+; CHECK-NEXT:    cmplwi r3, 0
+; CHECK-NEXT:    beq cr0, .LBB10_2
+; CHECK-NEXT:  .LBB10_4: # %cmpxchg.nostore
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
   %val = cmpxchg ptr %mem, i32 0, i32 1 acq_rel acquire
@@ -313,12 +313,12 @@ define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: cas_weak_i64_release_monotonic:
-; PPC64:       # %bb.0:
-; PPC64-NEXT:    mr r4, r3
+; PPC64:       # %bb.0: # %cmpxchg.start
+; PPC64-NEXT:    mr r4, r3
 ; PPC64-NEXT:    ldarx r3, 0, r3
-; PPC64-NEXT:    cmpldi r3, 0
-; PPC64-NEXT:    bnelr cr0
-; PPC64-NEXT:  # %bb.1: # %cmpxchg.fencedstore
+; PPC64-NEXT:    cmpldi r3, 0
+; PPC64-NEXT:    bnelr cr0
+; PPC64-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64-NEXT:    li r5, 1
 ; PPC64-NEXT:    lwsync
 ; PPC64-NEXT:    stdcx. r5, 0, r4
@@ -473,39 +473,20 @@ define i64 @and_i64_release(ptr %mem, i64 %operand) {
 define half @load_atomic_f16__seq_cst(ptr %ptr) {
 ; PPC32-LABEL: load_atomic_f16__seq_cst:
 ; PPC32:       # %bb.0:
-; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stwu r1, -16(r1)
-; PPC32-NEXT:    stw r0, 20(r1)
-; PPC32-NEXT:    .cfi_def_cfa_offset 16
-; PPC32-NEXT:    .cfi_offset lr, 4
 ; PPC32-NEXT:    sync
 ; PPC32-NEXT:    lhz r3, 0(r3)
 ; PPC32-NEXT:    cmpw cr7, r3, r3
 ; PPC32-NEXT:    bne- cr7, .+4
 ; PPC32-NEXT:    isync
-; PPC32-NEXT:    bl __extendhfsf2
-; PPC32-NEXT:    lwz r0, 20(r1)
-; PPC32-NEXT:    addi r1, r1, 16
-; PPC32-NEXT:    mtlr r0
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: load_atomic_f16__seq_cst:
 ; PPC64:       # %bb.0:
-; PPC64-NEXT:    mflr r0
-; PPC64-NEXT:    stdu r1, -112(r1)
-; PPC64-NEXT:    std r0, 128(r1)
-; PPC64-NEXT:    .cfi_def_cfa_offset 112
-; PPC64-NEXT:    .cfi_offset lr, 16
 ; PPC64-NEXT:    sync
 ; PPC64-NEXT:    lhz r3, 0(r3)
 ; PPC64-NEXT:    cmpd cr7, r3, r3
 ; PPC64-NEXT:    bne- cr7, .+4
 ; PPC64-NEXT:    isync
-; PPC64-NEXT:    bl __extendhfsf2
-; PPC64-NEXT:    nop
-; PPC64-NEXT:    addi r1, r1, 112
-; PPC64-NEXT:    ld r0, 16(r1)
-; PPC64-NEXT:    mtlr r0
 ; PPC64-NEXT:    blr
   %val = load atomic half, ptr %ptr seq_cst, align 2
   ret half %val
@@ -579,44 +560,11 @@ define double @load_atomic_f64__seq_cst(ptr %ptr) {
 }
 
 define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) {
-; PPC32-LABEL: store_atomic_f16__seq_cst:
-; PPC32:       # %bb.0:
-; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stwu r1, -16(r1)
-; PPC32-NEXT:    stw r0, 20(r1)
-; PPC32-NEXT:    .cfi_def_cfa_offset 16
-; PPC32-NEXT:    .cfi_offset lr, 4
-; PPC32-NEXT:    .cfi_offset r30, -8
-; PPC32-NEXT:    stw r30, 8(r1) # 4-byte Folded Spill
-; PPC32-NEXT:    mr r30, r3
-; PPC32-NEXT:    bl __truncsfhf2
-; PPC32-NEXT:    sync
-; PPC32-NEXT:    sth r3, 0(r30)
-; PPC32-NEXT:    lwz r30, 8(r1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz r0, 20(r1)
-; PPC32-NEXT:    addi r1, r1, 16
-; PPC32-NEXT:    mtlr r0
-; PPC32-NEXT:    blr
-;
-; PPC64-LABEL: store_atomic_f16__seq_cst:
-; PPC64:       # %bb.0:
-; PPC64-NEXT:    mflr r0
-; PPC64-NEXT:    stdu r1, -128(r1)
-; PPC64-NEXT:    std r0, 144(r1)
-; PPC64-NEXT:    .cfi_def_cfa_offset 128
-; PPC64-NEXT:    .cfi_offset lr, 16
-; PPC64-NEXT:    .cfi_offset r30, -16
-; PPC64-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
-; PPC64-NEXT:    mr r30, r3
-; PPC64-NEXT:    bl __truncsfhf2
-; PPC64-NEXT:    nop
-; PPC64-NEXT:    sync
-; PPC64-NEXT:    sth r3, 0(r30)
-; PPC64-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
-; PPC64-NEXT:    addi r1, r1, 128
-; PPC64-NEXT:    ld r0, 16(r1)
-; PPC64-NEXT:    mtlr r0
-; PPC64-NEXT:    blr
+; CHECK-LABEL: store_atomic_f16__seq_cst:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    sync
+; CHECK-NEXT:    sth r4, 0(r3)
+; CHECK-NEXT:    blr
   store atomic half %val1, ptr %ptr seq_cst, align 2
   ret void
 }