Skip to content

Commit 35a405b

Browse files
committed
[PowerPC] Change half to use soft promotion rather than PromoteFloat
On PowerPC targets, `half` uses the default legalization of promoting to an `f32`. However, this has some fundamental issues related to the inability to round-trip values. Resolve this by switching to the soft legalization, which passes `f16` as an `i16`. The PowerPC ABI Specification does not define a `_Float16` type, so the calling convention changes are acceptable. Fixes the PowerPC portion of [1]. A similar change was done for MIPS in f0231b6 ("[MIPS] Use softPromoteHalf legalization for fp16 rather than PromoteFloat (#110199)") and for LoongArch in 13280d9 ("[loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791)"). [1]: #97975
1 parent 6d34ab7 commit 35a405b

File tree

10 files changed

+468
-853
lines changed

10 files changed

+468
-853
lines changed

llvm/docs/ReleaseNotes.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ Changes to the MIPS Backend
101101
Changes to the PowerPC Backend
102102
------------------------------
103103

104+
* `half` now uses a soft float ABI, which works correctly in more cases.
105+
104106
Changes to the RISC-V Backend
105107
-----------------------------
106108

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,8 @@ namespace llvm {
801801

802802
bool useSoftFloat() const override;
803803

804+
bool softPromoteHalfType() const override { return true; }
805+
804806
bool hasSPE() const;
805807

806808
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {

llvm/test/CodeGen/PowerPC/atomics.ll

Lines changed: 60 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -138,67 +138,67 @@ define void @store_i64_seq_cst(ptr %mem) {
138138
; Atomic CmpXchg
139139
define i8 @cas_strong_i8_sc_sc(ptr %mem) {
140140
; PPC32-LABEL: cas_strong_i8_sc_sc:
141-
; PPC32: # %bb.0:
141+
; PPC32: # %bb.0: # %cmpxchg.start
142142
; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29
143143
; PPC32-NEXT: lwarx r4, 0, r5
144-
; PPC32-NEXT: not r3, r3
144+
; PPC32-NEXT: not r3, r3
145145
; PPC32-NEXT: rlwinm r3, r3, 3, 27, 28
146146
; PPC32-NEXT: srw r6, r4, r3
147147
; PPC32-NEXT: andi. r6, r6, 255
148-
; PPC32-NEXT: bne cr0, .LBB8_4
149-
; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore
148+
; PPC32-NEXT: bne cr0, .LBB8_4
149+
; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore
150150
; PPC32-NEXT: li r6, 255
151151
; PPC32-NEXT: li r7, 1
152152
; PPC32-NEXT: slw r6, r6, r3
153-
; PPC32-NEXT: not r6, r6
153+
; PPC32-NEXT: not r6, r6
154154
; PPC32-NEXT: slw r7, r7, r3
155155
; PPC32-NEXT: sync
156-
; PPC32-NEXT: .LBB8_2: # %cmpxchg.trystore
157-
; PPC32-NEXT: # =>This Inner Loop Header: Depth=1
156+
; PPC32-NEXT: .LBB8_2: # %cmpxchg.trystore
157+
; PPC32-NEXT: #
158158
; PPC32-NEXT: and r8, r4, r6
159159
; PPC32-NEXT: or r8, r8, r7
160160
; PPC32-NEXT: stwcx. r8, 0, r5
161-
; PPC32-NEXT: beq cr0, .LBB8_4
162-
; PPC32-NEXT: # %bb.3: # %cmpxchg.releasedload
163-
; PPC32-NEXT: # in Loop: Header=BB8_2 Depth=1
161+
; PPC32-NEXT: beq cr0, .LBB8_4
162+
; PPC32-NEXT: # %bb.3: # %cmpxchg.releasedload
163+
; PPC32-NEXT: #
164164
; PPC32-NEXT: lwarx r4, 0, r5
165165
; PPC32-NEXT: srw r8, r4, r3
166166
; PPC32-NEXT: andi. r8, r8, 255
167-
; PPC32-NEXT: beq cr0, .LBB8_2
168-
; PPC32-NEXT: .LBB8_4: # %cmpxchg.nostore
167+
; PPC32-NEXT: beq cr0, .LBB8_2
168+
; PPC32-NEXT: .LBB8_4: # %cmpxchg.nostore
169169
; PPC32-NEXT: srw r3, r4, r3
170170
; PPC32-NEXT: lwsync
171171
; PPC32-NEXT: blr
172172
;
173173
; PPC64-LABEL: cas_strong_i8_sc_sc:
174-
; PPC64: # %bb.0:
174+
; PPC64: # %bb.0: # %cmpxchg.start
175175
; PPC64-NEXT: rldicr r5, r3, 0, 61
176-
; PPC64-NEXT: not r3, r3
176+
; PPC64-NEXT: not r3, r3
177177
; PPC64-NEXT: lwarx r4, 0, r5
178178
; PPC64-NEXT: rlwinm r3, r3, 3, 27, 28
179179
; PPC64-NEXT: srw r6, r4, r3
180180
; PPC64-NEXT: andi. r6, r6, 255
181-
; PPC64-NEXT: bne cr0, .LBB8_4
182-
; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
181+
; PPC64-NEXT: bne cr0, .LBB8_4
182+
; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
183183
; PPC64-NEXT: li r6, 255
184184
; PPC64-NEXT: li r7, 1
185185
; PPC64-NEXT: slw r6, r6, r3
186-
; PPC64-NEXT: not r6, r6
186+
; PPC64-NEXT: not r6, r6
187187
; PPC64-NEXT: slw r7, r7, r3
188188
; PPC64-NEXT: sync
189-
; PPC64-NEXT: .LBB8_2: # %cmpxchg.trystore
190-
; PPC64-NEXT: # =>This Inner Loop Header: Depth=1
189+
; PPC64-NEXT: .LBB8_2: # %cmpxchg.trystore
190+
; PPC64-NEXT: #
191191
; PPC64-NEXT: and r8, r4, r6
192192
; PPC64-NEXT: or r8, r8, r7
193193
; PPC64-NEXT: stwcx. r8, 0, r5
194-
; PPC64-NEXT: beq cr0, .LBB8_4
195-
; PPC64-NEXT: # %bb.3: # %cmpxchg.releasedload
196-
; PPC64-NEXT: # in Loop: Header=BB8_2 Depth=1
194+
; PPC64-NEXT: beq cr0, .LBB8_4
195+
; PPC64-NEXT: # %bb.3: # %cmpxchg.releasedload
196+
; PPC64-NEXT: #
197197
; PPC64-NEXT: lwarx r4, 0, r5
198198
; PPC64-NEXT: srw r8, r4, r3
199199
; PPC64-NEXT: andi. r8, r8, 255
200-
; PPC64-NEXT: beq cr0, .LBB8_2
201-
; PPC64-NEXT: .LBB8_4: # %cmpxchg.nostore
200+
; PPC64-NEXT: beq cr0, .LBB8_2
201+
; PPC64-NEXT: .LBB8_4: # %cmpxchg.nostore
202202
; PPC64-NEXT: srw r3, r4, r3
203203
; PPC64-NEXT: lwsync
204204
; PPC64-NEXT: blr
@@ -208,24 +208,24 @@ define i8 @cas_strong_i8_sc_sc(ptr %mem) {
208208
}
209209
define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
210210
; PPC32-LABEL: cas_weak_i16_acquire_acquire:
211-
; PPC32: # %bb.0:
211+
; PPC32: # %bb.0: # %cmpxchg.start
212212
; PPC32-NEXT: rlwinm r4, r3, 0, 0, 29
213213
; PPC32-NEXT: lwarx r5, 0, r4
214-
; PPC32-NEXT: clrlwi r3, r3, 30
214+
; PPC32-NEXT: clrlwi r3, r3, 30
215215
; PPC32-NEXT: xori r3, r3, 2
216216
; PPC32-NEXT: slwi r6, r3, 3
217217
; PPC32-NEXT: srw r3, r5, r6
218218
; PPC32-NEXT: andi. r7, r3, 65535
219-
; PPC32-NEXT: beq cr0, .LBB9_2
220-
; PPC32-NEXT: # %bb.1: # %cmpxchg.failure
219+
; PPC32-NEXT: beq cr0, .LBB9_2
220+
; PPC32-NEXT: # %bb.1: # %cmpxchg.failure
221221
; PPC32-NEXT: lwsync
222222
; PPC32-NEXT: blr
223-
; PPC32-NEXT: .LBB9_2: # %cmpxchg.fencedstore
223+
; PPC32-NEXT: .LBB9_2: # %cmpxchg.fencedstore
224224
; PPC32-NEXT: lis r7, 0
225225
; PPC32-NEXT: ori r7, r7, 65535
226226
; PPC32-NEXT: slw r7, r7, r6
227227
; PPC32-NEXT: li r8, 1
228-
; PPC32-NEXT: not r7, r7
228+
; PPC32-NEXT: not r7, r7
229229
; PPC32-NEXT: slw r6, r8, r6
230230
; PPC32-NEXT: and r5, r5, r7
231231
; PPC32-NEXT: or r5, r5, r6
@@ -234,24 +234,24 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
234234
; PPC32-NEXT: blr
235235
;
236236
; PPC64-LABEL: cas_weak_i16_acquire_acquire:
237-
; PPC64: # %bb.0:
238-
; PPC64-NEXT: rldicr r4, r3, 0, 61
239-
; PPC64-NEXT: clrlwi r3, r3, 30
237+
; PPC64: # %bb.0: # %cmpxchg.start
238+
; PPC64-NEXT: rldicr r4, r3, 0, 61
239+
; PPC64-NEXT: clrlwi r3, r3, 30
240240
; PPC64-NEXT: lwarx r5, 0, r4
241241
; PPC64-NEXT: xori r3, r3, 2
242242
; PPC64-NEXT: slwi r6, r3, 3
243243
; PPC64-NEXT: srw r3, r5, r6
244244
; PPC64-NEXT: andi. r7, r3, 65535
245-
; PPC64-NEXT: beq cr0, .LBB9_2
246-
; PPC64-NEXT: # %bb.1: # %cmpxchg.failure
245+
; PPC64-NEXT: beq cr0, .LBB9_2
246+
; PPC64-NEXT: # %bb.1: # %cmpxchg.failure
247247
; PPC64-NEXT: lwsync
248248
; PPC64-NEXT: blr
249-
; PPC64-NEXT: .LBB9_2: # %cmpxchg.fencedstore
249+
; PPC64-NEXT: .LBB9_2: # %cmpxchg.fencedstore
250250
; PPC64-NEXT: lis r7, 0
251251
; PPC64-NEXT: ori r7, r7, 65535
252252
; PPC64-NEXT: slw r7, r7, r6
253253
; PPC64-NEXT: li r8, 1
254-
; PPC64-NEXT: not r7, r7
254+
; PPC64-NEXT: not r7, r7
255255
; PPC64-NEXT: slw r6, r8, r6
256256
; PPC64-NEXT: and r5, r5, r7
257257
; PPC64-NEXT: or r5, r5, r6
@@ -264,24 +264,24 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
264264
}
265265
define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) {
266266
; CHECK-LABEL: cas_strong_i32_acqrel_acquire:
267-
; CHECK: # %bb.0:
268-
; CHECK-NEXT: mr r4, r3
267+
; CHECK: # %bb.0: # %cmpxchg.start
268+
; CHECK-NEXT: mr r4, r3
269269
; CHECK-NEXT: lwarx r3, 0, r3
270-
; CHECK-NEXT: cmplwi r3, 0
271-
; CHECK-NEXT: bne cr0, .LBB10_4
272-
; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
270+
; CHECK-NEXT: cmplwi r3, 0
271+
; CHECK-NEXT: bne cr0, .LBB10_4
272+
; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
273273
; CHECK-NEXT: li r5, 1
274274
; CHECK-NEXT: lwsync
275-
; CHECK-NEXT: .LBB10_2: # %cmpxchg.trystore
276-
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
275+
; CHECK-NEXT: .LBB10_2: # %cmpxchg.trystore
276+
; CHECK-NEXT: #
277277
; CHECK-NEXT: stwcx. r5, 0, r4
278-
; CHECK-NEXT: beq cr0, .LBB10_4
279-
; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload
280-
; CHECK-NEXT: # in Loop: Header=BB10_2 Depth=1
278+
; CHECK-NEXT: beq cr0, .LBB10_4
279+
; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload
280+
; CHECK-NEXT: #
281281
; CHECK-NEXT: lwarx r3, 0, r4
282-
; CHECK-NEXT: cmplwi r3, 0
283-
; CHECK-NEXT: beq cr0, .LBB10_2
284-
; CHECK-NEXT: .LBB10_4: # %cmpxchg.nostore
282+
; CHECK-NEXT: cmplwi r3, 0
283+
; CHECK-NEXT: beq cr0, .LBB10_2
284+
; CHECK-NEXT: .LBB10_4: # %cmpxchg.nostore
285285
; CHECK-NEXT: lwsync
286286
; CHECK-NEXT: blr
287287
%val = cmpxchg ptr %mem, i32 0, i32 1 acq_rel acquire
@@ -313,12 +313,12 @@ define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
313313
; PPC32-NEXT: blr
314314
;
315315
; PPC64-LABEL: cas_weak_i64_release_monotonic:
316-
; PPC64: # %bb.0:
317-
; PPC64-NEXT: mr r4, r3
316+
; PPC64: # %bb.0: # %cmpxchg.start
317+
; PPC64-NEXT: mr r4, r3
318318
; PPC64-NEXT: ldarx r3, 0, r3
319-
; PPC64-NEXT: cmpldi r3, 0
320-
; PPC64-NEXT: bnelr cr0
321-
; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
319+
; PPC64-NEXT: cmpldi r3, 0
320+
; PPC64-NEXT: bnelr cr0
321+
; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
322322
; PPC64-NEXT: li r5, 1
323323
; PPC64-NEXT: lwsync
324324
; PPC64-NEXT: stdcx. r5, 0, r4
@@ -473,39 +473,20 @@ define i64 @and_i64_release(ptr %mem, i64 %operand) {
473473
define half @load_atomic_f16__seq_cst(ptr %ptr) {
474474
; PPC32-LABEL: load_atomic_f16__seq_cst:
475475
; PPC32: # %bb.0:
476-
; PPC32-NEXT: mflr r0
477-
; PPC32-NEXT: stwu r1, -16(r1)
478-
; PPC32-NEXT: stw r0, 20(r1)
479-
; PPC32-NEXT: .cfi_def_cfa_offset 16
480-
; PPC32-NEXT: .cfi_offset lr, 4
481476
; PPC32-NEXT: sync
482477
; PPC32-NEXT: lhz r3, 0(r3)
483478
; PPC32-NEXT: cmpw cr7, r3, r3
484479
; PPC32-NEXT: bne- cr7, .+4
485480
; PPC32-NEXT: isync
486-
; PPC32-NEXT: bl __extendhfsf2
487-
; PPC32-NEXT: lwz r0, 20(r1)
488-
; PPC32-NEXT: addi r1, r1, 16
489-
; PPC32-NEXT: mtlr r0
490481
; PPC32-NEXT: blr
491482
;
492483
; PPC64-LABEL: load_atomic_f16__seq_cst:
493484
; PPC64: # %bb.0:
494-
; PPC64-NEXT: mflr r0
495-
; PPC64-NEXT: stdu r1, -112(r1)
496-
; PPC64-NEXT: std r0, 128(r1)
497-
; PPC64-NEXT: .cfi_def_cfa_offset 112
498-
; PPC64-NEXT: .cfi_offset lr, 16
499485
; PPC64-NEXT: sync
500486
; PPC64-NEXT: lhz r3, 0(r3)
501487
; PPC64-NEXT: cmpd cr7, r3, r3
502488
; PPC64-NEXT: bne- cr7, .+4
503489
; PPC64-NEXT: isync
504-
; PPC64-NEXT: bl __extendhfsf2
505-
; PPC64-NEXT: nop
506-
; PPC64-NEXT: addi r1, r1, 112
507-
; PPC64-NEXT: ld r0, 16(r1)
508-
; PPC64-NEXT: mtlr r0
509490
; PPC64-NEXT: blr
510491
%val = load atomic half, ptr %ptr seq_cst, align 2
511492
ret half %val
@@ -579,44 +560,11 @@ define double @load_atomic_f64__seq_cst(ptr %ptr) {
579560
}
580561

581562
define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) {
582-
; PPC32-LABEL: store_atomic_f16__seq_cst:
583-
; PPC32: # %bb.0:
584-
; PPC32-NEXT: mflr r0
585-
; PPC32-NEXT: stwu r1, -16(r1)
586-
; PPC32-NEXT: stw r0, 20(r1)
587-
; PPC32-NEXT: .cfi_def_cfa_offset 16
588-
; PPC32-NEXT: .cfi_offset lr, 4
589-
; PPC32-NEXT: .cfi_offset r30, -8
590-
; PPC32-NEXT: stw r30, 8(r1) # 4-byte Folded Spill
591-
; PPC32-NEXT: mr r30, r3
592-
; PPC32-NEXT: bl __truncsfhf2
593-
; PPC32-NEXT: sync
594-
; PPC32-NEXT: sth r3, 0(r30)
595-
; PPC32-NEXT: lwz r30, 8(r1) # 4-byte Folded Reload
596-
; PPC32-NEXT: lwz r0, 20(r1)
597-
; PPC32-NEXT: addi r1, r1, 16
598-
; PPC32-NEXT: mtlr r0
599-
; PPC32-NEXT: blr
600-
;
601-
; PPC64-LABEL: store_atomic_f16__seq_cst:
602-
; PPC64: # %bb.0:
603-
; PPC64-NEXT: mflr r0
604-
; PPC64-NEXT: stdu r1, -128(r1)
605-
; PPC64-NEXT: std r0, 144(r1)
606-
; PPC64-NEXT: .cfi_def_cfa_offset 128
607-
; PPC64-NEXT: .cfi_offset lr, 16
608-
; PPC64-NEXT: .cfi_offset r30, -16
609-
; PPC64-NEXT: std r30, 112(r1) # 8-byte Folded Spill
610-
; PPC64-NEXT: mr r30, r3
611-
; PPC64-NEXT: bl __truncsfhf2
612-
; PPC64-NEXT: nop
613-
; PPC64-NEXT: sync
614-
; PPC64-NEXT: sth r3, 0(r30)
615-
; PPC64-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
616-
; PPC64-NEXT: addi r1, r1, 128
617-
; PPC64-NEXT: ld r0, 16(r1)
618-
; PPC64-NEXT: mtlr r0
619-
; PPC64-NEXT: blr
563+
; CHECK-LABEL: store_atomic_f16__seq_cst:
564+
; CHECK: # %bb.0:
565+
; CHECK-NEXT: sync
566+
; CHECK-NEXT: sth r4, 0(r3)
567+
; CHECK-NEXT: blr
620568
store atomic half %val1, ptr %ptr seq_cst, align 2
621569
ret void
622570
}

llvm/test/CodeGen/PowerPC/f128-conv.ll

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1349,9 +1349,6 @@ define half @trunc(fp128 %a) nounwind {
13491349
; CHECK-NEXT: std r0, 48(r1)
13501350
; CHECK-NEXT: bl __trunckfhf2
13511351
; CHECK-NEXT: nop
1352-
; CHECK-NEXT: clrlwi r3, r3, 16
1353-
; CHECK-NEXT: mtfprwz f0, r3
1354-
; CHECK-NEXT: xscvhpdp f1, f0
13551352
; CHECK-NEXT: addi r1, r1, 32
13561353
; CHECK-NEXT: ld r0, 16(r1)
13571354
; CHECK-NEXT: mtlr r0
@@ -1364,9 +1361,6 @@ define half @trunc(fp128 %a) nounwind {
13641361
; CHECK-P8-NEXT: std r0, 48(r1)
13651362
; CHECK-P8-NEXT: bl __trunckfhf2
13661363
; CHECK-P8-NEXT: nop
1367-
; CHECK-P8-NEXT: clrldi r3, r3, 48
1368-
; CHECK-P8-NEXT: bl __extendhfsf2
1369-
; CHECK-P8-NEXT: nop
13701364
; CHECK-P8-NEXT: addi r1, r1, 32
13711365
; CHECK-P8-NEXT: ld r0, 16(r1)
13721366
; CHECK-P8-NEXT: mtlr r0
@@ -1379,15 +1373,20 @@ entry:
13791373
define fp128 @ext(half %a) nounwind {
13801374
; CHECK-LABEL: ext:
13811375
; CHECK: # %bb.0: # %entry
1382-
; CHECK-NEXT: xscpsgndp v2, f1, f1
1376+
; CHECK-NEXT: clrlwi r3, r3, 16
1377+
; CHECK-NEXT: mtfprwz f0, r3
1378+
; CHECK-NEXT: xscvhpdp v2, f0
13831379
; CHECK-NEXT: xscvdpqp v2, v2
13841380
; CHECK-NEXT: blr
13851381
;
13861382
; CHECK-P8-LABEL: ext:
13871383
; CHECK-P8: # %bb.0: # %entry
13881384
; CHECK-P8-NEXT: mflr r0
13891385
; CHECK-P8-NEXT: stdu r1, -32(r1)
1386+
; CHECK-P8-NEXT: clrldi r3, r3, 48
13901387
; CHECK-P8-NEXT: std r0, 48(r1)
1388+
; CHECK-P8-NEXT: bl __extendhfsf2
1389+
; CHECK-P8-NEXT: nop
13911390
; CHECK-P8-NEXT: bl __extendsfkf2
13921391
; CHECK-P8-NEXT: nop
13931392
; CHECK-P8-NEXT: addi r1, r1, 32

0 commit comments

Comments
 (0)