From 713d4c66928b4d271b8915cb6cd58023e19013ec Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Mon, 14 Oct 2024 13:40:59 +0000 Subject: [PATCH 1/4] [AArch64] Avoid single-element vector fp converts in streaming[-compatible] functions The single-element vector variants of FCVTZS, FCVTZU, UCVTF, and SCVTF are only supported in streaming[-compatible] functions with `+sme2p2`. Reference: - https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/FCVTZS--vector--integer---Floating-point-convert-to-signed-integer--rounding-toward-zero--vector-- - https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/UCVTF--vector--integer---Unsigned-integer-convert-to-floating-point--vector-- - https://developer.arm.com/documentation/ddi0602/2024-09/SIMD-FP-Instructions/SCVTF--vector--integer---Signed-integer-convert-to-floating-point--vector-- --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 17 ++- .../sve-streaming-mode-cvt-fp-int-fp.ll | 121 ++++++++++++++++++ 2 files changed, 132 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 325508b62a9f1..bd9da10300c7f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -247,6 +247,11 @@ def HasSMEF16F16orSMEF8F16 def HasNEONandIsStreamingSafe : Predicate<"Subtarget->hasNEON()">, AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">; +// A subet of NEON instructions legal in Streaming SVE mode with +sme2p2. +// TODO: Change to check for hasSME2p2() once FEAT_SME2p2 is implemented. +def HasNEONandIsSME2p2StreamingSafe + : Predicate<"Subtarget->isNeonAvailable()">, + AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">; def HasRCPC : Predicate<"Subtarget->hasRCPC()">, AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">; def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, @@ -6237,7 +6242,7 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), // Some float -> int -> float conversion patterns for which we want to keep the // int values in FP registers using the corresponding NEON instructions to // avoid more costly int <-> fp register transfers. -let Predicates = [HasNEONandIsStreamingSafe] in { +let Predicates = [HasNEONandIsSME2p2StreamingSafe] in { def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))), (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>; def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))), @@ -6247,7 +6252,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))), def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))), (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>; -let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in { +let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in { def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))), (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>; def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))), @@ -6270,9 +6275,9 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))), // fp16: integer extraction from vector must be at least 32-bits to be legal. // Actual extraction result is then an in-reg sign-extension of lower 16-bits. -let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in { -def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract - (v8i16 FPR128:$Rn), (i64 0))), i16)))), +let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in { +def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract + (v8i16 FPR128:$Rn), (i64 0))), i16)))), (SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>; // unsigned 32-bit extracted element is truncated to 16-bits using AND @@ -6367,7 +6372,7 @@ def : Pat <(f64 (uint_to_fp (i32 (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>; // 64-bits -> double are handled in target specific dag combine: // performIntToFpCombine. -} // let Predicates = [HasNEONandIsStreamingSafe] +} // let Predicates = [HasNEONandIsSME2p2StreamingSafe] //===----------------------------------------------------------------------===// // Advanced SIMD three different-sized vector instructions. diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll new file mode 100644 index 0000000000000..9aadf3133ba19 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll @@ -0,0 +1,121 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -force-streaming-compatible < %s | FileCheck %s +; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING + +target triple = "aarch64-unknown-linux-gnu" + +define double @t1(double %x) { +; CHECK-LABEL: t1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: scvtf d0, x8 +; CHECK-NEXT: ret +; +; NON-STREAMING-LABEL: t1: +; NON-STREAMING: // %bb.0: // %entry +; NON-STREAMING-NEXT: fcvtzs d0, d0 +; NON-STREAMING-NEXT: scvtf d0, d0 +; NON-STREAMING-NEXT: ret +entry: + %conv = fptosi double %x to i64 + %conv1 = sitofp i64 %conv to double + ret double %conv1 +} + +define float @t2(float %x) { +; CHECK-LABEL: t2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs w8, s0 +; CHECK-NEXT: scvtf s0, w8 +; CHECK-NEXT: ret +; +; NON-STREAMING-LABEL: t2: +; NON-STREAMING: // %bb.0: // %entry +; NON-STREAMING-NEXT: fcvtzs s0, s0 +; NON-STREAMING-NEXT: scvtf s0, s0 +; NON-STREAMING-NEXT: ret +entry: + %conv = fptosi float %x to i32 + %conv1 = sitofp i32 %conv to float + ret float %conv1 +} + +define half @t3(half %x) { +; CHECK-LABEL: t3: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvtzs w8, s0 +; CHECK-NEXT: scvtf s0, w8 +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ret +; +; NON-STREAMING-LABEL: t3: +; NON-STREAMING: // %bb.0: // %entry +; NON-STREAMING-NEXT: fcvt s0, h0 +; NON-STREAMING-NEXT: fcvtzs s0, s0 +; NON-STREAMING-NEXT: scvtf s0, s0 +; NON-STREAMING-NEXT: fcvt h0, s0 +; NON-STREAMING-NEXT: ret +entry: + %conv = fptosi half %x to i32 + %conv1 = sitofp i32 %conv to half + ret half %conv1 +} + +define double @t4(double %x) { +; CHECK-LABEL: t4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu x8, d0 +; CHECK-NEXT: ucvtf d0, x8 +; CHECK-NEXT: ret +; +; NON-STREAMING-LABEL: t4: +; NON-STREAMING: // %bb.0: // %entry +; NON-STREAMING-NEXT: fcvtzu d0, d0 +; NON-STREAMING-NEXT: ucvtf d0, d0 +; NON-STREAMING-NEXT: ret +entry: + %conv = fptoui double %x to i64 + %conv1 = uitofp i64 %conv to double + ret double %conv1 +} + +define float @t5(float %x) { +; CHECK-LABEL: t5: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: ucvtf s0, w8 +; CHECK-NEXT: ret +; +; NON-STREAMING-LABEL: t5: +; NON-STREAMING: // %bb.0: // %entry +; NON-STREAMING-NEXT: fcvtzu s0, s0 +; NON-STREAMING-NEXT: ucvtf s0, s0 +; NON-STREAMING-NEXT: ret +entry: + %conv = fptoui float %x to i32 + %conv1 = uitofp i32 %conv to float + ret float %conv1 +} + +define half @t6(half %x) { +; CHECK-LABEL: t6: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: ucvtf s0, w8 +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ret +; +; NON-STREAMING-LABEL: t6: +; NON-STREAMING: // %bb.0: // %entry +; NON-STREAMING-NEXT: fcvt s0, h0 +; NON-STREAMING-NEXT: fcvtzu s0, s0 +; NON-STREAMING-NEXT: ucvtf s0, s0 +; NON-STREAMING-NEXT: fcvt h0, s0 +; NON-STREAMING-NEXT: ret +entry: + %conv = fptoui half %x to i32 + %conv1 = uitofp i32 %conv to half + ret half %conv1 +} From decbae4c6e0254a6249d23b0c1b67ed76fed99be Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Mon, 14 Oct 2024 14:29:50 +0000 Subject: [PATCH 2/4] Fix typo --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index bd9da10300c7f..a09b2958df18c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -247,7 +247,7 @@ def HasSMEF16F16orSMEF8F16 def HasNEONandIsStreamingSafe : Predicate<"Subtarget->hasNEON()">, AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">; -// A subet of NEON instructions legal in Streaming SVE mode with +sme2p2. +// A subset of NEON instructions legal in Streaming SVE mode with +sme2p2. // TODO: Change to check for hasSME2p2() once FEAT_SME2p2 is implemented. def HasNEONandIsSME2p2StreamingSafe : Predicate<"Subtarget->isNeonAvailable()">, From 122d5eafce8d023a3b63cde788cefe3747d417f8 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Mon, 14 Oct 2024 16:13:28 +0000 Subject: [PATCH 3/4] Fixups --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index a09b2958df18c..f1785457be5cb 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -247,11 +247,6 @@ def HasSMEF16F16orSMEF8F16 def HasNEONandIsStreamingSafe : Predicate<"Subtarget->hasNEON()">, AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">; -// A subset of NEON instructions legal in Streaming SVE mode with +sme2p2. -// TODO: Change to check for hasSME2p2() once FEAT_SME2p2 is implemented. -def HasNEONandIsSME2p2StreamingSafe - : Predicate<"Subtarget->isNeonAvailable()">, - AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">; def HasRCPC : Predicate<"Subtarget->hasRCPC()">, AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">; def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, @@ -6242,7 +6237,8 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), // Some float -> int -> float conversion patterns for which we want to keep the // int values in FP registers using the corresponding NEON instructions to // avoid more costly int <-> fp register transfers. -let Predicates = [HasNEONandIsSME2p2StreamingSafe] in { +// TODO: Allow these in streaming[-compatible] functions with +sme2p2. +let Predicates = [HasNEON] in { def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))), (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>; def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))), @@ -6252,7 +6248,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))), def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))), (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>; -let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in { +let Predicates = [HasNEON, HasFullFP16] in { def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))), (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>; def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))), @@ -6275,7 +6271,7 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))), // fp16: integer extraction from vector must be at least 32-bits to be legal. // Actual extraction result is then an in-reg sign-extension of lower 16-bits. -let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in { +let Predicates = [HasNEON, HasFullFP16] in { def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract (v8i16 FPR128:$Rn), (i64 0))), i16)))), (SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>; @@ -6372,7 +6368,7 @@ def : Pat <(f64 (uint_to_fp (i32 (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>; // 64-bits -> double are handled in target specific dag combine: // performIntToFpCombine. -} // let Predicates = [HasNEONandIsSME2p2StreamingSafe] +} // let Predicates = [HasNEON] //===----------------------------------------------------------------------===// // Advanced SIMD three different-sized vector instructions. From 80227aebfbe5acb692bdbca6ece386c3112296c5 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Tue, 15 Oct 2024 08:50:48 +0000 Subject: [PATCH 4/4] Add TODOs & test updates --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 + ...e-streaming-mode-fixed-length-int-to-fp.ll | 426 +++++++++--------- 2 files changed, 205 insertions(+), 223 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index f1785457be5cb..32f2c7c71d175 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6248,6 +6248,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))), def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))), (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>; +// TODO: Allow these in streaming[-compatible] functions with +sme2p2. let Predicates = [HasNEON, HasFullFP16] in { def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))), (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>; @@ -6271,6 +6272,7 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))), // fp16: integer extraction from vector must be at least 32-bits to be legal. // Actual extraction result is then an in-reg sign-extension of lower 16-bits. +// TODO: Allow these in streaming[-compatible] functions with +sme2p2. let Predicates = [HasNEON, HasFullFP16] in { def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract (v8i16 FPR128:$Rn), (i64 0))), i16)))), diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll index afd3bb7161c15..0c712a15d4de2 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll @@ -21,20 +21,20 @@ define <4 x half> @ucvtf_v4i16_v4f16(<4 x i16> %op1) { ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #14] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #12] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #10] -; NONEON-NOSVE-NEXT: ldr h0, [sp] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #8] ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] @@ -58,36 +58,36 @@ define void @ucvtf_v8i16_v8f16(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: ldr q0, [x0] ; NONEON-NOSVE-NEXT: str q0, [sp, #-32]! ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #30] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #28] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #26] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #24] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #22] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #20] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #18] -; NONEON-NOSVE-NEXT: ldr h0, [sp] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #16] ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] @@ -115,68 +115,68 @@ define void @ucvtf_v16i16_v16f16(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]! ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #62] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #60] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #58] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #56] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #54] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #52] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #50] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #48] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #46] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #44] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #42] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #40] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #38] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #36] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp] ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #34] -; NONEON-NOSVE-NEXT: ldr h0, [sp] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: str h0, [sp, #32] ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] @@ -207,11 +207,11 @@ define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) { ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 -; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #4] +; NONEON-NOSVE-NEXT: ldrh w9, [sp] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ucvtf s1, w9 +; NONEON-NOSVE-NEXT: stp s1, s0, [sp, #8] ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] ; NONEON-NOSVE-NEXT: add sp, sp, #16 ; NONEON-NOSVE-NEXT: ret @@ -234,15 +234,15 @@ define <4 x float> @ucvtf_v4i16_v4f32(<4 x i16> %op1) { ; NONEON-NOSVE-NEXT: sub sp, sp, #32 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16] ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] ; NONEON-NOSVE-NEXT: add sp, sp, #32 @@ -271,25 +271,25 @@ define void @ucvtf_v8i16_v8f32(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #30] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #28] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30] +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26] ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #26] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #24] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #22] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #20] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #18] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #16] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32] ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1] @@ -328,47 +328,47 @@ define void @ucvtf_v16i16_v16f32(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #46] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #46] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #44] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #44] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #42] ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #88] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #42] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #40] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #38] ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #80] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #38] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #36] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #36] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #34] ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #72] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #34] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #32] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #32] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62] ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #64] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #62] +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60] ; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #60] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58] ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #120] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #58] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #56] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54] ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #112] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #54] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #52] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50] ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #104] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #50] -; NONEON-NOSVE-NEXT: ucvtf s1, s0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #48] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ucvtf s1, w8 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #96] ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] ; NONEON-NOSVE-NEXT: stp q2, q3, [x1] @@ -399,8 +399,8 @@ define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) { ; NONEON-NOSVE-NEXT: sub sp, sp, #16 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] +; NONEON-NOSVE-NEXT: ucvtf d0, w8 ; NONEON-NOSVE-NEXT: str d0, [sp] ; NONEON-NOSVE-NEXT: ldr d0, [sp], #16 ; NONEON-NOSVE-NEXT: ret @@ -424,11 +424,11 @@ define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) { ; NONEON-NOSVE-NEXT: sub sp, sp, #32 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 -; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] +; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] +; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #16] ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] ; NONEON-NOSVE-NEXT: add sp, sp, #32 ; NONEON-NOSVE-NEXT: ret @@ -464,15 +464,13 @@ define void @ucvtf_v4i16_v4f64(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] ; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #16] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #40] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #36] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #32] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #48] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1] @@ -529,27 +527,23 @@ define void @ucvtf_v8i16_v8f64(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] ; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #32] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #92] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #88] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #80] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #144] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #84] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #80] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #72] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #128] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #76] +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #64] ; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #128] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #72] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #68] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #64] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] ; NONEON-NOSVE-NEXT: stp q2, q3, [x1] @@ -649,49 +643,42 @@ define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] ; NONEON-NOSVE-NEXT: str d1, [sp, #328] ; NONEON-NOSVE-NEXT: ldp d0, d1, [sp, #104] -; NONEON-NOSVE-NEXT: str d0, [sp, #168] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #164] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #160] ; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #176] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #160] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: str d0, [sp, #168] +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #152] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #156] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #152] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #144] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #148] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #144] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #136] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #140] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #136] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #332] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #192] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #332] +; NONEON-NOSVE-NEXT: ucvtf d1, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #328] ; NONEON-NOSVE-NEXT: ldp q4, q3, [sp, #192] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #328] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #184] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #304] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #188] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #184] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #176] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #288] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #180] +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #168] ; NONEON-NOSVE-NEXT: ldp q7, q6, [sp, #288] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #176] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #272] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #172] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #168] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #256] ; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #224] ; NONEON-NOSVE-NEXT: ldp q2, q5, [sp, #256] @@ -1041,10 +1028,9 @@ define <2 x double> @ucvtf_v2i32_v2f64(<2 x i32> %op1) { ; NONEON-NOSVE-NEXT: sub sp, sp, #32 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 ; NONEON-NOSVE-NEXT: str d0, [sp, #8] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #12] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #8] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16] ; NONEON-NOSVE-NEXT: add sp, sp, #32 @@ -1073,15 +1059,13 @@ define void @ucvtf_v4i32_v4f64(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #28] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #24] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #20] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #16] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1] @@ -1120,27 +1104,23 @@ define void @ucvtf_v8i32_v8f64(ptr %a, ptr %b) { ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #44] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #40] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #36] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #32] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56] ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #64] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #60] +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #48] ; NONEON-NOSVE-NEXT: ldp q3, q2, [sp, #64] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #56] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] -; NONEON-NOSVE-NEXT: ldr s0, [sp, #52] -; NONEON-NOSVE-NEXT: ucvtf d1, d0 -; NONEON-NOSVE-NEXT: ldr s0, [sp, #48] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ucvtf d1, w9 +; NONEON-NOSVE-NEXT: ucvtf d0, w8 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96] ; NONEON-NOSVE-NEXT: stp q2, q3, [x1] @@ -2984,8 +2964,8 @@ define half @ucvtf_i16_f16(ptr %0) { ; ; NONEON-NOSVE-LABEL: ucvtf_i16_f16: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: ldr h0, [x0] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ldrh w8, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 ; NONEON-NOSVE-NEXT: fcvt h0, s0 ; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 @@ -2996,14 +2976,14 @@ define half @ucvtf_i16_f16(ptr %0) { define float @ucvtf_i16_f32(ptr %0) { ; CHECK-LABEL: ucvtf_i16_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ucvtf s0, s0 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ucvtf s0, w8 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ucvtf_i16_f32: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: ldr h0, [x0] -; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ldrh w8, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 ; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = uitofp i16 %2 to float @@ -3013,14 +2993,14 @@ define float @ucvtf_i16_f32(ptr %0) { define double @ucvtf_i16_f64(ptr %0) { ; CHECK-LABEL: ucvtf_i16_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ucvtf d0, d0 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ucvtf d0, w8 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ucvtf_i16_f64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: ldr h0, [x0] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ldrh w8, [x0] +; NONEON-NOSVE-NEXT: ucvtf d0, w8 ; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = uitofp i16 %2 to double @@ -3065,14 +3045,14 @@ define float @ucvtf_i32_f32(ptr %0) { define double @ucvtf_i32_f64(ptr %0) { ; CHECK-LABEL: ucvtf_i32_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ucvtf d0, d0 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ucvtf d0, w8 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ucvtf_i32_f64: ; NONEON-NOSVE: // %bb.0: -; NONEON-NOSVE-NEXT: ldr s0, [x0] -; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: ucvtf d0, w8 ; NONEON-NOSVE-NEXT: ret %2 = load i32, ptr %0, align 64 %3 = uitofp i32 %2 to double