diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 4bd36e9eacbc6..8d0feccf7541f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -280,6 +280,10 @@ def HasSMEF16F16orSMEF8F16 def HasNEONandIsStreamingSafe : Predicate<"Subtarget->hasNEON()">, AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">; +// A subset of NEON instructions are legal in Streaming SVE mode only with +sme2p2. +def HasNEONandIsSME2p2StreamingSafe + : Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">, + AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">; def HasRCPC : Predicate<"Subtarget->hasRCPC()">, AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">; def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, @@ -6191,8 +6195,7 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), // Some float -> int -> float conversion patterns for which we want to keep the // int values in FP registers using the corresponding NEON instructions to // avoid more costly int <-> fp register transfers. -// TODO: Allow these in streaming[-compatible] functions with +sme2p2. -let Predicates = [HasNEON] in { +let Predicates = [HasNEONandIsSME2p2StreamingSafe] in { def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))), (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>; def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))), @@ -6202,8 +6205,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))), def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))), (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>; -// TODO: Allow these in streaming[-compatible] functions with +sme2p2. -let Predicates = [HasNEON, HasFullFP16] in { +let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in { def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))), (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>; def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))), @@ -6226,8 +6228,7 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))), // fp16: integer extraction from vector must be at least 32-bits to be legal. // Actual extraction result is then an in-reg sign-extension of lower 16-bits. -// TODO: Allow these in streaming[-compatible] functions with +sme2p2. -let Predicates = [HasNEON, HasFullFP16] in { +let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in { def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract (v8i16 FPR128:$Rn), (i64 0))), i16)))), (SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>; diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll index 9aadf3133ba19..f402463de7be8 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -force-streaming-compatible < %s | FileCheck %s -; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING +; RUN: llc -force-streaming-compatible -mattr=+sme2p2 < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS +; RUN: llc < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS target triple = "aarch64-unknown-linux-gnu" @@ -11,11 +12,11 @@ define double @t1(double %x) { ; CHECK-NEXT: scvtf d0, x8 ; CHECK-NEXT: ret ; -; NON-STREAMING-LABEL: t1: -; NON-STREAMING: // %bb.0: // %entry -; NON-STREAMING-NEXT: fcvtzs d0, d0 -; NON-STREAMING-NEXT: scvtf d0, d0 -; NON-STREAMING-NEXT: ret +; USE-NEON-NO-GPRS-LABEL: t1: +; USE-NEON-NO-GPRS: // %bb.0: // %entry +; USE-NEON-NO-GPRS-NEXT: fcvtzs d0, d0 +; USE-NEON-NO-GPRS-NEXT: scvtf d0, d0 +; USE-NEON-NO-GPRS-NEXT: ret entry: %conv = fptosi double %x to i64 %conv1 = sitofp i64 %conv to double @@ -29,11 +30,11 @@ define float @t2(float %x) { ; CHECK-NEXT: scvtf s0, w8 ; CHECK-NEXT: ret ; -; NON-STREAMING-LABEL: t2: -; NON-STREAMING: // %bb.0: // %entry -; NON-STREAMING-NEXT: fcvtzs s0, s0 -; NON-STREAMING-NEXT: scvtf s0, s0 -; NON-STREAMING-NEXT: ret +; USE-NEON-NO-GPRS-LABEL: t2: +; USE-NEON-NO-GPRS: // %bb.0: // %entry +; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0 +; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0 +; USE-NEON-NO-GPRS-NEXT: ret entry: %conv = fptosi float %x to i32 %conv1 = sitofp i32 %conv to float @@ -49,13 +50,13 @@ define half @t3(half %x) { ; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: ret ; -; NON-STREAMING-LABEL: t3: -; NON-STREAMING: // %bb.0: // %entry -; NON-STREAMING-NEXT: fcvt s0, h0 -; NON-STREAMING-NEXT: fcvtzs s0, s0 -; NON-STREAMING-NEXT: scvtf s0, s0 -; NON-STREAMING-NEXT: fcvt h0, s0 -; NON-STREAMING-NEXT: ret +; USE-NEON-NO-GPRS-LABEL: t3: +; USE-NEON-NO-GPRS: // %bb.0: // %entry +; USE-NEON-NO-GPRS-NEXT: fcvt s0, h0 +; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0 +; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0 +; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0 +; USE-NEON-NO-GPRS-NEXT: ret entry: %conv = fptosi half %x to i32 %conv1 = sitofp i32 %conv to half @@ -69,11 +70,11 @@ define double @t4(double %x) { ; CHECK-NEXT: ucvtf d0, x8 ; CHECK-NEXT: ret ; -; NON-STREAMING-LABEL: t4: -; NON-STREAMING: // %bb.0: // %entry -; NON-STREAMING-NEXT: fcvtzu d0, d0 -; NON-STREAMING-NEXT: ucvtf d0, d0 -; NON-STREAMING-NEXT: ret +; USE-NEON-NO-GPRS-LABEL: t4: +; USE-NEON-NO-GPRS: // %bb.0: // %entry +; USE-NEON-NO-GPRS-NEXT: fcvtzu d0, d0 +; USE-NEON-NO-GPRS-NEXT: ucvtf d0, d0 +; USE-NEON-NO-GPRS-NEXT: ret entry: %conv = fptoui double %x to i64 %conv1 = uitofp i64 %conv to double @@ -87,11 +88,11 @@ define float @t5(float %x) { ; CHECK-NEXT: ucvtf s0, w8 ; CHECK-NEXT: ret ; -; NON-STREAMING-LABEL: t5: -; NON-STREAMING: // %bb.0: // %entry -; NON-STREAMING-NEXT: fcvtzu s0, s0 -; NON-STREAMING-NEXT: ucvtf s0, s0 -; NON-STREAMING-NEXT: ret +; USE-NEON-NO-GPRS-LABEL: t5: +; USE-NEON-NO-GPRS: // %bb.0: // %entry +; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0 +; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0 +; USE-NEON-NO-GPRS-NEXT: ret entry: %conv = fptoui float %x to i32 %conv1 = uitofp i32 %conv to float @@ -107,13 +108,13 @@ define half @t6(half %x) { ; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: ret ; -; NON-STREAMING-LABEL: t6: -; NON-STREAMING: // %bb.0: // %entry -; NON-STREAMING-NEXT: fcvt s0, h0 -; NON-STREAMING-NEXT: fcvtzu s0, s0 -; NON-STREAMING-NEXT: ucvtf s0, s0 -; NON-STREAMING-NEXT: fcvt h0, s0 -; NON-STREAMING-NEXT: ret +; USE-NEON-NO-GPRS-LABEL: t6: +; USE-NEON-NO-GPRS: // %bb.0: // %entry +; USE-NEON-NO-GPRS-NEXT: fcvt s0, h0 +; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0 +; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0 +; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0 +; USE-NEON-NO-GPRS-NEXT: ret entry: %conv = fptoui half %x to i32 %conv1 = uitofp i32 %conv to half