From e63d8ff7d0bc9be9a3fee44449e174dd89ce3c5c Mon Sep 17 00:00:00 2001 From: Yu Li Date: Tue, 22 Apr 2025 08:24:40 +0000 Subject: [PATCH 1/4] [AArch64] Funnel Shift now uses rev32/rev64 instructions --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 8 +++++ llvm/test/CodeGen/AArch64/aarch64-fshl-rev.ll | 29 +++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/aarch64-fshl-rev.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index a060a2f597ccd..e7aa9c53a8343 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5679,6 +5679,14 @@ def : Pat<(v4i32 (bswap (v4i32 V128:$Rn))), def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))), (v2i64 (REV64v16i8 (v2i64 V128:$Rn)))>; +// Patterns for funnel shifts to be matched to equivalent REV instructions +def : Pat<(v2i64 (or (v2i64 (AArch64vshl (v2i64 V128:$Rn), (i32 32))), (v2i64 (AArch64vlshr (v2i64 V128:$Rn), (i32 32))))), + (v2i64 (REV64v4i32 (v2i64 V128:$Rn)))>; +def : Pat<(v4i32 (or (v4i32 (AArch64vshl (v4i32 V128:$Rn), (i32 16))), (v4i32 (AArch64vlshr (v4i32 V128:$Rn), (i32 16))))), + (v4i32 (REV32v8i16 (v4i32 V128:$Rn)))>; +def : Pat<(v2i32 (or (v2i32 (AArch64vshl (v2i32 V64:$Rn), (i32 16))), (v2i32 (AArch64vlshr (v2i32 V64:$Rn), (i32 16))))), + (v2i32 (REV32v4i16 (v2i32 V64:$Rn)))>; + //===----------------------------------------------------------------------===// // Advanced SIMD three vector instructions. 
//===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/aarch64-fshl-rev.ll b/llvm/test/CodeGen/AArch64/aarch64-fshl-rev.ll new file mode 100644 index 0000000000000..503bc307e7777 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-fshl-rev.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s + +define <2 x i64> @fhsl_to_rev2i64(<2 x i64> %r) { +; CHECK-LABEL: fhsl_to_rev2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: rev64 v0.4s, v0.4s +; CHECK-NEXT: ret + %or = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %r, <2 x i64> %r, <2 x i64> splat (i64 32)) + ret <2 x i64> %or +} + +define <4 x i32> @fshl_to_rev4i32(<4 x i32> %r) { +; CHECK-LABEL: fshl_to_rev4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: rev32 v0.8h, v0.8h +; CHECK-NEXT: ret + %or = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %r, <4 x i32> %r, <4 x i32> splat (i32 16)) + ret <4 x i32> %or +} + +define <2 x i32> @fshl_to_rev2i32(<2 x i32> %r) { +; CHECK-LABEL: fshl_to_rev2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: rev32 v0.4h, v0.4h +; CHECK-NEXT: ret + %or = tail call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %r, <2 x i32> %r, <2 x i32> splat (i32 16)) + ret <2 x i32> %or +} From eb402d9e49bd81990db4a0bd663e45b479a9bd96 Mon Sep 17 00:00:00 2001 From: Yu Li Date: Wed, 23 Apr 2025 09:18:50 +0000 Subject: [PATCH 2/4] Minor and aesthetic changes based on PR comments --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 9 ++-- llvm/test/CodeGen/AArch64/aarch64-fshl-rev.ll | 29 ----------- llvm/test/CodeGen/AArch64/fsh.ll | 50 +++++++++++++++++++ 3 files changed, 56 insertions(+), 32 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/aarch64-fshl-rev.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index e7aa9c53a8343..b998381672fa0 100644 --- 
a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5680,11 +5680,14 @@ def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))), (v2i64 (REV64v16i8 (v2i64 V128:$Rn)))>; // Patterns for funnel shifts to be matched to equivalent REV instructions -def : Pat<(v2i64 (or (v2i64 (AArch64vshl (v2i64 V128:$Rn), (i32 32))), (v2i64 (AArch64vlshr (v2i64 V128:$Rn), (i32 32))))), +def : Pat<(v2i64 (or (v2i64 (AArch64vshl (v2i64 V128:$Rn), (i32 32))), + (v2i64 (AArch64vlshr (v2i64 V128:$Rn), (i32 32))))), (v2i64 (REV64v4i32 (v2i64 V128:$Rn)))>; -def : Pat<(v4i32 (or (v4i32 (AArch64vshl (v4i32 V128:$Rn), (i32 16))), (v4i32 (AArch64vlshr (v4i32 V128:$Rn), (i32 16))))), +def : Pat<(v4i32 (or (v4i32 (AArch64vshl (v4i32 V128:$Rn), (i32 16))), + (v4i32 (AArch64vlshr (v4i32 V128:$Rn), (i32 16))))), (v4i32 (REV32v8i16 (v4i32 V128:$Rn)))>; -def : Pat<(v2i32 (or (v2i32 (AArch64vshl (v2i32 V64:$Rn), (i32 16))), (v2i32 (AArch64vlshr (v2i32 V64:$Rn), (i32 16))))), +def : Pat<(v2i32 (or (v2i32 (AArch64vshl (v2i32 V64:$Rn), (i32 16))), + (v2i32 (AArch64vlshr (v2i32 V64:$Rn), (i32 16))))), (v2i32 (REV32v4i16 (v2i32 V64:$Rn)))>; //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/aarch64-fshl-rev.ll b/llvm/test/CodeGen/AArch64/aarch64-fshl-rev.ll deleted file mode 100644 index 503bc307e7777..0000000000000 --- a/llvm/test/CodeGen/AArch64/aarch64-fshl-rev.ll +++ /dev/null @@ -1,29 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s - -define <2 x i64> @fhsl_to_rev2i64(<2 x i64> %r) { -; CHECK-LABEL: fhsl_to_rev2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: rev64 v0.4s, v0.4s -; CHECK-NEXT: ret - %or = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %r, <2 x i64> %r, <2 x i64> splat (i64 32)) - ret <2 x i64> %or -} - -define <4 x i32> @fshl_to_rev4i32(<4 x i32> %r) { -; CHECK-LABEL: 
fshl_to_rev4i32: -; CHECK: // %bb.0: -; CHECK-NEXT: rev32 v0.8h, v0.8h -; CHECK-NEXT: ret - %or = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %r, <4 x i32> %r, <4 x i32> splat (i32 16)) - ret <4 x i32> %or -} - -define <2 x i32> @fshl_to_rev2i32(<2 x i32> %r) { -; CHECK-LABEL: fshl_to_rev2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: rev32 v0.4h, v0.4h -; CHECK-NEXT: ret - %or = tail call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %r, <2 x i32> %r, <2 x i32> splat (i32 16)) - ret <2 x i32> %or -} diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll index 2cee2f2b2686c..f45ab61db1e6d 100644 --- a/llvm/test/CodeGen/AArch64/fsh.ll +++ b/llvm/test/CodeGen/AArch64/fsh.ll @@ -4506,3 +4506,53 @@ entry: %d = call <2 x i128> @llvm.fshr(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) ret <2 x i128> %d } + + + +define <2 x i64> @fhsl_to_rev2i64(<2 x i64> %r) { +; CHECK-SD-LABEL: fhsl_to_rev2i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: rev64 v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fhsl_to_rev2i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: shl v1.2d, v0.2d, #32 +; CHECK-GI-NEXT: ushr v0.2d, v0.2d, #32 +; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret + %or = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %r, <2 x i64> %r, <2 x i64> splat (i64 32)) + ret <2 x i64> %or +} + +define <4 x i32> @fshl_to_rev4i32(<4 x i32> %r) { +; CHECK-SD-LABEL: fshl_to_rev4i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: rev32 v0.8h, v0.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshl_to_rev4i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: shl v1.4s, v0.4s, #16 +; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #16 +; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret + %or = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %r, <4 x i32> %r, <4 x i32> splat (i32 16)) + ret <4 x i32> %or +} + +define <2 x i32> @fshl_to_rev2i32(<2 x i32> %r) { +; CHECK-SD-LABEL: fshl_to_rev2i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: rev32 v0.4h, v0.4h +; CHECK-SD-NEXT: ret 
+; +; CHECK-GI-LABEL: fshl_to_rev2i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: shl v1.2s, v0.2s, #16 +; CHECK-GI-NEXT: ushr v0.2s, v0.2s, #16 +; CHECK-GI-NEXT: orr v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ret + %or = tail call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %r, <2 x i32> %r, <2 x i32> splat (i32 16)) + ret <2 x i32> %or +} From ac5d7a3d5553590f499d01aafdc8b5106e18faa9 Mon Sep 17 00:00:00 2001 From: Yu Li Date: Wed, 23 Apr 2025 09:33:05 +0000 Subject: [PATCH 3/4] Minor spelling mistake --- llvm/test/CodeGen/AArch64/fsh.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll index f45ab61db1e6d..e29f35cfc2173 100644 --- a/llvm/test/CodeGen/AArch64/fsh.ll +++ b/llvm/test/CodeGen/AArch64/fsh.ll @@ -4509,13 +4509,13 @@ entry: -define <2 x i64> @fhsl_to_rev2i64(<2 x i64> %r) { -; CHECK-SD-LABEL: fhsl_to_rev2i64: +define <2 x i64> @fshl_to_rev2i64(<2 x i64> %r) { +; CHECK-SD-LABEL: fshl_to_rev2i64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: rev64 v0.4s, v0.4s ; CHECK-SD-NEXT: ret ; -; CHECK-GI-LABEL: fhsl_to_rev2i64: +; CHECK-GI-LABEL: fshl_to_rev2i64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: shl v1.2d, v0.2d, #32 ; CHECK-GI-NEXT: ushr v0.2d, v0.2d, #32 From fd9140aadceddf87ba3b4e6f7692b14a0746d760 Mon Sep 17 00:00:00 2001 From: Yu Li Date: Wed, 23 Apr 2025 13:25:04 +0000 Subject: [PATCH 4/4] Whitespace change --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d21791a78d5a1..7f7e1d20ae604 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5690,13 +5690,13 @@ def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))), // Patterns for funnel shifts to be matched to equivalent REV instructions def : Pat<(v2i64 (or (v2i64 (AArch64vshl (v2i64 V128:$Rn), (i32 32))), - (v2i64 (AArch64vlshr 
(v2i64 V128:$Rn), (i32 32))))), + (v2i64 (AArch64vlshr (v2i64 V128:$Rn), (i32 32))))), (v2i64 (REV64v4i32 (v2i64 V128:$Rn)))>; def : Pat<(v4i32 (or (v4i32 (AArch64vshl (v4i32 V128:$Rn), (i32 16))), - (v4i32 (AArch64vlshr (v4i32 V128:$Rn), (i32 16))))), + (v4i32 (AArch64vlshr (v4i32 V128:$Rn), (i32 16))))), (v4i32 (REV32v8i16 (v4i32 V128:$Rn)))>; def : Pat<(v2i32 (or (v2i32 (AArch64vshl (v2i32 V64:$Rn), (i32 16))), - (v2i32 (AArch64vlshr (v2i32 V64:$Rn), (i32 16))))), + (v2i32 (AArch64vlshr (v2i32 V64:$Rn), (i32 16))))), (v2i32 (REV32v4i16 (v2i32 V64:$Rn)))>; //===----------------------------------------------------------------------===//