Skip to content

Commit d283705

Browse files
[AArch64][SVE] Fix definition of bfloat fcvt intrinsics. (#110281)
Affected intrinsics: llvm.aarch64.sve.fcvt.bf16f32 llvm.aarch64.sve.fcvtnt.bf16f32 The named intrinsics took a predicate based on the smallest element type when it should be based on the largest. The intrinsics have been replace by v2 equivalents and affected code ported to use them. Patch includes changes to getSVEPredicateBitCast() that ensure the generated code for the auto-upgraded old intrinsics is unchanged.
1 parent 7506872 commit d283705

File tree

12 files changed

+129
-62
lines changed

12 files changed

+129
-62
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -943,11 +943,6 @@ defm SVFCVTZS_S64_F16 : SInstCvtMXZ<"svcvt_s64[_f16]", "ddPO", "dPO", "l", "aar
943943
defm SVFCVTZS_S32_F32 : SInstCvtMXZ<"svcvt_s32[_f32]", "ddPM", "dPM", "i", "aarch64_sve_fcvtzs", [IsOverloadCvt]>;
944944
defm SVFCVTZS_S64_F32 : SInstCvtMXZ<"svcvt_s64[_f32]", "ddPM", "dPM", "l", "aarch64_sve_fcvtzs_i64f32">;
945945

946-
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
947-
defm SVCVT_BF16_F32 : SInstCvtMXZ<"svcvt_bf16[_f32]", "ddPM", "dPM", "b", "aarch64_sve_fcvt_bf16f32">;
948-
def SVCVTNT_BF16_F32 : SInst<"svcvtnt_bf16[_f32]", "ddPM", "b", MergeOp1, "aarch64_sve_fcvtnt_bf16f32", [IsOverloadNone, VerifyRuntimeMode]>;
949-
}
950-
951946
// svcvt_s##_f64
952947
defm SVFCVTZS_S32_F64 : SInstCvtMXZ<"svcvt_s32[_f64]", "ttPd", "tPd", "d", "aarch64_sve_fcvtzs_i32f64">;
953948
defm SVFCVTZS_S64_F64 : SInstCvtMXZ<"svcvt_s64[_f64]", "ddPN", "dPN", "l", "aarch64_sve_fcvtzs", [IsOverloadCvt]>;
@@ -1003,19 +998,26 @@ defm SVFCVT_F32_F64 : SInstCvtMXZ<"svcvt_f32[_f64]", "MMPd", "MPd", "d", "aarc
1003998
defm SVFCVT_F64_F16 : SInstCvtMXZ<"svcvt_f64[_f16]", "ddPO", "dPO", "d", "aarch64_sve_fcvt_f64f16">;
1004999
defm SVFCVT_F64_F32 : SInstCvtMXZ<"svcvt_f64[_f32]", "ddPM", "dPM", "d", "aarch64_sve_fcvt_f64f32">;
10051000

1006-
let SVETargetGuard = "sve2", SMETargetGuard = "sme" in {
1007-
defm SVCVTLT_F32 : SInstCvtMX<"svcvtlt_f32[_f16]", "ddPh", "dPh", "f", "aarch64_sve_fcvtlt_f32f16">;
1008-
defm SVCVTLT_F64 : SInstCvtMX<"svcvtlt_f64[_f32]", "ddPh", "dPh", "d", "aarch64_sve_fcvtlt_f64f32">;
1001+
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
1002+
defm SVCVT_BF16_F32 : SInstCvtMXZ<"svcvt_bf16[_f32]", "$$Pd", "$Pd", "f", "aarch64_sve_fcvt_bf16f32_v2">;
1003+
1004+
def SVCVTNT_BF16_F32 : SInst<"svcvtnt_bf16[_f32]", "$$Pd", "f", MergeOp1, "aarch64_sve_fcvtnt_bf16f32_v2", [IsOverloadNone, VerifyRuntimeMode]>;
1005+
// SVCVTNT_X_BF16_F32 : Implemented as macro by SveEmitter.cpp
1006+
}
10091007

1010-
defm SVCVTX_F32 : SInstCvtMXZ<"svcvtx_f32[_f64]", "MMPd", "MPd", "d", "aarch64_sve_fcvtx_f32f64">;
1008+
let SVETargetGuard = "sve2", SMETargetGuard = "sme" in {
1009+
defm SVCVTLT_F32_F16 : SInstCvtMX<"svcvtlt_f32[_f16]", "ddPh", "dPh", "f", "aarch64_sve_fcvtlt_f32f16">;
1010+
defm SVCVTLT_F64_F32 : SInstCvtMX<"svcvtlt_f64[_f32]", "ddPh", "dPh", "d", "aarch64_sve_fcvtlt_f64f32">;
10111011

1012-
def SVCVTNT_F32 : SInst<"svcvtnt_f16[_f32]", "hhPd", "f", MergeOp1, "aarch64_sve_fcvtnt_f16f32", [IsOverloadNone, VerifyRuntimeMode]>;
1013-
def SVCVTNT_F64 : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch64_sve_fcvtnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
1014-
// SVCVTNT_X : Implemented as macro by SveEmitter.cpp
1012+
defm SVCVTX_F32_F64 : SInstCvtMXZ<"svcvtx_f32[_f64]", "MMPd", "MPd", "d", "aarch64_sve_fcvtx_f32f64">;
10151013

1016-
def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
1017-
// SVCVTXNT_X_F32 : Implemented as macro by SveEmitter.cpp
1014+
def SVCVTNT_F16_F32 : SInst<"svcvtnt_f16[_f32]", "hhPd", "f", MergeOp1, "aarch64_sve_fcvtnt_f16f32", [IsOverloadNone, VerifyRuntimeMode]>;
1015+
def SVCVTNT_F32_F64 : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch64_sve_fcvtnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
1016+
// SVCVTNT_X_F16_F32 : Implemented as macro by SveEmitter.cpp
1017+
// SVCVTNT_X_F32_F64 : Implemented as macro by SveEmitter.cpp
10181018

1019+
def SVCVTXNT_F32_F64 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
1020+
// SVCVTXNT_X_F32_F64 : Implemented as macro by SveEmitter.cpp
10191021
}
10201022

10211023
////////////////////////////////////////////////////////////////////////////////

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt-bfloat.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@
2424

2525
// CHECK-LABEL: @test_svcvt_bf16_f32_x(
2626
// CHECK-NEXT: entry:
27-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
28-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
27+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
28+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
2929
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
3030
//
3131
// CPP-CHECK-LABEL: @_Z21test_svcvt_bf16_f32_xu10__SVBool_tu13__SVFloat32_t(
3232
// CPP-CHECK-NEXT: entry:
33-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
34-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
33+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
34+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
3535
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
3636
//
3737
svbfloat16_t test_svcvt_bf16_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR {
@@ -40,14 +40,14 @@ svbfloat16_t test_svcvt_bf16_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR {
4040

4141
// CHECK-LABEL: @test_svcvt_bf16_f32_z(
4242
// CHECK-NEXT: entry:
43-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
44-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
43+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
44+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
4545
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
4646
//
4747
// CPP-CHECK-LABEL: @_Z21test_svcvt_bf16_f32_zu10__SVBool_tu13__SVFloat32_t(
4848
// CPP-CHECK-NEXT: entry:
49-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
50-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
49+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
50+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
5151
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
5252
//
5353
svbfloat16_t test_svcvt_bf16_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR {
@@ -56,14 +56,14 @@ svbfloat16_t test_svcvt_bf16_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR {
5656

5757
// CHECK-LABEL: @test_svcvt_bf16_f32_m(
5858
// CHECK-NEXT: entry:
59-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
60-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
59+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
60+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
6161
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
6262
//
6363
// CPP-CHECK-LABEL: @_Z21test_svcvt_bf16_f32_mu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
6464
// CPP-CHECK-NEXT: entry:
65-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
66-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
65+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
66+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
6767
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
6868
//
6969
svbfloat16_t test_svcvt_bf16_f32_m(svbfloat16_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR {

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@
2424

2525
// CHECK-LABEL: @test_svcvtnt_bf16_f32_x(
2626
// CHECK-NEXT: entry:
27-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
28-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
27+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
28+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
2929
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
3030
//
3131
// CPP-CHECK-LABEL: @_Z23test_svcvtnt_bf16_f32_xu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
3232
// CPP-CHECK-NEXT: entry:
33-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
34-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
33+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
34+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
3535
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
3636
//
3737
svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t op) MODE_ATTR {
@@ -40,14 +40,14 @@ svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t
4040

4141
// CHECK-LABEL: @test_svcvtnt_bf16_f32_m(
4242
// CHECK-NEXT: entry:
43-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
44-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
43+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
44+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
4545
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
4646
//
4747
// CPP-CHECK-LABEL: @_Z23test_svcvtnt_bf16_f32_mu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
4848
// CPP-CHECK-NEXT: entry:
49-
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
50-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
49+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
50+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
5151
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
5252
//
5353
svbfloat16_t test_svcvtnt_bf16_f32_m(svbfloat16_t even, svbool_t pg, svfloat32_t op) MODE_ATTR {

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2184,8 +2184,8 @@ def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1
21842184
def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
21852185
def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
21862186

2187-
def int_aarch64_sve_fcvt_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
2188-
def int_aarch64_sve_fcvtnt_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
2187+
def int_aarch64_sve_fcvt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
2188+
def int_aarch64_sve_fcvtnt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
21892189

21902190
def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
21912191
def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,12 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
846846
return false; // No other 'aarch64.sve.bf*'.
847847
}
848848

849+
// 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
850+
if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
851+
NewFn = nullptr;
852+
return true;
853+
}
854+
849855
if (Name.consume_front("addqv")) {
850856
// 'aarch64.sve.addqv'.
851857
if (!F->getReturnType()->isFPOrFPVectorTy())
@@ -4072,6 +4078,35 @@ static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
40724078
return Rep;
40734079
}
40744080

4081+
static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4082+
Function *F, IRBuilder<> &Builder) {
4083+
Intrinsic::ID NewID =
4084+
StringSwitch<Intrinsic::ID>(Name)
4085+
.Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4086+
.Case("sve.fcvtnt.bf16f32", Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4087+
.Default(Intrinsic::not_intrinsic);
4088+
if (NewID == Intrinsic::not_intrinsic)
4089+
llvm_unreachable("Unhandled Intrinsic!");
4090+
4091+
SmallVector<Value *, 3> Args(CI->args());
4092+
4093+
// The original intrinsics incorrectly used a predicate based on the smallest
4094+
// element type rather than the largest.
4095+
Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4096+
Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4097+
4098+
if (Args[1]->getType() != BadPredTy)
4099+
llvm_unreachable("Unexpected predicate type!");
4100+
4101+
Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4102+
BadPredTy, Args[1]);
4103+
Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
4104+
GoodPredTy, Args[1]);
4105+
4106+
Function *NewF = Intrinsic::getDeclaration(CI->getModule(), NewID);
4107+
return Builder.CreateCall(NewF, Args, CI->getName());
4108+
}
4109+
40754110
static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
40764111
IRBuilder<> &Builder) {
40774112
if (Name == "mve.vctp64.old") {
@@ -4325,6 +4360,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
43254360

43264361
bool IsX86 = Name.consume_front("x86.");
43274362
bool IsNVVM = Name.consume_front("nvvm.");
4363+
bool IsAArch64 = Name.consume_front("aarch64.");
43284364
bool IsARM = Name.consume_front("arm.");
43294365
bool IsAMDGCN = Name.consume_front("amdgcn.");
43304366
bool IsDbg = Name.consume_front("dbg.");
@@ -4336,6 +4372,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
43364372
Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
43374373
} else if (IsX86) {
43384374
Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4375+
} else if (IsAArch64) {
4376+
Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
43394377
} else if (IsARM) {
43404378
Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
43414379
} else if (IsAMDGCN) {

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5549,10 +5549,17 @@ static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
55495549
"Only expect to cast between legal scalable predicate types!");
55505550

55515551
// Return the operand if the cast isn't changing type,
5552-
// e.g. <n x 16 x i1> -> <n x 16 x i1>
55535552
if (InVT == VT)
55545553
return Op;
55555554

5555+
// Look through casts to <vscale x 16 x i1> when their input has more lanes
5556+
// than VT. This will increase the chances of removing casts that introduce
5557+
// new lanes, which have to be explicitly zero'd.
5558+
if (Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
5559+
Op.getConstantOperandVal(0) == Intrinsic::aarch64_sve_convert_to_svbool &&
5560+
Op.getOperand(1).getValueType().bitsGT(VT))
5561+
Op = Op.getOperand(1);
5562+
55565563
SDValue Reinterpret = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
55575564

55585565
// We only have to zero the lanes if new lanes are being defined, e.g. when

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2425,8 +2425,8 @@ let Predicates = [HasBF16, HasSVEorSME] in {
24252425
defm BFMLALT_ZZZ : sve2_fp_mla_long<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt>;
24262426
defm BFMLALB_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb_lane_v2>;
24272427
defm BFMLALT_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt_lane_v2>;
2428-
defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32, AArch64fcvtr_mt>;
2429-
defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
2428+
defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
2429+
defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
24302430
} // End HasBF16, HasSVEorSME
24312431

24322432
let Predicates = [HasSVEorSME] in {

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2157,7 +2157,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
21572157
switch (IID) {
21582158
default:
21592159
break;
2160-
case Intrinsic::aarch64_sve_fcvt_bf16f32:
2160+
case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
21612161
case Intrinsic::aarch64_sve_fcvt_f16f32:
21622162
case Intrinsic::aarch64_sve_fcvt_f16f64:
21632163
case Intrinsic::aarch64_sve_fcvt_f32f16:
@@ -2188,7 +2188,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
21882188
case Intrinsic::aarch64_sve_ucvtf_f32i64:
21892189
case Intrinsic::aarch64_sve_ucvtf_f64i32:
21902190
return instCombineSVEAllOrNoActiveUnary(IC, II);
2191-
case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
2191+
case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
21922192
case Intrinsic::aarch64_sve_fcvtnt_f16f32:
21932193
case Intrinsic::aarch64_sve_fcvtnt_f32f64:
21942194
case Intrinsic::aarch64_sve_fcvtxnt_f32f64:

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8811,7 +8811,7 @@ multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op,
88118811
SDPatternOperator ir_op = null_frag> {
88128812
def NAME : sve_bfloat_convert<N, asm>;
88138813

8814-
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i1, nxv4f32, !cast<Instruction>(NAME)>;
8814+
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
88158815
def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
88168816
def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
88178817
}

0 commit comments

Comments
 (0)