diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 5a3083bc3ef7..eea4b7c8a6a4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -1390,7 +1390,9 @@ static mlir::Type GetNeonType(CIRGenFunction *CGF, NeonTypeFlags TypeFlags,
     // so we use v16i8 to represent poly128 and get pattern matched.
     llvm_unreachable("NYI");
   case NeonTypeFlags::Float32:
-    llvm_unreachable("NYI");
+    return mlir::cir::VectorType::get(CGF->getBuilder().getContext(),
+                                      CGF->getCIRGenModule().FloatTy,
+                                      V1Ty ? 1 : (2 << IsQuad));
   case NeonTypeFlags::Float64:
     llvm_unreachable("NYI");
   }
@@ -1616,9 +1618,6 @@ mlir::Value buildNeonCall(unsigned int builtinID, CIRGenFunction &cgf,
   if (shift > 0)
     llvm_unreachable("Argument shift NYI");
 
-  if (builtinID != clang::NEON::BI__builtin_neon_vrndns_f32)
-    llvm_unreachable("NYT");
-
   CIRGenBuilderTy &builder = cgf.getBuilder();
   for (unsigned j = 0; j < argTypes.size(); ++j) {
     if (isConstrainedFPIntrinsic) {
@@ -2409,7 +2408,9 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
   }
   case NEON::BI__builtin_neon_vrnda_v:
   case NEON::BI__builtin_neon_vrndaq_v: {
-    llvm_unreachable("NYI");
+    assert(!MissingFeatures::buildConstrainedFPCall());
+    return buildNeonCall(BuiltinID, *this, {Ty}, Ops, "llvm.round", Ty,
+                         getLoc(E->getExprLoc()));
   }
   case NEON::BI__builtin_neon_vrndih_f16: {
     llvm_unreachable("NYI");
diff --git a/clang/test/CIR/CodeGen/arm-neon-directed-rounding.c b/clang/test/CIR/CodeGen/arm-neon-directed-rounding.c
index 5487919f8050..92b4a9298eac 100644
--- a/clang/test/CIR/CodeGen/arm-neon-directed-rounding.c
+++ b/clang/test/CIR/CodeGen/arm-neon-directed-rounding.c
@@ -38,3 +38,63 @@ float32_t test_vrndns_f32(float32_t a) {
 // LLVM: store float [[RES_COPY1]], ptr [[RET_P:%.*]], align 4,
 // LLVM: [[RET_VAL:%.*]] = load float, ptr [[RET_P]], align 4,
 // LLVM: ret float [[RET_VAL]]
+
+float32x2_t test_vrnda_f32(float32x2_t a) {
+  return vrnda_f32(a);
+}
+
+// CIR: cir.func internal private @vrnda_f32(%arg0: !cir.vector<!cir.float x 2>
+// CIR: cir.store %arg0, [[ARG_SAVE:%.*]] : !cir.vector<!cir.float x 2>, !cir.ptr<!cir.vector<!cir.float x 2>>
+// CIR: [[INTRIN_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr<!cir.vector<!cir.float x 2>>, !cir.vector<!cir.float x 2>
+// CIR: [[INTRIN_ARG_CAST:%.*]] = cir.cast(bitcast, [[INTRIN_ARG]] : !cir.vector<!cir.float x 2>), !cir.vector<!s8i x 8>
+// CIR: [[INTRIN_ARG_BACK:%.*]] = cir.cast(bitcast, [[INTRIN_ARG_CAST]] : !cir.vector<!s8i x 8>), !cir.vector<!cir.float x 2>
+// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.round" [[INTRIN_ARG_BACK]] : (!cir.vector<!cir.float x 2>) -> !cir.vector<!cir.float x 2>
+// CIR: cir.return {{%.*}} : !cir.vector<!cir.float x 2>
+
+// CIR-LABEL: test_vrnda_f32
+// CIR: cir.store %arg0, [[ARG_SAVE0:%.*]] : !cir.vector<!cir.float x 2>, !cir.ptr<!cir.vector<!cir.float x 2>>
+// CIR: [[FUNC_ARG:%.*]] = cir.load [[ARG_SAVE0]] : !cir.ptr<!cir.vector<!cir.float x 2>>, !cir.vector<!cir.float x 2>
+// CIR: [[FUNC_RES:%.*]] = cir.call @vrnda_f32([[FUNC_ARG]]) : (!cir.vector<!cir.float x 2>) -> !cir.vector<!cir.float x 2>
+// CIR: cir.store [[FUNC_RES]], [[RET_P:%.*]] : !cir.vector<!cir.float x 2>, !cir.ptr<!cir.vector<!cir.float x 2>>
+// CIR: [[RET_VAL:%.*]] = cir.load [[RET_P]] : !cir.ptr<!cir.vector<!cir.float x 2>>, !cir.vector<!cir.float x 2>
+// CIR: cir.return [[RET_VAL]] : !cir.vector<!cir.float x 2>
+
+// LLVM: define dso_local <2 x float> @test_vrnda_f32(<2 x float> [[ARG:%.*]])
+// LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE:%.*]], align 8
+// LLVM: [[P0:%.*]] = load <2 x float>, ptr [[ARG_SAVE]], align 8,
+// LLVM: store <2 x float> [[P0]], ptr [[P0_SAVE:%.*]], align 8,
+// LLVM: [[INTRIN_ARG:%.*]] = load <2 x float>, ptr [[P0_SAVE]], align 8,
+// LLVM: [[INTRIN_RES:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> [[INTRIN_ARG]])
+// LLVM: store <2 x float> [[INTRIN_RES]], ptr [[RES_SAVE0:%.*]], align 8,
+// LLVM: [[RES_COPY0:%.*]] = load <2 x float>, ptr [[RES_SAVE0]], align 8,
+// LLVM: store <2 x float> [[RES_COPY0]], ptr [[RES_SAVE1:%.*]], align 8,
+// LLVM: [[RES_COPY1:%.*]] = load <2 x float>, ptr [[RES_SAVE1]], align 8,
+// LLVM: store <2 x float> [[RES_COPY1]], ptr [[RET_P:%.*]], align 8,
+// LLVM: [[RET_VAL:%.*]] = load <2 x float>, ptr [[RET_P]], align 8,
+// LLVM: ret <2 x float> [[RET_VAL]]
+
+float32x4_t test_vrndaq_f32(float32x4_t a) {
+  return vrndaq_f32(a);
+}
+
+// CIR: cir.func internal private @vrndaq_f32(%arg0: !cir.vector<!cir.float x 4>
+// CIR: cir.store %arg0, [[ARG_SAVE:%.*]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+// CIR: [[INTRIN_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr<!cir.vector<!cir.float x 4>>, !cir.vector<!cir.float x 4>
+// CIR: [[INTRIN_ARG_CAST:%.*]] = cir.cast(bitcast, [[INTRIN_ARG]] : !cir.vector<!cir.float x 4>), !cir.vector<!s8i x 16>
+// CIR: [[INTRIN_ARG_BACK:%.*]] = cir.cast(bitcast, [[INTRIN_ARG_CAST]] : !cir.vector<!s8i x 16>), !cir.vector<!cir.float x 4>
+// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.round" [[INTRIN_ARG_BACK]] : (!cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+// CIR: cir.return {{%.*}} : !cir.vector<!cir.float x 4>
+
+// LLVM: define dso_local <4 x float> @test_vrndaq_f32(<4 x float> [[ARG:%.*]])
+// LLVM: store <4 x float> [[ARG]], ptr [[ARG_SAVE:%.*]], align 16
+// LLVM: [[P0:%.*]] = load <4 x float>, ptr [[ARG_SAVE]], align 16,
+// LLVM: store <4 x float> [[P0]], ptr [[P0_SAVE:%.*]], align 16,
+// LLVM: [[INTRIN_ARG:%.*]] = load <4 x float>, ptr [[P0_SAVE]], align 16,
+// LLVM: [[INTRIN_RES:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[INTRIN_ARG]])
+// LLVM: store <4 x float> [[INTRIN_RES]], ptr [[RES_SAVE0:%.*]], align 16,
+// LLVM: [[RES_COPY0:%.*]] = load <4 x float>, ptr [[RES_SAVE0]], align 16,
+// LLVM: store <4 x float> [[RES_COPY0]], ptr [[RES_SAVE1:%.*]], align 16,
+// LLVM: [[RES_COPY1:%.*]] = load <4 x float>, ptr [[RES_SAVE1]], align 16,
+// LLVM: store <4 x float> [[RES_COPY1]], ptr [[RET_P:%.*]], align 16,
+// LLVM: [[RET_VAL:%.*]] = load <4 x float>, ptr [[RET_P]], align 16,
+// LLVM: ret <4 x float> [[RET_VAL]]