Skip to content

Commit 421d3ec

Browse files
committed
[CostModel] Handle vector struct results and cost llvm.sincos
This patch updates the cost model to correctly cost intrinsics that return multiple values (in structs). Previously, the cost model assumed that only intrinsics returning a `VectorType` need scalarizing, which meant that intrinsics returning multiple vectors (which also need scalarizing) were costed far too cheaply (they were given the cost of a single function call). This patch also adds a custom cost for llvm.sincos when a vector function library is available, as certain VFs can be expanded (later in code gen) to a vector function, reducing the cost to a single call (plus the possible loads needed because the vector function returns values via output pointers).
1 parent c833746 commit 421d3ec

File tree

6 files changed

+216
-62
lines changed

6 files changed

+216
-62
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

+5-2
Original file line numberDiff line numberDiff line change
@@ -126,12 +126,13 @@ class IntrinsicCostAttributes {
126126
// If ScalarizationCost is invalid (the default), the cost of scalarizing the
127127
// arguments and the return value will be computed based on types.
128128
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
129+
TargetLibraryInfo const *LibInfo = nullptr;
129130

130131
public:
131132
IntrinsicCostAttributes(
132133
Intrinsic::ID Id, const CallBase &CI,
133134
InstructionCost ScalarCost = InstructionCost::getInvalid(),
134-
bool TypeBasedOnly = false);
135+
bool TypeBasedOnly = false, TargetLibraryInfo const *LibInfo = nullptr);
135136

136137
IntrinsicCostAttributes(
137138
Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
@@ -145,7 +146,8 @@ class IntrinsicCostAttributes {
145146
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
146147
ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
147148
const IntrinsicInst *I = nullptr,
148-
InstructionCost ScalarCost = InstructionCost::getInvalid());
149+
InstructionCost ScalarCost = InstructionCost::getInvalid(),
150+
TargetLibraryInfo const *LibInfo = nullptr);
149151

150152
Intrinsic::ID getID() const { return IID; }
151153
const IntrinsicInst *getInst() const { return II; }
@@ -154,6 +156,7 @@ class IntrinsicCostAttributes {
154156
InstructionCost getScalarizationCost() const { return ScalarizationCost; }
155157
const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
156158
const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
159+
const TargetLibraryInfo *getLibInfo() const { return LibInfo; }
157160

158161
bool isTypeBasedOnly() const {
159162
return Arguments.empty();

llvm/include/llvm/CodeGen/BasicTTIImpl.h

+95-18
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/ADT/SmallVector.h"
2323
#include "llvm/Analysis/LoopInfo.h"
2424
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
25+
#include "llvm/Analysis/TargetLibraryInfo.h"
2526
#include "llvm/Analysis/TargetTransformInfo.h"
2627
#include "llvm/Analysis/TargetTransformInfoImpl.h"
2728
#include "llvm/Analysis/ValueTracking.h"
@@ -285,6 +286,64 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
285286
return false;
286287
}
287288

289+
/// Several intrinsics that return structs (including llvm.sincos[pi] and
290+
/// llvm.modf) can be lowered to a vector library call (for certain VFs). The
291+
/// vector library functions correspond to the scalar calls (e.g. sincos or
292+
/// modf), which, unlike the intrinsic, return values via output pointers. This
293+
/// helper checks if a vector call exists for the given intrinsic, and returns
294+
/// the cost, which includes the cost of the mask (if required), and the loads
295+
/// for values returned via output pointers. \p LC is the scalar libcall and
296+
/// \p CallRetElementIndex (optional) is the struct element which is mapped to
297+
/// the call return value. If std::nullopt is returned, then no vector library
298+
/// call is available, so the intrinsic should be assigned the default cost
299+
/// (e.g. scalarization).
300+
std::optional<InstructionCost> getMultipleResultIntrinsicVectorLibCallCost(
301+
const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind,
302+
RTLIB::Libcall LC, std::optional<unsigned> CallRetElementIndex = {}) {
303+
Type *RetTy = ICA.getReturnType();
304+
// Vector variants of the intrinsic can be mapped to a vector library call.
305+
auto const *LibInfo = ICA.getLibInfo();
306+
if (!LibInfo || !isa<StructType>(RetTy) ||
307+
!isVectorizedStructTy(cast<StructType>(RetTy)))
308+
return std::nullopt;
309+
310+
// Find associated libcall.
311+
const char *LCName = getTLI()->getLibcallName(LC);
312+
if (!LCName)
313+
return std::nullopt;
314+
315+
// Search for a corresponding vector variant.
316+
LLVMContext &Ctx = RetTy->getContext();
317+
ElementCount VF = getVectorizedTypeVF(RetTy);
318+
VecDesc const *VD = nullptr;
319+
for (bool Masked : {false, true}) {
320+
if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked)))
321+
break;
322+
}
323+
if (!VD)
324+
return std::nullopt;
325+
326+
// Cost the call + mask.
327+
auto Cost =
328+
thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind);
329+
if (VD->isMasked())
330+
Cost += thisT()->getShuffleCost(
331+
TargetTransformInfo::SK_Broadcast,
332+
VectorType::get(IntegerType::getInt1Ty(Ctx), VF), {}, CostKind, 0,
333+
nullptr, {});
334+
335+
// Lowering to a library call (with output pointers) may require us to emit
336+
// reloads for the results.
337+
for (auto [Idx, VectorTy] : enumerate(getContainedTypes(RetTy))) {
338+
if (Idx == CallRetElementIndex)
339+
continue;
340+
Cost += thisT()->getMemoryOpCost(
341+
Instruction::Load, VectorTy,
342+
thisT()->getDataLayout().getABITypeAlign(VectorTy), 0, CostKind);
343+
}
344+
return Cost;
345+
}
346+
288347
protected:
289348
explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
290349
: BaseT(DL) {}
@@ -1726,9 +1785,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
17261785

17271786
Type *RetTy = ICA.getReturnType();
17281787

1729-
ElementCount RetVF =
1730-
(RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
1731-
: ElementCount::getFixed(1));
1788+
ElementCount RetVF = isVectorizedTy(RetTy) ? getVectorizedTypeVF(RetTy)
1789+
: ElementCount::getFixed(1);
1790+
17321791
const IntrinsicInst *I = ICA.getInst();
17331792
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
17341793
FastMathFlags FMF = ICA.getFlags();
@@ -1997,6 +2056,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19972056
}
19982057
case Intrinsic::experimental_vector_match:
19992058
return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
2059+
case Intrinsic::sincos: {
2060+
Type *Ty = getContainedTypes(RetTy).front();
2061+
EVT VT = getTLI()->getValueType(DL, Ty);
2062+
RTLIB::Libcall LC = RTLIB::getSINCOS(VT.getScalarType());
2063+
if (auto Cost =
2064+
getMultipleResultIntrinsicVectorLibCallCost(ICA, CostKind, LC))
2065+
return *Cost;
2066+
// Otherwise, fallback to default scalarization cost.
2067+
break;
2068+
}
20002069
}
20012070

20022071
// Assume that we need to scalarize this intrinsic.
@@ -2005,10 +2074,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
20052074
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
20062075
if (RetVF.isVector() && !RetVF.isScalable()) {
20072076
ScalarizationCost = 0;
2008-
if (!RetTy->isVoidTy())
2009-
ScalarizationCost += getScalarizationOverhead(
2010-
cast<VectorType>(RetTy),
2011-
/*Insert*/ true, /*Extract*/ false, CostKind);
2077+
if (!RetTy->isVoidTy()) {
2078+
for (Type *VectorTy : getContainedTypes(RetTy)) {
2079+
ScalarizationCost += getScalarizationOverhead(
2080+
cast<VectorType>(VectorTy),
2081+
/*Insert*/ true, /*Extract*/ false, CostKind);
2082+
}
2083+
}
20122084
ScalarizationCost +=
20132085
getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind);
20142086
}
@@ -2689,27 +2761,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
26892761
// Else, assume that we need to scalarize this intrinsic. For math builtins
26902762
// this will emit a costly libcall, adding call overhead and spills. Make it
26912763
// very expensive.
2692-
if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2764+
if (isVectorizedTy(RetTy)) {
2765+
ArrayRef<Type *> RetVTys = getContainedTypes(RetTy);
2766+
26932767
// Scalable vectors cannot be scalarized, so return Invalid.
2694-
if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
2695-
return isa<ScalableVectorType>(Ty);
2696-
}))
2768+
if (any_of(concat<Type *const>(RetVTys, Tys),
2769+
[](Type *Ty) { return isa<ScalableVectorType>(Ty); }))
26972770
return InstructionCost::getInvalid();
26982771

2699-
InstructionCost ScalarizationCost =
2700-
SkipScalarizationCost
2701-
? ScalarizationCostPassed
2702-
: getScalarizationOverhead(RetVTy, /*Insert*/ true,
2703-
/*Extract*/ false, CostKind);
2772+
InstructionCost ScalarizationCost = ScalarizationCostPassed;
2773+
if (!SkipScalarizationCost) {
2774+
ScalarizationCost = 0;
2775+
for (Type *RetVTy : RetVTys) {
2776+
ScalarizationCost += getScalarizationOverhead(
2777+
cast<VectorType>(RetVTy), /*Insert*/ true,
2778+
/*Extract*/ false, CostKind);
2779+
}
2780+
}
27042781

2705-
unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
2782+
unsigned ScalarCalls = getVectorizedTypeVF(RetTy).getFixedValue();
27062783
SmallVector<Type *, 4> ScalarTys;
27072784
for (Type *Ty : Tys) {
27082785
if (Ty->isVectorTy())
27092786
Ty = Ty->getScalarType();
27102787
ScalarTys.push_back(Ty);
27112788
}
2712-
IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
2789+
IntrinsicCostAttributes Attrs(IID, toScalarizedTy(RetTy), ScalarTys, FMF);
27132790
InstructionCost ScalarCost =
27142791
thisT()->getIntrinsicInstrCost(Attrs, CostKind);
27152792
for (Type *Ty : Tys) {

llvm/lib/Analysis/CostModel.cpp

+13-5
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,15 @@
1717
//===----------------------------------------------------------------------===//
1818

1919
#include "llvm/Analysis/CostModel.h"
20+
#include "llvm/Analysis/TargetLibraryInfo.h"
2021
#include "llvm/Analysis/TargetTransformInfo.h"
2122
#include "llvm/IR/Function.h"
2223
#include "llvm/IR/IntrinsicInst.h"
2324
#include "llvm/IR/PassManager.h"
2425
#include "llvm/Pass.h"
2526
#include "llvm/Support/CommandLine.h"
2627
#include "llvm/Support/raw_ostream.h"
28+
2729
using namespace llvm;
2830

2931
static cl::opt<TargetTransformInfo::TargetCostKind> CostKind(
@@ -42,25 +44,31 @@ static cl::opt<bool> TypeBasedIntrinsicCost("type-based-intrinsic-cost",
4244
cl::desc("Calculate intrinsics cost based only on argument types"),
4345
cl::init(false));
4446

47+
static cl::opt<bool> LibCallBasedIntrinsicCost(
48+
"libcall-based-intrinsic-cost",
49+
cl::desc("Calculate intrinsics cost using target library info"),
50+
cl::init(false));
51+
4552
#define CM_NAME "cost-model"
4653
#define DEBUG_TYPE CM_NAME
4754

4855
PreservedAnalyses CostModelPrinterPass::run(Function &F,
4956
FunctionAnalysisManager &AM) {
5057
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
58+
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
5159
OS << "Printing analysis 'Cost Model Analysis' for function '" << F.getName() << "':\n";
5260
for (BasicBlock &B : F) {
5361
for (Instruction &Inst : B) {
5462
// TODO: Use a pass parameter instead of cl::opt CostKind to determine
5563
// which cost kind to print.
5664
InstructionCost Cost;
5765
auto *II = dyn_cast<IntrinsicInst>(&Inst);
58-
if (II && TypeBasedIntrinsicCost) {
59-
IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II,
60-
InstructionCost::getInvalid(), true);
66+
if (II && (LibCallBasedIntrinsicCost || TypeBasedIntrinsicCost)) {
67+
IntrinsicCostAttributes ICA(
68+
II->getIntrinsicID(), *II, InstructionCost::getInvalid(),
69+
/*TypeBasedOnly=*/TypeBasedIntrinsicCost, &TLI);
6170
Cost = TTI.getIntrinsicInstrCost(ICA, CostKind);
62-
}
63-
else {
71+
} else {
6472
Cost = TTI.getInstructionCost(&Inst, CostKind);
6573
}
6674

llvm/lib/Analysis/TargetTransformInfo.cpp

+8-9
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,9 @@ bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
6969

7070
IntrinsicCostAttributes::IntrinsicCostAttributes(
7171
Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost,
72-
bool TypeBasedOnly)
72+
bool TypeBasedOnly, TargetLibraryInfo const *LibInfo)
7373
: II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id),
74-
ScalarizationCost(ScalarizationCost) {
74+
ScalarizationCost(ScalarizationCost), LibInfo(LibInfo) {
7575

7676
if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
7777
FMF = FPMO->getFastMathFlags();
@@ -101,13 +101,12 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
101101
ParamTys.push_back(Argument->getType());
102102
}
103103

104-
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
105-
ArrayRef<const Value *> Args,
106-
ArrayRef<Type *> Tys,
107-
FastMathFlags Flags,
108-
const IntrinsicInst *I,
109-
InstructionCost ScalarCost)
110-
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
104+
IntrinsicCostAttributes::IntrinsicCostAttributes(
105+
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
106+
ArrayRef<Type *> Tys, FastMathFlags Flags, const IntrinsicInst *I,
107+
InstructionCost ScalarCost, TargetLibraryInfo const *LibInfo)
108+
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost),
109+
LibInfo(LibInfo) {
111110
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
112111
Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
113112
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "sincos"
2+
; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
3+
; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=ArmPL -passes="print<cost-model>" -libcall-based-intrinsic-cost -cost-kind=throughput 2>&1 -disable-output | FileCheck %s -check-prefix=CHECK-VECLIB
4+
5+
define void @sincos(
6+
; CHECK-LABEL: 'sincos'
7+
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, half } @llvm.sincos.f16(half %x_f16)
8+
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float %x_f32)
9+
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double %x_f64)
10+
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 %x_f128)
11+
;
12+
; CHECK: Cost Model: Found an estimated cost of 50 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> %x_v8xf16)
13+
; CHECK: Cost Model: Found an estimated cost of 58 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %x_v4xf32)
14+
; CHECK: Cost Model: Found an estimated cost of 26 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %x_v2xf64)
15+
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> %x_v1xf128)
16+
;
17+
; CHECK: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> %x_nxv8xf16)
18+
; CHECK: Cost Model: Invalid cost for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> %x_nxv4xf32)
19+
; CHECK: Cost Model: Invalid cost for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> %x_nxv2xf64)
20+
; CHECK: Cost Model: Invalid cost for instruction: %nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.nxv1f128(<vscale x 1 x fp128> %x_nxv1xf128)
21+
22+
; CHECK-VECLIB-LABEL: 'sincos'
23+
; CHECK-VECLIB: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, half } @llvm.sincos.f16(half %x_f16)
24+
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float %x_f32)
25+
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double %x_f64)
26+
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 %x_f128)
27+
;
28+
; CHECK-VECLIB: Cost Model: Found an estimated cost of 50 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> %x_v8xf16)
29+
; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %x_v4xf32)
30+
; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %x_v2xf64)
31+
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> %x_v1xf128)
32+
;
33+
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> %x_nxv8xf16)
34+
; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> %x_nxv4xf32)
35+
; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> %x_nxv2xf64)
36+
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.nxv1f128(<vscale x 1 x fp128> %x_nxv1xf128)
37+
38+
half %x_f16,
39+
float %x_f32,
40+
double %x_f64,
41+
fp128 %x_f128,
42+
<8 x half> %x_v8xf16,
43+
<4 x float> %x_v4xf32,
44+
<2 x double> %x_v2xf64,
45+
<1 x fp128> %x_v1xf128,
46+
<vscale x 8 x half> %x_nxv8xf16,
47+
<vscale x 4 x float> %x_nxv4xf32,
48+
<vscale x 2 x double> %x_nxv2xf64,
49+
<vscale x 1 x fp128> %x_nxv1xf128
50+
) {
51+
%f16 = call { half, half } @llvm.sincos.f16(half %x_f16)
52+
%f32 = call { float, float } @llvm.sincos.f32(float %x_f32)
53+
%f64 = call { double, double } @llvm.sincos.f64(double %x_f64)
54+
%f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 %x_f128)
55+
56+
%v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> %x_v8xf16)
57+
%v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %x_v4xf32)
58+
%v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %x_v2xf64)
59+
%v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> %x_v1xf128)
60+
61+
%nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.v8f16(<vscale x 8 x half> %x_nxv8xf16)
62+
%nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.v4f32(<vscale x 4 x float> %x_nxv4xf32)
63+
%nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.v2f64(<vscale x 2 x double> %x_nxv2xf64)
64+
%nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.v1f128(<vscale x 1 x fp128> %x_nxv1xf128)
65+
66+
ret void
67+
}

0 commit comments

Comments
 (0)