Skip to content

[LV] Teach the loop vectorizer llvm.sincos is trivially vectorizable #128035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::atan2:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::sincos:
case Intrinsic::tan:
case Intrinsic::sinh:
case Intrinsic::cosh:
Expand Down Expand Up @@ -185,6 +186,7 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
case Intrinsic::ucmp:
case Intrinsic::scmp:
return OpdIdx == -1 || OpdIdx == 0;
case Intrinsic::sincos:
case Intrinsic::is_fpclass:
case Intrinsic::vp_is_fpclass:
return OpdIdx == 0;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2926,7 +2926,8 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
[&](Type *Ty) { return maybeVectorizeType(Ty, VF); });

IntrinsicCostAttributes CostAttrs(ID, RetTy, Arguments, ParamTys, FMF,
dyn_cast<IntrinsicInst>(CI));
dyn_cast<IntrinsicInst>(CI),
InstructionCost::getInvalid(), TLI);
return TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1176,7 +1176,8 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
IntrinsicCostAttributes CostAttrs(
VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()),
InstructionCost::getInvalid(), &Ctx.TLI);
return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
}

Expand Down
251 changes: 251 additions & 0 deletions llvm/test/Transforms/LoopVectorize/AArch64/sincos.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|extractvalue|store)" --version 5
; RUN: opt -passes=loop-vectorize -mtriple=aarch64-gnu-linux -mcpu=neoverse-v1 -mattr=+sve < %s -S -o - -debug-only=loop-vectorize 2>%t.1 | FileCheck %s --check-prefix=CHECK
; RUN: opt -passes=loop-vectorize -mtriple=aarch64-gnu-linux -mcpu=neoverse-v1 -mattr=+sve -vector-library=ArmPL < %s -S -o - -debug-only=loop-vectorize 2>%t.2 | FileCheck %s --check-prefix=CHECK-ARMPL
; RUN: FileCheck --input-file=%t.1 --check-prefix=CHECK-COST %s
; RUN: FileCheck --input-file=%t.2 --check-prefix=CHECK-COST-ARMPL %s
; REQUIRES: asserts

; CHECK-COST-LABEL: sincos_f32
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val)
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)

; CHECK-COST-ARMPL-LABEL: sincos_f32
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val)
; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)

define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @sincos_f32(
; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK: [[ENTRY:.*:]]
; CHECK: [[VECTOR_PH:.*:]]
; CHECK: [[VECTOR_BODY:.*:]]
; CHECK: [[TMP3:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]])
; CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 0
; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1
; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4
; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4
; CHECK: [[MIDDLE_BLOCK:.*:]]
; CHECK: [[SCALAR_PH:.*:]]
; CHECK: [[FOR_BODY:.*:]]
; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]])
; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0
; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1
; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4
; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4
; CHECK: [[EXIT:.*:]]
;
; CHECK-ARMPL-LABEL: define void @sincos_f32(
; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-ARMPL: [[ENTRY:.*:]]
; CHECK-ARMPL: [[VECTOR_PH:.*:]]
; CHECK-ARMPL: [[VECTOR_BODY:.*:]]
; CHECK-ARMPL: [[TMP12:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD:%.*]])
; CHECK-ARMPL: [[TMP13:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD1:%.*]])
; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 0
; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP13]], 0
; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 1
; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP13]], 1
; CHECK-ARMPL: store <vscale x 4 x float> [[TMP14]], ptr [[TMP19:%.*]], align 4
; CHECK-ARMPL: store <vscale x 4 x float> [[TMP15]], ptr [[TMP22:%.*]], align 4
; CHECK-ARMPL: store <vscale x 4 x float> [[TMP16]], ptr [[TMP24:%.*]], align 4
; CHECK-ARMPL: store <vscale x 4 x float> [[TMP17]], ptr [[TMP27:%.*]], align 4
; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]]
; CHECK-ARMPL: [[SCALAR_PH:.*:]]
; CHECK-ARMPL: [[FOR_BODY:.*:]]
; CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]])
; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0
; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1
; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4
; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4
; CHECK-ARMPL: [[EXIT:.*:]]
;
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
%in_val = load float, ptr %arrayidx, align 4
%call = tail call { float, float } @llvm.sincos.f32(float %in_val)
%extract_a = extractvalue { float, float } %call, 0
%extract_b = extractvalue { float, float } %call, 1
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
store float %extract_a, ptr %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
store float %extract_b, ptr %arrayidx4, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %exit, label %for.body

exit:
ret void
}

; CHECK-COST-LABEL: sincos_f64
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val)
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)

; CHECK-COST-ARMPL-LABEL: sincos_f64
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { double, double } @llvm.sincos.f64(double %in_val)
; CHECK-COST-ARMPL: Cost of 12 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)

define void @sincos_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @sincos_f64(
; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] {
; CHECK: [[ENTRY:.*:]]
; CHECK: [[VECTOR_PH:.*:]]
; CHECK: [[VECTOR_BODY:.*:]]
; CHECK: [[TMP3:%.*]] = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
; CHECK: [[TMP4:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 0
; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1
; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8
; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8
; CHECK: [[MIDDLE_BLOCK:.*:]]
; CHECK: [[SCALAR_PH:.*:]]
; CHECK: [[FOR_BODY:.*:]]
; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]])
; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0
; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1
; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8
; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8
; CHECK: [[EXIT:.*:]]
;
; CHECK-ARMPL-LABEL: define void @sincos_f64(
; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] {
; CHECK-ARMPL: [[ENTRY:.*:]]
; CHECK-ARMPL: [[VECTOR_PH:.*:]]
; CHECK-ARMPL: [[VECTOR_BODY:.*:]]
; CHECK-ARMPL: [[TMP12:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> [[WIDE_LOAD:%.*]])
; CHECK-ARMPL: [[TMP13:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> [[WIDE_LOAD1:%.*]])
; CHECK-ARMPL: [[TMP14:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP12]], 0
; CHECK-ARMPL: [[TMP15:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP13]], 0
; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP12]], 1
; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP13]], 1
; CHECK-ARMPL: store <vscale x 2 x double> [[TMP14]], ptr [[TMP19:%.*]], align 8
; CHECK-ARMPL: store <vscale x 2 x double> [[TMP15]], ptr [[TMP22:%.*]], align 8
; CHECK-ARMPL: store <vscale x 2 x double> [[TMP16]], ptr [[TMP24:%.*]], align 8
; CHECK-ARMPL: store <vscale x 2 x double> [[TMP17]], ptr [[TMP27:%.*]], align 8
; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]]
; CHECK-ARMPL: [[SCALAR_PH:.*:]]
; CHECK-ARMPL: [[FOR_BODY:.*:]]
; CHECK-ARMPL: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]])
; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0
; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1
; CHECK-ARMPL: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8
; CHECK-ARMPL: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8
; CHECK-ARMPL: [[EXIT:.*:]]
;
entry:
br label %for.body

for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds double, ptr %in, i64 %iv
%in_val = load double, ptr %arrayidx, align 8
%call = tail call { double, double } @llvm.sincos.f64(double %in_val)
%extract_a = extractvalue { double, double } %call, 0
%extract_b = extractvalue { double, double } %call, 1
%arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv
store double %extract_a, ptr %arrayidx2, align 8
%arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv
store double %extract_b, ptr %arrayidx4, align 8
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %exit, label %for.body

exit:
ret void
}

; CHECK-COST-LABEL: predicated_sincos
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val)
; CHECK-COST: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of 58 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST: Cost of Invalid for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)

; CHECK-COST-ARMPL-LABEL: predicated_sincos
; CHECK-COST-ARMPL: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { float, float } @llvm.sincos.f32(float %in_val)
; CHECK-COST-ARMPL: Cost of 26 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 12 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)
; CHECK-COST-ARMPL: Cost of 13 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sincos(ir<%in_val>)

define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @predicated_sincos(
; CHECK-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] {
; CHECK: [[ENTRY:.*:]]
; CHECK: [[FOR_BODY:.*:]]
; CHECK: [[IF_THEN:.*:]]
; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]])
; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0
; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1
; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4
; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4
; CHECK: [[IF_MERGE:.*:]]
; CHECK: [[FOR_END:.*:]]
;
; CHECK-ARMPL-LABEL: define void @predicated_sincos(
; CHECK-ARMPL-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] {
; CHECK-ARMPL: [[ENTRY:.*:]]
; CHECK-ARMPL: [[VECTOR_PH:.*:]]
; CHECK-ARMPL: [[VECTOR_BODY:.*:]]
; CHECK-ARMPL: [[TMP15:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
; CHECK-ARMPL: [[TMP16:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP15]], 0
; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP15]], 1
; CHECK-ARMPL: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[TMP16]], ptr [[TMP19:%.*]], i32 4, <vscale x 4 x i1> [[TMP14:%.*]])
; CHECK-ARMPL: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[TMP17]], ptr [[TMP21:%.*]], i32 4, <vscale x 4 x i1> [[TMP14]])
; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]]
; CHECK-ARMPL: [[SCALAR_PH:.*:]]
; CHECK-ARMPL: [[FOR_BODY:.*:]]
; CHECK-ARMPL: [[IF_THEN:.*:]]
; CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]])
; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0
; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1
; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4
; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4
; CHECK-ARMPL: [[IF_MERGE:.*:]]
; CHECK-ARMPL: [[FOR_END:.*:]]
;
entry:
br label %for.body

for.body:
%iv = phi i64 [ %iv.next, %if.merge ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
%in_val = load float, ptr %arrayidx, align 4
%if_cond = fcmp olt float %in_val, %x
br i1 %if_cond, label %if.then, label %if.merge

if.then:
%call = tail call { float, float } @llvm.sincos.f32(float %in_val)
%extract_a = extractvalue { float, float } %call, 0
%extract_b = extractvalue { float, float } %call, 1
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
store float %extract_a, ptr %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
store float %extract_b, ptr %arrayidx4, align 4
br label %if.merge

if.merge:
%iv.next = add nuw nsw i64 %iv, 1
%cond = icmp slt i64 %iv.next, 1024
br i1 %cond, label %for.body, label %for.end

for.end:
ret void
}
Loading