Skip to content

Commit 3fd742f

Browse files
committed
address pr comments
1 parent 64f8cf1 commit 3fd742f

File tree

4 files changed

+66
-34
lines changed

4 files changed

+66
-34
lines changed

llvm/lib/Transforms/Scalar/Scalarizer.cpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -699,9 +699,10 @@ bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
699699
}
700700

701701
bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) {
702-
703-
return TTI->isTargetIntrinsicTriviallyScalarizable(ID) ||
704-
isTriviallyVectorizable(ID);
702+
if (isTriviallyVectorizable(ID))
703+
return true;
704+
return Function::isTargetIntrinsic(ID) &&
705+
TTI->isTargetIntrinsicTriviallyScalarizable(ID);
705706
}
706707

707708
/// If a call to a vector typed intrinsic function, split into a scalar call per

llvm/test/CodeGen/DirectX/frac.ll

+32-16
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,55 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
12
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
23

34
; Make sure dxil operation function calls for frac are generated for float and half.
45

56
define noundef half @frac_half(half noundef %a) {
7+
; CHECK-LABEL: define noundef half @frac_half(
8+
; CHECK-SAME: half noundef [[A:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: [[DX_FRAC1:%.*]] = call half @dx.op.unary.f16(i32 22, half [[A]])
11+
; CHECK-NEXT: ret half [[DX_FRAC1]]
12+
;
613
entry:
7-
; CHECK:call half @dx.op.unary.f16(i32 22, half %{{.*}})
814
%dx.frac = call half @llvm.dx.frac.f16(half %a)
915
ret half %dx.frac
1016
}
1117

1218
define noundef float @frac_float(float noundef %a) #0 {
19+
; CHECK-LABEL: define noundef float @frac_float(
20+
; CHECK-SAME: float noundef [[A:%.*]]) {
21+
; CHECK-NEXT: [[ENTRY:.*:]]
22+
; CHECK-NEXT: [[DX_FRAC1:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A]])
23+
; CHECK-NEXT: ret float [[DX_FRAC1]]
24+
;
1325
entry:
14-
; CHECK:call float @dx.op.unary.f32(i32 22, float %{{.*}})
1526
%dx.frac = call float @llvm.dx.frac.f32(float %a)
1627
ret float %dx.frac
1728
}
1829

1930
define noundef <4 x float> @frac_float4(<4 x float> noundef %a) #0 {
31+
; CHECK-LABEL: define noundef <4 x float> @frac_float4(
32+
; CHECK-SAME: <4 x float> noundef [[A:%.*]]) {
33+
; CHECK-NEXT: [[ENTRY:.*:]]
34+
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x float> [[A]], i64 0
35+
; CHECK-NEXT: [[DOTI04:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I0]])
36+
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x float> [[A]], i64 1
37+
; CHECK-NEXT: [[DOTI13:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I1]])
38+
; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x float> [[A]], i64 2
39+
; CHECK-NEXT: [[DOTI22:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I2]])
40+
; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x float> [[A]], i64 3
41+
; CHECK-NEXT: [[DOTI31:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I3]])
42+
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x float> poison, float [[DOTI04]], i64 0
43+
; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x float> [[DOTUPTO0]], float [[DOTI13]], i64 1
44+
; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x float> [[DOTUPTO1]], float [[DOTI22]], i64 2
45+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> [[DOTUPTO2]], float [[DOTI31]], i64 3
46+
; CHECK-NEXT: ret <4 x float> [[TMP0]]
47+
;
2048
entry:
21-
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
22-
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 22, float [[ee0]])
23-
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
24-
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 22, float [[ee1]])
25-
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
26-
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 22, float [[ee2]])
27-
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
28-
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 22, float [[ee3]])
29-
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
30-
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
31-
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
32-
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
33-
%2 = call <4 x float> @llvm.dx.frac.v4f32(<4 x float> %a)
49+
%2 = call <4 x float> @llvm.dx.frac.v4f32(<4 x float> %a)
3450
ret <4 x float> %2
3551
}
3652

3753
declare half @llvm.dx.frac.f16(half)
3854
declare float @llvm.dx.frac.f32(float)
39-
declare <4 x float> @llvm.dx.frac.v4f32(<4 x float>)
55+
declare <4 x float> @llvm.dx.frac.v4f32(<4 x float>)

llvm/test/CodeGen/DirectX/llc-pipeline.ll

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
; CHECK-LABEL: Pass Arguments:
77
; CHECK-NEXT: Target Library Information
8+
; CHECK-NEXT: Target Transform Information
89
; CHECK-NEXT: ModulePass Manager
910
; CHECK-NEXT: DXIL Intrinsic Expansion
1011
; CHECK-NEXT: FunctionPass Manager

llvm/test/CodeGen/DirectX/rsqrt.ll

+29-15
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,52 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
12
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
23

34
; Make sure dxil operation function calls for rsqrt are generated for float and half.
45

56
; CHECK-LABEL: rsqrt_float
67
define noundef float @rsqrt_float(float noundef %a) {
8+
; CHECK-SAME: float noundef [[A:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: [[DX_RSQRT1:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A]])
11+
; CHECK-NEXT: ret float [[DX_RSQRT1]]
12+
;
713
entry:
8-
; CHECK: call float @dx.op.unary.f32(i32 25, float %{{.*}})
914
%dx.rsqrt = call float @llvm.dx.rsqrt.f32(float %a)
1015
ret float %dx.rsqrt
1116
}
1217

1318
; CHECK-LABEL: rsqrt_half
1419
define noundef half @rsqrt_half(half noundef %a) {
20+
; CHECK-SAME: half noundef [[A:%.*]]) {
21+
; CHECK-NEXT: [[ENTRY:.*:]]
22+
; CHECK-NEXT: [[DX_RSQRT1:%.*]] = call half @dx.op.unary.f16(i32 25, half [[A]])
23+
; CHECK-NEXT: ret half [[DX_RSQRT1]]
24+
;
1525
entry:
16-
; CHECK: call half @dx.op.unary.f16(i32 25, half %{{.*}})
1726
%dx.rsqrt = call half @llvm.dx.rsqrt.f16(half %a)
1827
ret half %dx.rsqrt
1928
}
2029

2130
define noundef <4 x float> @rsqrt_float4(<4 x float> noundef %a) #0 {
31+
; CHECK-LABEL: define noundef <4 x float> @rsqrt_float4(
32+
; CHECK-SAME: <4 x float> noundef [[A:%.*]]) {
33+
; CHECK-NEXT: [[ENTRY:.*:]]
34+
; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x float> [[A]], i64 0
35+
; CHECK-NEXT: [[DOTI04:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I0]])
36+
; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x float> [[A]], i64 1
37+
; CHECK-NEXT: [[DOTI13:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I1]])
38+
; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x float> [[A]], i64 2
39+
; CHECK-NEXT: [[DOTI22:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I2]])
40+
; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x float> [[A]], i64 3
41+
; CHECK-NEXT: [[DOTI31:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I3]])
42+
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x float> poison, float [[DOTI04]], i64 0
43+
; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x float> [[DOTUPTO0]], float [[DOTI13]], i64 1
44+
; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x float> [[DOTUPTO1]], float [[DOTI22]], i64 2
45+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> [[DOTUPTO2]], float [[DOTI31]], i64 3
46+
; CHECK-NEXT: ret <4 x float> [[TMP0]]
47+
;
2248
entry:
23-
; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
24-
; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 25, float [[ee0]])
25-
; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
26-
; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 25, float [[ee1]])
27-
; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
28-
; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 25, float [[ee2]])
29-
; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
30-
; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 25, float [[ee3]])
31-
; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
32-
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
33-
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
34-
; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
35-
%2 = call <4 x float> @llvm.dx.rsqrt.v4f32(<4 x float> %a)
49+
%2 = call <4 x float> @llvm.dx.rsqrt.v4f32(<4 x float> %a)
3650
ret <4 x float> %2
3751
}
3852

0 commit comments

Comments
 (0)