Skip to content

Commit f9afbe9

Browse files
author
Leon Clark
committed
Address review comments.
1 parent 375226a commit f9afbe9

File tree

2 files changed

+9
-10
lines changed

2 files changed

+9
-10
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

+3 -2
Original file line number | Diff line number | Diff line change
@@ -1272,7 +1272,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
1272 1272   // The 64-bit versions produce 32-bit results, but only on the SALU.
1273 1273   getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
1274 1274   .legalFor({{S32, S32}, {S32, S64}})
1275      - .customIf(scalarNarrowerThan(0, 32))
     1275 + .customIf(scalarNarrowerThan(1, 32))
1276 1276   .clampScalar(0, S32, S32)
1277 1277   .clampScalar(1, S32, S64)
1278 1278   .scalarize(0)
@@ -4169,7 +4169,8 @@ bool AMDGPULegalizerInfo::legalizeCTLZ_ZERO_UNDEF(MachineInstr &MI,
4169 4169   auto ShiftAmt = B.buildConstant(S32, 32u - NumBits);
4170 4170   auto Extend = B.buildAnyExt(S32, {Src}).getReg(0u);
4171 4171   auto Shift = B.buildLShr(S32, {Extend}, ShiftAmt);
4172      - B.buildInstr(AMDGPU::G_AMDGPU_FFBH_U32, {Dst}, {Shift});
     4172 + auto Ctlz = B.buildInstr(AMDGPU::G_AMDGPU_FFBH_U32, {S32}, {Shift});
     4173 + B.buildTrunc(Dst, Ctlz);
4173 4174   MI.eraseFromParent();
4174 4175   return true;
4175 4176   }

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir

+6 -8
Original file line number | Diff line number | Diff line change
@@ -174,14 +174,12 @@ body: |
174 174   ; CHECK: liveins: $vgpr0
175 175   ; CHECK-NEXT: {{ $}}
176 176   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
177     - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
178     - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
179     - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32)
180     - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
181     - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C1]]
182     - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
183     - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
184     - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32)
    177 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
    178 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
    179 + ; CHECK-NEXT: [[FFBH:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR]](s32)
    180 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
    181 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FFBH]], [[C1]]
    182 + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
185 183   %0:_(s32) = COPY $vgpr0
186 184   %1:_(s7) = G_TRUNC %0
187 185   %2:_(s7) = G_CTLZ_ZERO_UNDEF %1

0 commit comments

Comments
 (0)