diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 037a2f9f7b153..52868ca260290 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17392,14 +17392,22 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_log_clampf: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); case AMDGPU::BI__builtin_amdgcn_ldexp: - case AMDGPU::BI__builtin_amdgcn_ldexpf: - case AMDGPU::BI__builtin_amdgcn_ldexph: { + case AMDGPU::BI__builtin_amdgcn_ldexpf: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()}); return Builder.CreateCall(F, {Src0, Src1}); } + case AMDGPU::BI__builtin_amdgcn_ldexph: { + // The raw instruction has a different behavior for out of bounds exponent + // values (implicit truncation instead of saturate to short_min/short_max). + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Function *F = + CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty}); + return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)}); + } case AMDGPU::BI__builtin_amdgcn_frexp_mant: case AMDGPU::BI__builtin_amdgcn_frexp_mantf: case AMDGPU::BI__builtin_amdgcn_frexp_manth: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl index 756f90b282a9a..ff8618a5c727e 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl @@ -52,7 +52,8 @@ void test_cos_f16(global half* out, half a) } // CHECK-LABEL: @test_ldexp_f16 -// CHECK: call half @llvm.ldexp.f16.i32 +// CHECK: [[TRUNC:%[0-9a-z]+]] = trunc i32 +// CHECK: call half @llvm.ldexp.f16.i16(half %a, i16 [[TRUNC]]) void test_ldexp_f16(global half* out, half a, int b) { *out = __builtin_amdgcn_ldexph(a, b);