From 53a21df7d261eea1abcf9f95b83912bcad2615ed Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Thu, 13 Mar 2025 16:02:32 -0700 Subject: [PATCH 01/17] dot2add working for dxil without sema check --- clang/include/clang/Basic/Builtins.td | 6 + clang/lib/CodeGen/CGHLSLRuntime.h | 1 + clang/lib/Headers/hlsl/hlsl_intrinsics.h | 366 +++++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsDirectX.td | 4 + llvm/include/llvm/IR/IntrinsicsSPIRV.td | 5 + llvm/lib/Target/DirectX/DXIL.td | 11 + llvm/lib/Target/DirectX/DXILOpLowering.cpp | 33 ++ 7 files changed, 426 insertions(+) diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index b2c7ddb43de55..f392d21646273 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4891,6 +4891,12 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLDot2Add : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_dot2add"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_dot4add_i8packed"]; let Attributes = [NoThrow, Const]; diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 68151c0f0ea24..d47f8ef1896b6 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -101,6 +101,7 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot) GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot) GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot) + GENERATE_HLSL_INTRINSIC_FUNCTION(Dot2Add, dot2add) GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddI8Packed, dot4add_i8packed) GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddU8Packed, dot4add_u8packed) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAllTrue, wave_all) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index fd799b8d874ae..9eba2e6d1bf82 100644 --- 
a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -176,6 +176,372 @@ const inline float distance(__detail::HLSL_FIXED_VECTOR X, } //===----------------------------------------------------------------------===// +<<<<<<< Updated upstream +======= +// dot product builtins +//===----------------------------------------------------------------------===// + +/// \fn K dot(T X, T Y) +/// \brief Return the dot product (a scalar value) of \a X and \a Y. +/// \param X The X input value. +/// \param Y The Y input value. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +half dot(half, half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +half dot(half2, half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +half dot(half3, half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +half dot(half4, half4); + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int16_t dot(int16_t, int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int16_t dot(int16_t2, int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int16_t dot(int16_t3, int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int16_t dot(int16_t4, int16_t4); + +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint16_t dot(uint16_t, uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint16_t dot(uint16_t2, uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint16_t dot(uint16_t3, uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint16_t dot(uint16_t4, uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) 
+float dot(float, float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +float dot(float2, float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +float dot(float3, float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +float dot(float4, float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +double dot(double, double); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int dot(int, int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int dot(int2, int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int dot(int3, int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int dot(int4, int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint dot(uint, uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint dot(uint2, uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint dot(uint3, uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint dot(uint4, uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int64_t dot(int64_t, int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int64_t dot(int64_t2, int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int64_t dot(int64_t3, int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +int64_t dot(int64_t4, int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint64_t dot(uint64_t, uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint64_t dot(uint64_t2, uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint64_t dot(uint64_t3, uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) +uint64_t dot(uint64_t4, uint64_t4); + +//===----------------------------------------------------------------------===// +// dot4add builtins +//===----------------------------------------------------------------------===// + +/// \fn int dot4add_i8packed(uint A, uint B, int C) + +_HLSL_AVAILABILITY(shadermodel, 6.4) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_i8packed) +int dot4add_i8packed(uint, uint, int); + +/// \fn uint dot4add_u8packed(uint A, uint B, uint C) + +_HLSL_AVAILABILITY(shadermodel, 6.4) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_u8packed) +uint dot4add_u8packed(uint, uint, 
uint); + +//===----------------------------------------------------------------------===// +// dot2add builtins +//===----------------------------------------------------------------------===// + +/// \fn float dot2add(half2 a, half2 b, float c) + +_HLSL_AVAILABILITY(shadermodel, 6.4) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot2add) +float dot2add(half2, half2, float); + +//===----------------------------------------------------------------------===// +// exp builtins +//===----------------------------------------------------------------------===// + +/// \fn T exp(T x) +/// \brief Returns the base-e exponential, or \a e**x, of the specified value. +/// \param x The specified input value. +/// +/// The return value is the base-e exponential of the \a x parameter. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +half exp(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +half2 exp(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +half3 exp(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +half4 exp(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +float exp(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +float2 exp(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +float3 exp(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) +float4 exp(float4); + +//===----------------------------------------------------------------------===// +// exp2 builtins +//===----------------------------------------------------------------------===// + +/// \fn T exp2(T x) +/// \brief Returns the base 2 exponential, or \a 2**x, of the specified value. +/// \param x The specified input value. +/// +/// The base 2 exponential of the \a x parameter. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +half exp2(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +half2 exp2(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +half3 exp2(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +half4 exp2(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +float exp2(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +float2 exp2(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +float3 exp2(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) +float4 exp2(float4); + +//===----------------------------------------------------------------------===// +// firstbithigh builtins +//===----------------------------------------------------------------------===// + +/// \fn T firstbithigh(T Val) +/// \brief Returns the location of the first set bit starting from the highest +/// order bit and working downward, per component. +/// \param Val the input value. 
+ +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(int64_t2); 
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(uint64_t4); + +//===----------------------------------------------------------------------===// +// firstbitlow builtins +//===----------------------------------------------------------------------===// + +/// \fn T firstbitlow(T Val) +/// \brief Returns the location of the first set bit starting from the lowest +/// order bit and working upward, per component. +/// \param Val the input value. + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 
firstbitlow(uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(uint64_t4); + +//===----------------------------------------------------------------------===// +// floor builtins +//===----------------------------------------------------------------------===// + +/// \fn T floor(T Val) +/// \brief Returns the largest integer that is less than or equal to the input +/// value, \a Val. +/// \param Val The input value. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +half floor(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +half2 floor(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +half3 floor(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +half4 floor(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +float floor(float); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +float2 floor(float2); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +float3 floor(float3); +_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) +float4 floor(float4); + +//===----------------------------------------------------------------------===// +>>>>>>> Stashed changes // fmod builtins //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index ead7286f4311c..775d325feeb14 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -100,6 +100,10 @@ def int_dx_udot : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>], [IntrNoMem, Commutative] >; +def int_dx_dot2add : + DefaultAttrsIntrinsic<[llvm_float_ty], + [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_float_ty], + [IntrNoMem, Commutative]>; def int_dx_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_dx_dot4add_u8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 4389b86745d7f..7cdebbf72cfe3 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ 
b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -89,6 +89,11 @@ let TargetPrefix = "spv" in { DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>], [IntrNoMem, Commutative] >; + + def int_spv_dot2add : DefaultAttrsIntrinsic<[llvm_float_ty], + [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_float_ty], + [IntrNoMem, Commutative]>; + def int_spv_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_spv_dot4add_u8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_spv_wave_active_countbits : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>; diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 1d8904bdf5514..b1e7406ead675 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -1077,6 +1077,17 @@ def RawBufferStore : DXILOp<140, rawBufferStore> { let stages = [Stages]; } +def Dot2AddHalf : DXILOp<162, dot2AddHalf> { + let Doc = "dot product of 2 vectors of half having size = 2, returns " + "float"; + let intrinsics = [IntrinSelect]; + let arguments = [FloatTy, HalfTy, HalfTy, HalfTy, HalfTy]; + let result = FloatTy; + let overloads = [Overloads]; + let stages = [Stages]; + let attributes = [Attributes]; +} + def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> { let Doc = "signed dot product of 4 x i8 vectors packed into i32, with " "accumulate to i32"; diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index dff9f3e03079e..10a726aba5ff3 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -75,6 +75,27 @@ static SmallVector argVectorFlatten(CallInst *Orig, return NewOperands; } +static SmallVector argVectorFlattenExcludeLastElement(CallInst *Orig, + IRBuilder<> &Builder) { + // Note: 
the trailing callee operand and the last argument are not flattened. + unsigned NumOperands = Orig->getNumOperands() - 2; + assert(NumOperands > 0); + Value *Arg0 = Orig->getOperand(0); + [[maybe_unused]] auto *VecArg0 = dyn_cast(Arg0->getType()); + assert(VecArg0); + SmallVector NewOperands = populateOperands(Arg0, Builder); + for (unsigned I = 1; I < NumOperands; ++I) { + Value *Arg = Orig->getOperand(I); + [[maybe_unused]] auto *VecArg = dyn_cast(Arg->getType()); + assert(VecArg); + assert(VecArg0->getElementType() == VecArg->getElementType()); + assert(VecArg0->getNumElements() == VecArg->getNumElements()); + auto NextOperandList = populateOperands(Arg, Builder); + NewOperands.append(NextOperandList.begin(), NextOperandList.end()); + } + return NewOperands; +} + namespace { class OpLowerer { Module &M; @@ -168,6 +189,18 @@ class OpLowerer { } } else if (IsVectorArgExpansion) { Args = argVectorFlatten(CI, OpBuilder.getIRB()); + } else if (F.getIntrinsicID() == Intrinsic::dx_dot2add) { + unsigned NumOperands = CI->getNumOperands() - 1; + assert(NumOperands > 0); + Value *LastArg = CI->getOperand(NumOperands - 1); + + Args.push_back(LastArg); + + //dbgs() << "Value of LastArg" << LastArg->getName() << "\n"; + + + //Args = populateOperands(LastArg, OpBuilder.getIRB()); + Args.append(argVectorFlattenExcludeLastElement(CI, OpBuilder.getIRB())); } else { Args.append(CI->arg_begin(), CI->arg_end()); } From 9aacccda792026ee8a713795abd540c862c60134 Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Tue, 18 Mar 2025 17:33:30 -0700 Subject: [PATCH 02/17] WIP --- .../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 10 + clang/lib/Headers/hlsl/hlsl_intrinsics.h | 366 ------------------ clang/test/CodeGenHLSL/builtins/dot2add.hlsl | 15 + llvm/test/CodeGen/DirectX/dot2add.ll | 8 + 4 files changed, 33 insertions(+), 366 deletions(-) create mode 100644 clang/test/CodeGenHLSL/builtins/dot2add.hlsl create mode 100644 llvm/test/CodeGen/DirectX/dot2add.ll diff --git
a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index 0de04f13b7ec4..d1208260c9c74 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -972,6 +972,16 @@ uint64_t dot(uint64_t3, uint64_t3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) uint64_t dot(uint64_t4, uint64_t4); +//===----------------------------------------------------------------------===// +// dot2add builtins +//===----------------------------------------------------------------------===// + +/// \fn float dot2add(half2 a, half2 b, float c) + +_HLSL_AVAILABILITY(shadermodel, 6.4) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot2add) +float dot2add(half2, half2, float); + //===----------------------------------------------------------------------===// // dot4add builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 9eba2e6d1bf82..fd799b8d874ae 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -176,372 +176,6 @@ const inline float distance(__detail::HLSL_FIXED_VECTOR X, } //===----------------------------------------------------------------------===// -<<<<<<< Updated upstream -======= -// dot product builtins -//===----------------------------------------------------------------------===// - -/// \fn K dot(T X, T Y) -/// \brief Return the dot product (a scalar value) of \a X and \a Y. -/// \param X The X input value. -/// \param Y The Y input value. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -half dot(half, half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -half dot(half2, half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -half dot(half3, half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -half dot(half4, half4); - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int16_t dot(int16_t, int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int16_t dot(int16_t2, int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int16_t dot(int16_t3, int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int16_t dot(int16_t4, int16_t4); - -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint16_t dot(uint16_t, uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint16_t dot(uint16_t2, uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint16_t dot(uint16_t3, uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint16_t dot(uint16_t4, uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -float dot(float, float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -float dot(float2, float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -float dot(float3, float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -float dot(float4, float4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -double dot(double, double); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int dot(int, int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int dot(int2, int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int dot(int3, int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int dot(int4, int4); - 
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint dot(uint, uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint dot(uint2, uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint dot(uint3, uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint dot(uint4, uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int64_t dot(int64_t, int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int64_t dot(int64_t2, int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int64_t dot(int64_t3, int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -int64_t dot(int64_t4, int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint64_t dot(uint64_t, uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint64_t dot(uint64_t2, uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint64_t dot(uint64_t3, uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) -uint64_t dot(uint64_t4, uint64_t4); - -//===----------------------------------------------------------------------===// -// dot4add builtins -//===----------------------------------------------------------------------===// - -/// \fn int dot4add_i8packed(uint A, uint B, int C) - -_HLSL_AVAILABILITY(shadermodel, 6.4) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_i8packed) -int dot4add_i8packed(uint, uint, int); - -/// \fn uint dot4add_u8packed(uint A, uint B, uint C) - -_HLSL_AVAILABILITY(shadermodel, 6.4) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_u8packed) -uint dot4add_u8packed(uint, uint, uint); - -//===----------------------------------------------------------------------===// -// dot2add builtins -//===----------------------------------------------------------------------===// - -/// \fn float dot2add(half2 a, half2 b, float c) - -_HLSL_AVAILABILITY(shadermodel, 6.4) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot2add) -float dot2add(half2, half2, float); - -//===----------------------------------------------------------------------===// -// exp builtins -//===----------------------------------------------------------------------===// - -/// \fn T 
exp(T x) -/// \brief Returns the base-e exponential, or \a e**x, of the specified value. -/// \param x The specified input value. -/// -/// The return value is the base-e exponential of the \a x parameter. - -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -half exp(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -half2 exp(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -half3 exp(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -half4 exp(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -float exp(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -float2 exp(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -float3 exp(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp) -float4 exp(float4); - -//===----------------------------------------------------------------------===// -// exp2 builtins -//===----------------------------------------------------------------------===// - -/// \fn T exp2(T x) -/// \brief Returns the base 2 exponential, or \a 2**x, of the specified value. -/// \param x The specified input value. -/// -/// The base 2 exponential of the \a x parameter. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -half exp2(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -half2 exp2(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -half3 exp2(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -half4 exp2(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -float exp2(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -float2 exp2(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -float3 exp2(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) -float4 exp2(float4); - -//===----------------------------------------------------------------------===// -// firstbithigh builtins -//===----------------------------------------------------------------------===// - -/// \fn T firstbithigh(T Val) -/// \brief Returns the location of the first set bit starting from the highest -/// order bit and working downward, per component. -/// \param Val the input value. 
- -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(int16_t4); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(int4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(int64_t2); 
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(uint64_t4); - -//===----------------------------------------------------------------------===// -// firstbitlow builtins -//===----------------------------------------------------------------------===// - -/// \fn T firstbitlow(T Val) -/// \brief Returns the location of the first set bit starting from the lowest -/// order bit and working upward, per component. -/// \param Val the input value. - -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint firstbitlow(int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint2 firstbitlow(int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint3 firstbitlow(int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint4 firstbitlow(int16_t4); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint firstbitlow(uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint2 firstbitlow(uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint3 firstbitlow(uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint4 
firstbitlow(uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint firstbitlow(int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint2 firstbitlow(int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint3 firstbitlow(int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint4 firstbitlow(int4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint firstbitlow(uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint2 firstbitlow(uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint3 firstbitlow(uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint4 firstbitlow(uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint firstbitlow(int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint2 firstbitlow(int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint3 firstbitlow(int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint4 firstbitlow(int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint firstbitlow(uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint2 firstbitlow(uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint3 firstbitlow(uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) -uint4 firstbitlow(uint64_t4); - -//===----------------------------------------------------------------------===// -// floor builtins -//===----------------------------------------------------------------------===// - -/// \fn T floor(T Val) -/// \brief Returns the largest integer that is less than or equal to the input -/// value, \a Val. -/// \param Val The input value. 
- -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -half floor(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -half2 floor(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -half3 floor(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -half4 floor(half4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -float floor(float); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -float2 floor(float2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -float3 floor(float3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) -float4 floor(float4); - -//===----------------------------------------------------------------------===// ->>>>>>> Stashed changes // fmod builtins //===----------------------------------------------------------------------===// diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl new file mode 100644 index 0000000000000..07e438063c29d --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -finclude-default-header -triple \ +// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \ +// RUN: FileCheck %s -DTARGET=dx +// RUN: %clang_cc1 -finclude-default-header -triple \ +// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \ +// RUN: FileCheck %s -DTARGET=spv + +// Test basic lowering to runtime function call. 
+ +// CHECK-LABEL: test +float test(half2 p1, half2 p2, float p3) { + return dot2add(p1, p2, p3); +} + +// CHECK: declare [[TY]] @llvm.[[TARGET]].dot4add.i8packed([[TY]], [[TY]], [[TY]]) diff --git a/llvm/test/CodeGen/DirectX/dot2add.ll b/llvm/test/CodeGen/DirectX/dot2add.ll new file mode 100644 index 0000000000000..b1019c36b56e8 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/dot2add.ll @@ -0,0 +1,8 @@ +; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s + +define noundef float @dot2add_simple(<2 x half> noundef %a, <2 x half> noundef %b, float %c) { +entry: +; CHECK: call float @dx.op.dot2AddHalf(i32 162, float %c, half %0, half %1, half %2, half %3) + %ret = call float @llvm.dx.dot2add(<2 x half> %a, <2 x half> %b, float %c) + ret float %ret +} \ No newline at end of file From 7c81ed62b95fd603c07a16f23a4e545d07004997 Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Wed, 19 Mar 2025 02:53:09 -0700 Subject: [PATCH 03/17] WIP --- clang/test/CodeGenHLSL/builtins/dot2add.hlsl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl index 07e438063c29d..ee7cb79bb26c4 100644 --- a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl +++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl @@ -1,14 +1,13 @@ // RUN: %clang_cc1 -finclude-default-header -triple \ // RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s -DTARGET=dx -// RUN: %clang_cc1 -finclude-default-header -triple \ -// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \ -// RUN: FileCheck %s -DTARGET=spv // Test basic lowering to runtime function call. 
// CHECK-LABEL: test float test(half2 p1, half2 p2, float p3) { + // CHECK-DXIL: %hlsl.dot2add = call reassoc nnan ninf nsz arcp afn float @llvm.dx.dot2add.v2f32(<2 x float> %0, <2 x float> %1, float %2) + // CHECK: ret float %hlsl.dot2add return dot2add(p1, p2, p3); } From b7ff3bfbef9af9ed7bbcaa55e85876c7800c4aab Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Wed, 19 Mar 2025 19:30:56 -0700 Subject: [PATCH 04/17] WIP: Mostly wrote everything. --- clang/lib/CodeGen/CGHLSLRuntime.h | 1 - .../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 10 ----- .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 8 ++++ clang/lib/Headers/hlsl/hlsl_intrinsics.h | 12 +++++ clang/lib/Sema/SemaHLSL.cpp | 45 +++++++++++++++---- .../SemaHLSL/BuiltIns/Dot2Add-errors.hlsl | 11 +++++ llvm/include/llvm/IR/IntrinsicsSPIRV.td | 5 --- llvm/lib/Target/DirectX/DXILOpLowering.cpp | 22 ++++++--- 8 files changed, 84 insertions(+), 30 deletions(-) create mode 100644 clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index d47f8ef1896b6..68151c0f0ea24 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -101,7 +101,6 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot) GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot) GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot) - GENERATE_HLSL_INTRINSIC_FUNCTION(Dot2Add, dot2add) GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddI8Packed, dot4add_i8packed) GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddU8Packed, dot4add_u8packed) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAllTrue, wave_all) diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index d1208260c9c74..0de04f13b7ec4 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -972,16 +972,6 @@ uint64_t dot(uint64_t3, uint64_t3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot) uint64_t 
dot(uint64_t4, uint64_t4); -//===----------------------------------------------------------------------===// -// dot2add builtins -//===----------------------------------------------------------------------===// - -/// \fn float dot2add(half2 a, half2 b, float c) - -_HLSL_AVAILABILITY(shadermodel, 6.4) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot2add) -float dot2add(half2, half2, float); - //===----------------------------------------------------------------------===// // dot4add builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h index 8cdd63d7e07bb..3c15f2b38d80f 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h @@ -45,6 +45,14 @@ distance_vec_impl(vector X, vector Y) { return length_vec_impl(X - Y); } +constexpr float dot2add_impl(half2 a, half2 b, float c) { +#if defined(__DIRECTX__) + return __builtin_hlsl_dot2add(a, b, c); +#else + return dot(a, b) + c; +#endif +} + template constexpr T reflect_impl(T I, T N) { return I - 2 * N * I * N; } diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index fd799b8d874ae..874c1c96f87bb 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -175,6 +175,18 @@ const inline float distance(__detail::HLSL_FIXED_VECTOR X, return __detail::distance_vec_impl(X, Y); } +//===----------------------------------------------------------------------===// +// dot2add builtins +//===----------------------------------------------------------------------===// + +/// \fn float dot2add(half2 a, half2 b, float c) +/// \brief Dot product of 2 vector of type half and add a float scalar value. 
+ +_HLSL_AVAILABILITY(shadermodel, 6.4) +const inline float dot2add(half2 a, half2 b, float c) { + return __detail::dot2add_impl(a, b, c); +} + //===----------------------------------------------------------------------===// // fmod builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index fe600386e6fa9..e45acc675171b 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1989,7 +1989,7 @@ void SemaHLSL::diagnoseAvailabilityViolations(TranslationUnitDecl *TU) { } // Helper function for CheckHLSLBuiltinFunctionCall -static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { +static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall, unsigned NumArgs) { assert(TheCall->getNumArgs() > 1); ExprResult A = TheCall->getArg(0); @@ -1999,7 +1999,7 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { SourceLocation BuiltinLoc = TheCall->getBeginLoc(); bool AllBArgAreVectors = true; - for (unsigned i = 1; i < TheCall->getNumArgs(); ++i) { + for (unsigned i = 1; i < NumArgs; ++i) { ExprResult B = TheCall->getArg(i); QualType ArgTyB = B.get()->getType(); auto *VecTyB = ArgTyB->getAs(); @@ -2050,6 +2050,10 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { return false; } +static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { + return CheckVectorElementCallArgs(S, TheCall, TheCall->getNumArgs()); +} + static bool CheckAllArgsHaveSameType(Sema *S, CallExpr *TheCall) { assert(TheCall->getNumArgs() > 1); QualType ArgTy0 = TheCall->getArg(0)->getType(); @@ -2092,10 +2096,10 @@ static bool CheckArgTypeIsCorrect( return false; } -static bool CheckAllArgTypesAreCorrect( - Sema *S, CallExpr *TheCall, QualType ExpectedType, +static bool CheckArgTypesAreCorrect( + Sema *S, CallExpr *TheCall, unsigned NumArgs, QualType ExpectedType, llvm::function_ref Check) { - for (unsigned i = 0; i < 
TheCall->getNumArgs(); ++i) { + for (unsigned i = 0; i < NumArgs; ++i) { Expr *Arg = TheCall->getArg(i); if (CheckArgTypeIsCorrect(S, Arg, ExpectedType, Check)) { return true; @@ -2104,6 +2108,13 @@ static bool CheckAllArgTypesAreCorrect( return false; } +static bool CheckAllArgTypesAreCorrect( + Sema *S, CallExpr *TheCall, QualType ExpectedType, + llvm::function_ref Check) { + return CheckArgTypesAreCorrect(S, TheCall, TheCall->getNumArgs(), + ExpectedType, Check); +} + static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) { auto checkAllFloatTypes = [](clang::QualType PassedType) -> bool { return !PassedType->hasFloatingRepresentation(); @@ -2147,15 +2158,17 @@ static bool CheckModifiableLValue(Sema *S, CallExpr *TheCall, return true; } -static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall) { +static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall, + unsigned NumArgs, QualType ExpectedType) { auto checkDoubleVector = [](clang::QualType PassedType) -> bool { if (const auto *VecTy = PassedType->getAs()) return VecTy->getElementType()->isDoubleType(); return false; }; - return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.FloatTy, - checkDoubleVector); + return CheckArgTypesAreCorrect(S, TheCall, NumArgs, + ExpectedType, checkDoubleVector); } + static bool CheckFloatingOrIntRepresentation(Sema *S, CallExpr *TheCall) { auto checkAllSignedTypes = [](clang::QualType PassedType) -> bool { return !PassedType->hasIntegerRepresentation() && @@ -2471,7 +2484,21 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; if (SemaRef.BuiltinVectorToScalarMath(TheCall)) return true; - if (CheckNoDoubleVectors(&SemaRef, TheCall)) + if (CheckNoDoubleVectors(&SemaRef, TheCall, + TheCall->getNumArgs(), SemaRef.Context.FloatTy)) + return true; + break; + } + case Builtin::BI__builtin_hlsl_dot2add: { + if (SemaRef.checkArgCount(TheCall, 3)) + return true; + if (CheckVectorElementCallArgs(&SemaRef, TheCall, 
TheCall->getNumArgs() - 1)) + return true; + if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2), SemaRef.getASTContext().FloatTy)) + return true; + if (CheckNoDoubleVectors(&SemaRef, TheCall, + TheCall->getNumArgs() - 1, + SemaRef.Context.HalfTy)) return true; break; } diff --git a/clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl new file mode 100644 index 0000000000000..61282a319dafd --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify + +bool test_too_few_arg() { + return __builtin_hlsl_dot2add(); + // expected-error@-1 {{too few arguments to function call, expected 3, have 0}} +} + +bool test_too_many_arg(half2 p1, half2 p2, float p3) { + return __builtin_hlsl_dot2add(p1, p2, p3, p1); + // expected-error@-1 {{too many arguments to function call, expected 3, have 4}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 7cdebbf72cfe3..4389b86745d7f 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -89,11 +89,6 @@ let TargetPrefix = "spv" in { DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>], [IntrNoMem, Commutative] >; - - def int_spv_dot2add : DefaultAttrsIntrinsic<[llvm_float_ty], - [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_float_ty], - [IntrNoMem, Commutative]>; - def int_spv_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_spv_dot4add_u8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_spv_wave_active_countbits : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>; diff --git 
a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 10a726aba5ff3..f7ed0c5071d75 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -55,9 +55,8 @@ static SmallVector populateOperands(Value *Arg, IRBuilder<> &Builder) { } static SmallVector argVectorFlatten(CallInst *Orig, - IRBuilder<> &Builder) { - // Note: arg[NumOperands-1] is a pointer and is not needed by our flattening. - unsigned NumOperands = Orig->getNumOperands() - 1; + IRBuilder<> &Builder, + unsigned NumOperands) { assert(NumOperands > 0); Value *Arg0 = Orig->getOperand(0); [[maybe_unused]] auto *VecArg0 = dyn_cast(Arg0->getType()); @@ -75,6 +74,12 @@ static SmallVector argVectorFlatten(CallInst *Orig, return NewOperands; } +static SmallVector argVectorFlatten(CallInst *Orig, + IRBuilder<> &Builder) { + // Note: arg[NumOperands-1] is a pointer and is not needed by our flattening. + return argVectorFlatten(Orig, Builder, Orig->getNumOperands() - 1); +} +/* static SmallVector argVectorFlattenExcludeLastElement(CallInst *Orig, IRBuilder<> &Builder) { // Note: arg[NumOperands-1] is a pointer and is not needed by our flattening. @@ -95,7 +100,7 @@ static SmallVector argVectorFlattenExcludeLastElement(CallInst *Orig, } return NewOperands; } - +*/ namespace { class OpLowerer { Module &M; @@ -190,7 +195,13 @@ class OpLowerer { } else if (IsVectorArgExpansion) { Args = argVectorFlatten(CI, OpBuilder.getIRB()); } else if (F.getIntrinsicID() == Intrinsic::dx_dot2add) { - unsigned NumOperands = CI->getNumOperands() - 1; + // arg[NumOperands-1] is a pointer and is not needed by our flattening. + // arg[NumOperands-2] also does not need to be flattened because it is a scalar. 
+ unsigned NumOperands = CI->getNumOperands() - 2; + Args.push_back(CI->getArgOperand(NumOperands)); + Args.append(argVectorFlatten(CI, OpBuilder.getIRB(), NumOperands)); + + /*unsigned NumOperands = CI->getNumOperands() - 1; assert(NumOperands > 0); Value *LastArg = CI->getOperand(NumOperands - 1); @@ -201,6 +212,7 @@ class OpLowerer { //Args = populateOperands(LastArg, OpBuilder.getIRB()); Args.append(argVectorFlattenExcludeLastElement(CI, OpBuilder.getIRB())); + */ } else { Args.append(CI->arg_begin(), CI->arg_end()); } From e56509e44db1a43bea830fd7bb3b6765495b5c0b Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Thu, 20 Mar 2025 13:22:33 -0700 Subject: [PATCH 05/17] End to end working code for dot2add --- clang/include/clang/Basic/Builtins.td | 4 ++-- clang/lib/Sema/SemaHLSL.cpp | 2 ++ clang/test/CodeGenHLSL/builtins/dot2add.hlsl | 17 ++++++++++------- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index f392d21646273..1b09ce103cbea 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4893,8 +4893,8 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> { def HLSLDot2Add : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_dot2add"]; - let Attributes = [NoThrow, Const]; - let Prototype = "void(...)"; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "float(...)"; } def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> { diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index e45acc675171b..f14e07f8c38d4 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2490,8 +2490,10 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { break; } case Builtin::BI__builtin_hlsl_dot2add: { + // Check number of arguments should be 3 if (SemaRef.checkArgCount(TheCall, 3)) return true; + // Check first two arguments should be vectors of 
same length if (CheckVectorElementCallArgs(&SemaRef, TheCall, TheCall->getNumArgs() - 1)) return true; if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2), SemaRef.getASTContext().FloatTy)) diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl index ee7cb79bb26c4..ce325327a01b5 100644 --- a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl +++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl @@ -1,14 +1,17 @@ // RUN: %clang_cc1 -finclude-default-header -triple \ // RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \ -// RUN: FileCheck %s -DTARGET=dx +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-DXIL +// RUN: %clang_cc1 -finclude-default-header -triple \ +// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV // Test basic lowering to runtime function call. -// CHECK-LABEL: test float test(half2 p1, half2 p2, float p3) { - // CHECK-DXIL: %hlsl.dot2add = call reassoc nnan ninf nsz arcp afn float @llvm.dx.dot2add.v2f32(<2 x float> %0, <2 x float> %1, float %2) - // CHECK: ret float %hlsl.dot2add + // CHECK-SPIRV: %[[MUL:.*]] = call {{.*}} float @llvm.spv.fdot.v2f32(<2 x float> %1, <2 x float> %2) + // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr, align 4 + // CHECK-SPIRV: %[[RES:.*]] = fadd {{.*}} float %[[MUL]], %[[C]] + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f32(<2 x float> %0, <2 x float> %1, float %2) + // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); -} - -// CHECK: declare [[TY]] @llvm.[[TARGET]].dot4add.i8packed([[TY]], [[TY]], [[TY]]) +} \ No newline at end of file From 275f4816940ae263e8438c2016cd14a2265ee356 Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Thu, 20 Mar 2025 15:06:37 -0700 Subject: [PATCH 06/17] Update SemaCheck and set return type and make void as return type in Builtin --- clang/include/clang/Basic/Builtins.td | 2 +- clang/lib/Sema/SemaHLSL.cpp | 
24 ++++++++++++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 1b09ce103cbea..6b55f70fbd69d 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4894,7 +4894,7 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> { def HLSLDot2Add : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_dot2add"]; let Attributes = [NoThrow, Const, CustomTypeChecking]; - let Prototype = "float(...)"; + let Prototype = "void(...)"; } def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> { diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index f14e07f8c38d4..11ba798ddaa2b 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2493,15 +2493,27 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { // Check number of arguments should be 3 if (SemaRef.checkArgCount(TheCall, 3)) return true; - // Check first two arguments should be vectors of same length - if (CheckVectorElementCallArgs(&SemaRef, TheCall, TheCall->getNumArgs() - 1)) + + // Check first two arguments are vector of length 2 with half data type + auto checkHalfVectorOfSize2 = [](clang::QualType PassedType) -> bool { + if (const auto *VecTy = PassedType->getAs()) + return !(VecTy->getNumElements() == 2 && + VecTy->getElementType()->isHalfType()); return true; - if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2), SemaRef.getASTContext().FloatTy)) + }; + if(CheckArgTypeIsCorrect(&SemaRef, TheCall->getArg(0), + SemaRef.getASTContext().HalfTy, + checkHalfVectorOfSize2)) return true; - if (CheckNoDoubleVectors(&SemaRef, TheCall, - TheCall->getNumArgs() - 1, - SemaRef.Context.HalfTy)) + if(CheckArgTypeIsCorrect(&SemaRef, TheCall->getArg(1), + SemaRef.getASTContext().HalfTy, + checkHalfVectorOfSize2)) + return true; + + // Check third argument is a float + if (CheckArgTypeMatches(&SemaRef, 
TheCall->getArg(2), SemaRef.getASTContext().FloatTy)) return true; + TheCall->setType(TheCall->getArg(2)->getType()); break; } case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: From 54346f263c90d5aba96b2006ac13a811c5c70e78 Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Thu, 20 Mar 2025 15:25:01 -0700 Subject: [PATCH 07/17] Revert not required change --- clang/lib/Sema/SemaHLSL.cpp | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 11ba798ddaa2b..c1a9ceb0d135b 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1989,7 +1989,7 @@ void SemaHLSL::diagnoseAvailabilityViolations(TranslationUnitDecl *TU) { } // Helper function for CheckHLSLBuiltinFunctionCall -static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall, unsigned NumArgs) { +static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { assert(TheCall->getNumArgs() > 1); ExprResult A = TheCall->getArg(0); @@ -1999,7 +1999,7 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall, unsigned NumA SourceLocation BuiltinLoc = TheCall->getBeginLoc(); bool AllBArgAreVectors = true; - for (unsigned i = 1; i < NumArgs; ++i) { + for (unsigned i = 1; i < TheCall->getNumArgs(); ++i) { ExprResult B = TheCall->getArg(i); QualType ArgTyB = B.get()->getType(); auto *VecTyB = ArgTyB->getAs(); @@ -2050,10 +2050,6 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall, unsigned NumA return false; } -static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { - return CheckVectorElementCallArgs(S, TheCall, TheCall->getNumArgs()); -} - static bool CheckAllArgsHaveSameType(Sema *S, CallExpr *TheCall) { assert(TheCall->getNumArgs() > 1); QualType ArgTy0 = TheCall->getArg(0)->getType(); @@ -2096,10 +2092,10 @@ static bool CheckArgTypeIsCorrect( return false; } -static bool CheckArgTypesAreCorrect( - Sema *S, CallExpr *TheCall, 
unsigned NumArgs, QualType ExpectedType, +static bool CheckAllArgTypesAreCorrect( + Sema *S, CallExpr *TheCall, QualType ExpectedType, llvm::function_ref Check) { - for (unsigned i = 0; i < NumArgs; ++i) { + for (unsigned i = 0; i < TheCall->getNumArgs(); ++i) { Expr *Arg = TheCall->getArg(i); if (CheckArgTypeIsCorrect(S, Arg, ExpectedType, Check)) { return true; @@ -2108,13 +2104,6 @@ static bool CheckArgTypesAreCorrect( return false; } -static bool CheckAllArgTypesAreCorrect( - Sema *S, CallExpr *TheCall, QualType ExpectedType, - llvm::function_ref Check) { - return CheckArgTypesAreCorrect(S, TheCall, TheCall->getNumArgs(), - ExpectedType, Check); -} - static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) { auto checkAllFloatTypes = [](clang::QualType PassedType) -> bool { return !PassedType->hasFloatingRepresentation(); @@ -2158,17 +2147,15 @@ static bool CheckModifiableLValue(Sema *S, CallExpr *TheCall, return true; } -static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall, - unsigned NumArgs, QualType ExpectedType) { +static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall) { auto checkDoubleVector = [](clang::QualType PassedType) -> bool { if (const auto *VecTy = PassedType->getAs()) return VecTy->getElementType()->isDoubleType(); return false; }; - return CheckArgTypesAreCorrect(S, TheCall, NumArgs, - ExpectedType, checkDoubleVector); + return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.FloatTy, + checkDoubleVector); } - static bool CheckFloatingOrIntRepresentation(Sema *S, CallExpr *TheCall) { auto checkAllSignedTypes = [](clang::QualType PassedType) -> bool { return !PassedType->hasIntegerRepresentation() && @@ -2484,8 +2471,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; if (SemaRef.BuiltinVectorToScalarMath(TheCall)) return true; - if (CheckNoDoubleVectors(&SemaRef, TheCall, - TheCall->getNumArgs(), SemaRef.Context.FloatTy)) + if (CheckNoDoubleVectors(&SemaRef, 
TheCall)) return true; break; } From 0beb7c72854b38affa7529d03a87e69278c93ce6 Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Thu, 20 Mar 2025 15:31:15 -0700 Subject: [PATCH 08/17] Remove commented code --- llvm/lib/Target/DirectX/DXILOpLowering.cpp | 36 +--------------------- 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index f7ed0c5071d75..cecd012011419 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -79,28 +79,7 @@ static SmallVector argVectorFlatten(CallInst *Orig, // Note: arg[NumOperands-1] is a pointer and is not needed by our flattening. return argVectorFlatten(Orig, Builder, Orig->getNumOperands() - 1); } -/* -static SmallVector argVectorFlattenExcludeLastElement(CallInst *Orig, - IRBuilder<> &Builder) { - // Note: arg[NumOperands-1] is a pointer and is not needed by our flattening. - unsigned NumOperands = Orig->getNumOperands() - 2; - assert(NumOperands > 0); - Value *Arg0 = Orig->getOperand(0); - [[maybe_unused]] auto *VecArg0 = dyn_cast(Arg0->getType()); - assert(VecArg0); - SmallVector NewOperands = populateOperands(Arg0, Builder); - for (unsigned I = 1; I < NumOperands; ++I) { - Value *Arg = Orig->getOperand(I); - [[maybe_unused]] auto *VecArg = dyn_cast(Arg->getType()); - assert(VecArg); - assert(VecArg0->getElementType() == VecArg->getElementType()); - assert(VecArg0->getNumElements() == VecArg->getNumElements()); - auto NextOperandList = populateOperands(Arg, Builder); - NewOperands.append(NextOperandList.begin(), NextOperandList.end()); - } - return NewOperands; -} -*/ + namespace { class OpLowerer { Module &M; @@ -200,19 +179,6 @@ class OpLowerer { unsigned NumOperands = CI->getNumOperands() - 2; Args.push_back(CI->getArgOperand(NumOperands)); Args.append(argVectorFlatten(CI, OpBuilder.getIRB(), NumOperands)); - - /*unsigned NumOperands = CI->getNumOperands() - 1; - 
assert(NumOperands > 0); - Value *LastArg = CI->getOperand(NumOperands - 1); - - Args.push_back(LastArg); - - //dbgs() << "Value of LastArg" << LastArg->getName() << "\n"; - - - //Args = populateOperands(LastArg, OpBuilder.getIRB()); - Args.append(argVectorFlattenExcludeLastElement(CI, OpBuilder.getIRB())); - */ } else { Args.append(CI->arg_begin(), CI->arg_end()); } From 807602e338c04a2dcefc70a490e500a62432cc9a Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Fri, 21 Mar 2025 17:32:42 -0700 Subject: [PATCH 09/17] Addressed PR feedback --- clang/include/clang/Basic/Builtins.td | 2 +- clang/lib/Headers/hlsl/hlsl_intrinsics.h | 9 ++++-- clang/lib/Sema/SemaHLSL.cpp | 5 +-- clang/test/CodeGenHLSL/builtins/dot2add.hlsl | 13 ++++---- .../SemaHLSL/BuiltIns/Dot2Add-errors.hlsl | 31 +++++++++++++++++-- llvm/test/CodeGen/DirectX/dot2add.ll | 2 +- 6 files changed, 44 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 6b55f70fbd69d..5de1da2111471 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4894,7 +4894,7 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> { def HLSLDot2Add : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_dot2add"]; let Attributes = [NoThrow, Const, CustomTypeChecking]; - let Prototype = "void(...)"; + let Prototype = "float(_ExtVector<2, _Float16>,_ExtVector<2, _Float16>, float)"; } def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> { diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 874c1c96f87bb..ed07428c0f12b 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -179,12 +179,15 @@ const inline float distance(__detail::HLSL_FIXED_VECTOR X, // dot2add builtins //===----------------------------------------------------------------------===// -/// \fn float dot2add(half2 a, half2 b, float c) +/// \fn float dot2add(half2 
A, half2 B, float C) /// \brief Dot product of 2 vector of type half and add a float scalar value. +/// \param A The first input value to dot product. +/// \param B The second input value to dot product. +/// \param C The input value added to the dot product. _HLSL_AVAILABILITY(shadermodel, 6.4) -const inline float dot2add(half2 a, half2 b, float c) { - return __detail::dot2add_impl(a, b, c); +const inline float dot2add(half2 A, half2 B, float C) { + return __detail::dot2add_impl(A, B, C); } //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index c1a9ceb0d135b..78c643dfe31cd 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2476,11 +2476,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { break; } case Builtin::BI__builtin_hlsl_dot2add: { - // Check number of arguments should be 3 if (SemaRef.checkArgCount(TheCall, 3)) return true; - // Check first two arguments are vector of length 2 with half data type auto checkHalfVectorOfSize2 = [](clang::QualType PassedType) -> bool { if (const auto *VecTy = PassedType->getAs()) return !(VecTy->getNumElements() == 2 && @@ -2496,10 +2494,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { checkHalfVectorOfSize2)) return true; - // Check third argument is a float if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2), SemaRef.getASTContext().FloatTy)) return true; - TheCall->setType(TheCall->getArg(2)->getType()); + TheCall->setType(SemaRef.getASTContext().FloatTy); break; } case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl index ce325327a01b5..cea399f6028d2 100644 --- a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl +++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl @@ -1,17 +1,18 @@ -// RUN: %clang_cc1 
-finclude-default-header -triple \ +// RUN: %clang_cc1 -finclude-default-header -fnative-half-type -triple \ // RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,CHECK-DXIL -// RUN: %clang_cc1 -finclude-default-header -triple \ +// RUN: %clang_cc1 -finclude-default-header -fnative-half-type -triple \ // RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV // Test basic lowering to runtime function call. float test(half2 p1, half2 p2, float p3) { - // CHECK-SPIRV: %[[MUL:.*]] = call {{.*}} float @llvm.spv.fdot.v2f32(<2 x float> %1, <2 x float> %2) + // CHECK-SPIRV: %[[MUL:.*]] = call {{.*}} half @llvm.spv.fdot.v2f16(<2 x half> %1, <2 x half> %2) + // CHECK-SPIRV: %[[CONVERT:.*]] = fpext {{.*}} half %[[MUL:.*]] to float // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr, align 4 - // CHECK-SPIRV: %[[RES:.*]] = fadd {{.*}} float %[[MUL]], %[[C]] - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f32(<2 x float> %0, <2 x float> %1, float %2) + // CHECK-SPIRV: %[[RES:.*]] = fadd {{.*}} float %[[CONVERT:.*]], %[[C:.*]] + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %0, <2 x half> %1, float %2) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); -} \ No newline at end of file +} diff --git a/clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl index 61282a319dafd..87bcb931aabe5 100644 --- a/clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl @@ -1,11 +1,36 @@ -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -bool test_too_few_arg() { +float 
test_too_few_arg() { return __builtin_hlsl_dot2add(); // expected-error@-1 {{too few arguments to function call, expected 3, have 0}} } -bool test_too_many_arg(half2 p1, half2 p2, float p3) { +float test_too_many_arg(half2 p1, half2 p2, float p3) { return __builtin_hlsl_dot2add(p1, p2, p3, p1); // expected-error@-1 {{too many arguments to function call, expected 3, have 4}} } + +float test_float_arg2_type(half2 p1, float2 p2, float p3) { + return __builtin_hlsl_dot2add(p1, p2, p3); + // expected-error@-1 {{passing 'float2' (aka 'vector') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(half)))) half' (vector of 2 'half' values)}} +} + +float test_float_arg1_type(float2 p1, half2 p2, float p3) { + return __builtin_hlsl_dot2add(p1, p2, p3); + // expected-error@-1 {{passing 'float2' (aka 'vector') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(half)))) half' (vector of 2 'half' values)}} +} + +float test_double_arg3_type(half2 p1, half2 p2, double p3) { + return __builtin_hlsl_dot2add(p1, p2, p3); + // expected-error@-1 {{passing 'double' to parameter of incompatible type 'float'}} +} + +float test_float_arg1_arg2_type(float2 p1, float2 p2, float p3) { + return __builtin_hlsl_dot2add(p1, p2, p3); + // expected-error@-1 {{passing 'float2' (aka 'vector') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(half)))) half' (vector of 2 'half' values)}} +} + +float test_int16_arg1_arg2_type(int16_t2 p1, int16_t2 p2, float p3) { + return __builtin_hlsl_dot2add(p1, p2, p3); + // expected-error@-1 {{passing 'int16_t2' (aka 'vector') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(half)))) half' (vector of 2 'half' values)}} +} diff --git a/llvm/test/CodeGen/DirectX/dot2add.ll b/llvm/test/CodeGen/DirectX/dot2add.ll index b1019c36b56e8..40c6cdafc83da 100644 --- a/llvm/test/CodeGen/DirectX/dot2add.ll +++ b/llvm/test/CodeGen/DirectX/dot2add.ll @@ -5,4 +5,4 @@ entry: ; 
CHECK: call float @dx.op.dot2AddHalf(i32 162, float %c, half %0, half %1, half %2, half %3) %ret = call float @llvm.dx.dot2add(<2 x half> %a, <2 x half> %b, float %c) ret float %ret -} \ No newline at end of file +} From 08a78923cf2f393128391ea2fb120cdae3d9ec6c Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Mon, 24 Mar 2025 12:30:39 -0700 Subject: [PATCH 10/17] update sema --- clang/lib/Sema/SemaHLSL.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 78c643dfe31cd..d063a117c30a9 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2485,6 +2485,22 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { VecTy->getElementType()->isHalfType()); return true; }; + + auto CheckArgTypeIsCorrect = []( + Sema *S, Expr *Arg, QualType ExpectedType, + llvm::function_ref Check) -> bool { + QualType PassedType = Arg->getType(); + if (Check(PassedType)) { + if (auto *VecTyA = PassedType->getAs()) + ExpectedType = S->Context.getVectorType( + ExpectedType, VecTyA->getNumElements(), VecTyA->getVectorKind()); + S->Diag(Arg->getBeginLoc(), diag::err_typecheck_convert_incompatible) + << PassedType << ExpectedType << 1 << 0 << 0; + return true; + } + return false; + }; + if(CheckArgTypeIsCorrect(&SemaRef, TheCall->getArg(0), SemaRef.getASTContext().HalfTy, checkHalfVectorOfSize2)) @@ -2494,8 +2510,12 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { checkHalfVectorOfSize2)) return true; - if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2), SemaRef.getASTContext().FloatTy)) + if (!SemaRef.getASTContext().hasSameUnqualifiedType(TheCall->getArg(2)->getType(), SemaRef.getASTContext().FloatTy)) { + SemaRef.Diag(TheCall->getArg(2)->getBeginLoc(), diag::err_typecheck_convert_incompatible) + << TheCall->getArg(2)->getType() << SemaRef.getASTContext().FloatTy << 1 << 0 << 0; return true; + } + 
TheCall->setType(SemaRef.getASTContext().FloatTy); break; } From a35eaab80a10dfc27a8341b83319acb92e36e8d8 Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Wed, 26 Mar 2025 17:38:54 -0700 Subject: [PATCH 11/17] Addressed PR feedback --- clang/include/clang/Basic/Builtins.td | 4 +- clang/lib/Sema/SemaHLSL.cpp | 44 ------------ clang/test/CodeGenHLSL/builtins/dot2add.hlsl | 70 +++++++++++++++++-- .../SemaHLSL/BuiltIns/Dot2Add-errors.hlsl | 36 ---------- .../SemaHLSL/BuiltIns/dot2add-errors.hlsl | 13 ++++ 5 files changed, 80 insertions(+), 87 deletions(-) delete mode 100644 clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/dot2add-errors.hlsl diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 5de1da2111471..adf8b8d96e3c8 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4893,8 +4893,8 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> { def HLSLDot2Add : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_dot2add"]; - let Attributes = [NoThrow, Const, CustomTypeChecking]; - let Prototype = "float(_ExtVector<2, _Float16>,_ExtVector<2, _Float16>, float)"; + let Attributes = [NoThrow, Const]; + let Prototype = "float(_ExtVector<2, __fp16>,_ExtVector<2, __fp16>, float)"; } def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> { diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index d063a117c30a9..fe600386e6fa9 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2475,50 +2475,6 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } - case Builtin::BI__builtin_hlsl_dot2add: { - if (SemaRef.checkArgCount(TheCall, 3)) - return true; - - auto checkHalfVectorOfSize2 = [](clang::QualType PassedType) -> bool { - if (const auto *VecTy = PassedType->getAs()) - return !(VecTy->getNumElements() == 2 && - 
VecTy->getElementType()->isHalfType()); - return true; - }; - - auto CheckArgTypeIsCorrect = []( - Sema *S, Expr *Arg, QualType ExpectedType, - llvm::function_ref Check) -> bool { - QualType PassedType = Arg->getType(); - if (Check(PassedType)) { - if (auto *VecTyA = PassedType->getAs()) - ExpectedType = S->Context.getVectorType( - ExpectedType, VecTyA->getNumElements(), VecTyA->getVectorKind()); - S->Diag(Arg->getBeginLoc(), diag::err_typecheck_convert_incompatible) - << PassedType << ExpectedType << 1 << 0 << 0; - return true; - } - return false; - }; - - if(CheckArgTypeIsCorrect(&SemaRef, TheCall->getArg(0), - SemaRef.getASTContext().HalfTy, - checkHalfVectorOfSize2)) - return true; - if(CheckArgTypeIsCorrect(&SemaRef, TheCall->getArg(1), - SemaRef.getASTContext().HalfTy, - checkHalfVectorOfSize2)) - return true; - - if (!SemaRef.getASTContext().hasSameUnqualifiedType(TheCall->getArg(2)->getType(), SemaRef.getASTContext().FloatTy)) { - SemaRef.Diag(TheCall->getArg(2)->getBeginLoc(), diag::err_typecheck_convert_incompatible) - << TheCall->getArg(2)->getType() << SemaRef.getASTContext().FloatTy << 1 << 0 << 0; - return true; - } - - TheCall->setType(SemaRef.getASTContext().FloatTy); - break; - } case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: { if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall)) diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl index cea399f6028d2..8421c6c9b31be 100644 --- a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl +++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl @@ -7,12 +7,72 @@ // Test basic lowering to runtime function call. 
+// CHECK-LABEL: define {{.*}}test float test(half2 p1, half2 p2, float p3) { - // CHECK-SPIRV: %[[MUL:.*]] = call {{.*}} half @llvm.spv.fdot.v2f16(<2 x half> %1, <2 x half> %2) - // CHECK-SPIRV: %[[CONVERT:.*]] = fpext {{.*}} half %[[MUL:.*]] to float - // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr, align 4 - // CHECK-SPIRV: %[[RES:.*]] = fadd {{.*}} float %[[CONVERT:.*]], %[[C:.*]] - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %0, <2 x half> %1, float %2) + // CHECK-SPIRV: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %1, <2 x half> {{.*}} %2, float {{.*}} %3) #3 {{.*}} + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %0, <2 x half> {{.*}} %1, float {{.*}} %2) #2 // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } + +// CHECK-LABEL: define {{.*}}test_float_arg2_type +float test_float_arg2_type(half2 p1, float2 p2, float p3) { + // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %2 to <2 x half> + // CHECK-SPIRV: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %1, <2 x half> {{.*}} %conv, float {{.*}} %3) #3 {{.*}} + // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %1 to <2 x half> + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %0, <2 x half> {{.*}} %conv, float {{.*}} %2) #2 + // CHECK: ret float %[[RES]] + return dot2add(p1, p2, p3); +} + +// CHECK-LABEL: define {{.*}}test_float_arg1_type +float test_float_arg1_type(float2 p1, half2 p2, float p3) { + // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %1 to <2 x half> + // CHECK-SPIRV: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %conv, <2 x half> {{.*}} %2, float {{.*}} %3) #3 {{.*}} + // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %0 to <2 x half> + // CHECK-DXIL: 
%[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %conv, <2 x half> {{.*}} %1, float {{.*}} %2) #2 + // CHECK: ret float %[[RES]] + return dot2add(p1, p2, p3); +} + +// CHECK-LABEL: define {{.*}}test_double_arg3_type +float test_double_arg3_type(half2 p1, half2 p2, double p3) { + // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn double %3 to float + // CHECK-SPIRV: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %1, <2 x half> {{.*}} %2, float {{.*}} %conv) #3 {{.*}} + // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn double %2 to float + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %0, <2 x half> {{.*}} %1, float {{.*}} %conv) #2 + // CHECK: ret float %[[RES]] + return dot2add(p1, p2, p3); +} + +// CHECK-LABEL: define {{.*}}test_float_arg1_arg2_type +float test_float_arg1_arg2_type(float2 p1, float2 p2, float p3) { + // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %1 to <2 x half> + // CHECK-SPIRV: %conv1 = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %2 to <2 x half> + // CHECK-SPIRV: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %conv, <2 x half> {{.*}} %conv1, float {{.*}} %3) #3 {{.*}} + // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %0 to <2 x half> + // CHECK-DXIL: %conv1 = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %1 to <2 x half> + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %conv, <2 x half> {{.*}} %conv1, float {{.*}} %2) #2 + // CHECK: ret float %[[RES]] + return dot2add(p1, p2, p3); +} + +// CHECK-LABEL: define {{.*}}test_int16_arg1_arg2_type +float test_int16_arg1_arg2_type(int16_t2 p1, int16_t2 p2, float p3) { + // CHECK-SPIRV: %conv = sitofp <2 x i16> %1 to <2 x half> + // CHECK-SPIRV: %conv1 = sitofp <2 x i16> %2 to <2 x half> + // CHECK-SPIRV: %[[RES:.*]] = call 
{{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %conv, <2 x half> {{.*}} %conv1, float {{.*}} %3) #3 {{.*}} + // CHECK-DXIL: %conv = sitofp <2 x i16> %0 to <2 x half> + // CHECK-DXIL: %conv1 = sitofp <2 x i16> %1 to <2 x half> + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %conv, <2 x half> {{.*}} %conv1, float {{.*}} %2) #2 + // CHECK: ret float %[[RES]] + return dot2add(p1, p2, p3); +} + +// CHECK-LABEL: define {{.*}}dot2add_impl +// CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %1, <2 x half> %2) +// CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float +// CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr, align 4 +// CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] +// CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %0, <2 x half> %1, float %2) +// CHECK: ret float %[[RES]] diff --git a/clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl deleted file mode 100644 index 87bcb931aabe5..0000000000000 --- a/clang/test/SemaHLSL/BuiltIns/Dot2Add-errors.hlsl +++ /dev/null @@ -1,36 +0,0 @@ -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify - -float test_too_few_arg() { - return __builtin_hlsl_dot2add(); - // expected-error@-1 {{too few arguments to function call, expected 3, have 0}} -} - -float test_too_many_arg(half2 p1, half2 p2, float p3) { - return __builtin_hlsl_dot2add(p1, p2, p3, p1); - // expected-error@-1 {{too many arguments to function call, expected 3, have 4}} -} - -float test_float_arg2_type(half2 p1, float2 p2, float p3) { - return __builtin_hlsl_dot2add(p1, p2, p3); - // expected-error@-1 {{passing 'float2' (aka 'vector') to parameter of incompatible type '__attribute__((__vector_size__(2 * 
sizeof(half)))) half' (vector of 2 'half' values)}} -} - -float test_float_arg1_type(float2 p1, half2 p2, float p3) { - return __builtin_hlsl_dot2add(p1, p2, p3); - // expected-error@-1 {{passing 'float2' (aka 'vector') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(half)))) half' (vector of 2 'half' values)}} -} - -float test_double_arg3_type(half2 p1, half2 p2, double p3) { - return __builtin_hlsl_dot2add(p1, p2, p3); - // expected-error@-1 {{passing 'double' to parameter of incompatible type 'float'}} -} - -float test_float_arg1_arg2_type(float2 p1, float2 p2, float p3) { - return __builtin_hlsl_dot2add(p1, p2, p3); - // expected-error@-1 {{passing 'float2' (aka 'vector') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(half)))) half' (vector of 2 'half' values)}} -} - -float test_int16_arg1_arg2_type(int16_t2 p1, int16_t2 p2, float p3) { - return __builtin_hlsl_dot2add(p1, p2, p3); - // expected-error@-1 {{passing 'int16_t2' (aka 'vector') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(half)))) half' (vector of 2 'half' values)}} -} diff --git a/clang/test/SemaHLSL/BuiltIns/dot2add-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/dot2add-errors.hlsl new file mode 100644 index 0000000000000..262ceecbf1d90 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/dot2add-errors.hlsl @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify + +float test_too_few_arg() { + return dot2add(); + // expected-error@-1 {{no matching function for call to 'dot2add'}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 3 arguments, but 0 were provided}} +} + +float test_too_many_arg(half2 p1, half2 p2, float p3) { + return dot2add(p1, p2, p3, p1); + // expected-error@-1 {{no matching function for call to 'dot2add'}} + // expected-note@hlsl/hlsl_intrinsics.h:* 
{{candidate function not viable: requires 3 arguments, but 4 were provided}} +} From c2b3fd364bee76023e46e7e455d99b2dcfea2d1f Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Thu, 27 Mar 2025 15:16:46 -0700 Subject: [PATCH 12/17] use _Float16 instead of __fp16 --- clang/include/clang/Basic/Builtins.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index adf8b8d96e3c8..bfc47639f25ed 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4894,7 +4894,7 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> { def HLSLDot2Add : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_dot2add"]; let Attributes = [NoThrow, Const]; - let Prototype = "float(_ExtVector<2, __fp16>,_ExtVector<2, __fp16>, float)"; + let Prototype = "float(_ExtVector<2, _Float16>,_ExtVector<2, _Float16>, float)"; } def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> { From be8520706c6e92a8097e0cffe5a416914a4e9254 Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Wed, 2 Apr 2025 14:11:14 -0700 Subject: [PATCH 13/17] WIP: Addressed PR feedback --- clang/lib/CodeGen/CGHLSLBuiltins.cpp | 12 ++++++++++++ clang/test/CodeGenHLSL/builtins/dot2add.hlsl | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index 99c62808c323d..a48ef314eefe4 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -380,6 +380,18 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()), ArrayRef{Op0, Op1}, nullptr, "hlsl.dot"); } + case Builtin::BI__builtin_hlsl_dot2add: { + llvm::Triple::ArchType Arch = CGM.getTarget().getTriple().getArch(); + assert(Arch == llvm::Triple::dxil && "Intrinsic dot2add is only allowed for dxil architecture"); + Value *A = 
EmitScalarExpr(E->getArg(0)); + Value *B = EmitScalarExpr(E->getArg(1)); + Value *C = EmitScalarExpr(E->getArg(2)); + + Intrinsic::ID ID = llvm ::Intrinsic::dx_dot2add; + return Builder.CreateIntrinsic( + /*ReturnType=*/C->getType(), ID, ArrayRef{A, B, C}, nullptr, + "dx.dot2add"); + } case Builtin::BI__builtin_hlsl_dot4add_i8packed: { Value *A = EmitScalarExpr(E->getArg(0)); Value *B = EmitScalarExpr(E->getArg(1)); diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl index 8421c6c9b31be..a37d44d538b38 100644 --- a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl +++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl @@ -8,7 +8,7 @@ // Test basic lowering to runtime function call. // CHECK-LABEL: define {{.*}}test -float test(half2 p1, half2 p2, float p3) { +float test_default_parameter(half2 p1, half2 p2, float p3) { // CHECK-SPIRV: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %1, <2 x half> {{.*}} %2, float {{.*}} %3) #3 {{.*}} // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %0, <2 x half> {{.*}} %1, float {{.*}} %2) #2 // CHECK: ret float %[[RES]] From 3accc04a6b49a2985be0a92cf5315bc67fb71df7 Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Wed, 2 Apr 2025 15:52:22 -0700 Subject: [PATCH 14/17] address pr feedback --- clang/lib/CodeGen/CGHLSLBuiltins.cpp | 3 +- clang/test/CodeGenHLSL/builtins/dot2add.hlsl | 122 +++++++++++++++---- 2 files changed, 100 insertions(+), 25 deletions(-) diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index a48ef314eefe4..b5f620c220cbf 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -382,7 +382,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, } case Builtin::BI__builtin_hlsl_dot2add: { llvm::Triple::ArchType Arch = CGM.getTarget().getTriple().getArch(); - assert(Arch == llvm::Triple::dxil && "Intrinsic 
dot2add is only allowed for dxil architecture"); + assert(Arch == llvm::Triple::dxil && + "Intrinsic dot2add is only allowed for dxil architecture"); Value *A = EmitScalarExpr(E->getArg(0)); Value *B = EmitScalarExpr(E->getArg(1)); Value *C = EmitScalarExpr(E->getArg(2)); diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl index a37d44d538b38..d50fb56432f25 100644 --- a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl +++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl @@ -1,26 +1,32 @@ // RUN: %clang_cc1 -finclude-default-header -fnative-half-type -triple \ -// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \ +// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,CHECK-DXIL // RUN: %clang_cc1 -finclude-default-header -fnative-half-type -triple \ -// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \ +// RUN: spirv-pc-vulkan-compute %s -emit-llvm -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV // Test basic lowering to runtime function call. 
-// CHECK-LABEL: define {{.*}}test -float test_default_parameter(half2 p1, half2 p2, float p3) { - // CHECK-SPIRV: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %1, <2 x half> {{.*}} %2, float {{.*}} %3) #3 {{.*}} - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %0, <2 x half> {{.*}} %1, float {{.*}} %2) #2 +// CHECK-LABEL: define {{.*}}test_default_parameter_type +float test_default_parameter_type(half2 p1, half2 p2, float p3) { + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float + // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 + // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } // CHECK-LABEL: define {{.*}}test_float_arg2_type float test_float_arg2_type(half2 p1, float2 p2, float p3) { - // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %2 to <2 x half> - // CHECK-SPIRV: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %1, <2 x half> {{.*}} %conv, float {{.*}} %3) #3 {{.*}} + // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %2 to <2 x half> + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float + // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 + // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %1 to <2 x half> - // CHECK-DXIL: 
%[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %0, <2 x half> {{.*}} %conv, float {{.*}} %2) #2 + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } @@ -28,9 +34,12 @@ float test_float_arg2_type(half2 p1, float2 p2, float p3) { // CHECK-LABEL: define {{.*}}test_float_arg1_type float test_float_arg1_type(float2 p1, half2 p2, float p3) { // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %1 to <2 x half> - // CHECK-SPIRV: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %conv, <2 x half> {{.*}} %2, float {{.*}} %3) #3 {{.*}} + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float + // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 + // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %0 to <2 x half> - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %conv, <2 x half> {{.*}} %1, float {{.*}} %2) #2 + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } @@ -38,9 +47,12 @@ float test_float_arg1_type(float2 p1, half2 p2, float p3) { // CHECK-LABEL: define {{.*}}test_double_arg3_type float test_double_arg3_type(half2 p1, half2 p2, double p3) { // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn double %3 to float - // CHECK-SPIRV: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %1, <2 x half> {{.*}} %2, float {{.*}} %conv) #3 {{.*}} + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz 
arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float + // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 + // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn double %2 to float - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %0, <2 x half> {{.*}} %1, float {{.*}} %conv) #2 + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } @@ -49,10 +61,28 @@ float test_double_arg3_type(half2 p1, half2 p2, double p3) { float test_float_arg1_arg2_type(float2 p1, float2 p2, float p3) { // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %1 to <2 x half> // CHECK-SPIRV: %conv1 = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %2 to <2 x half> - // CHECK-SPIRV: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %conv, <2 x half> {{.*}} %conv1, float {{.*}} %3) #3 {{.*}} + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float + // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 + // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %0 to <2 x half> // CHECK-DXIL: %conv1 = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %1 to <2 x half> - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %conv, <2 x half> {{.*}} %conv1, float {{.*}} %2) #2 + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x 
half> %7, float %8) + // CHECK: ret float %[[RES]] + return dot2add(p1, p2, p3); +} + +// CHECK-LABEL: define {{.*}}test_double_arg1_arg2_type +float test_double_arg1_arg2_type(double2 p1, double2 p2, float p3) { + // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x double> %1 to <2 x half> + // CHECK-SPIRV: %conv1 = fptrunc reassoc nnan ninf nsz arcp afn <2 x double> %2 to <2 x half> + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float + // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 + // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] + // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x double> %0 to <2 x half> + // CHECK-DXIL: %conv1 = fptrunc reassoc nnan ninf nsz arcp afn <2 x double> %1 to <2 x half> + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } @@ -61,18 +91,62 @@ float test_float_arg1_arg2_type(float2 p1, float2 p2, float p3) { float test_int16_arg1_arg2_type(int16_t2 p1, int16_t2 p2, float p3) { // CHECK-SPIRV: %conv = sitofp <2 x i16> %1 to <2 x half> // CHECK-SPIRV: %conv1 = sitofp <2 x i16> %2 to <2 x half> - // CHECK-SPIRV: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %conv, <2 x half> {{.*}} %conv1, float {{.*}} %3) #3 {{.*}} + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float + // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 + // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] // CHECK-DXIL: %conv = sitofp <2 x i16> %0 to <2 x half> 
// CHECK-DXIL: %conv1 = sitofp <2 x i16> %1 to <2 x half> - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @_ZN4hlsl7dot2addEDv2_DhS0_f(<2 x half> {{.*}} %conv, <2 x half> {{.*}} %conv1, float {{.*}} %2) #2 + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) + // CHECK: ret float %[[RES]] + return dot2add(p1, p2, p3); +} + +// CHECK-LABEL: define {{.*}}test_int32_arg1_arg2_type +float test_int32_arg1_arg2_type(int32_t2 p1, int32_t2 p2, float p3) { + // CHECK-SPIRV: %conv = sitofp <2 x i32> %1 to <2 x half> + // CHECK-SPIRV: %conv1 = sitofp <2 x i32> %2 to <2 x half> + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float + // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 + // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] + // CHECK-DXIL: %conv = sitofp <2 x i32> %0 to <2 x half> + // CHECK-DXIL: %conv1 = sitofp <2 x i32> %1 to <2 x half> + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } -// CHECK-LABEL: define {{.*}}dot2add_impl -// CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %1, <2 x half> %2) -// CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float -// CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr, align 4 -// CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] -// CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %0, <2 x half> %1, float %2) -// CHECK: ret float %[[RES]] +// CHECK-LABEL: define {{.*}}test_int64_arg1_arg2_type +float test_int64_arg1_arg2_type(int64_t2 p1, int64_t2 p2, float p3) { + // CHECK-SPIRV: %conv 
= sitofp <2 x i64> %1 to <2 x half> + // CHECK-SPIRV: %conv1 = sitofp <2 x i64> %2 to <2 x half> + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float + // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 + // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] + // CHECK-DXIL: %conv = sitofp <2 x i64> %0 to <2 x half> + // CHECK-DXIL: %conv1 = sitofp <2 x i64> %1 to <2 x half> + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) + // CHECK: ret float %[[RES]] + return dot2add(p1, p2, p3); +} + +// CHECK-LABEL: define {{.*}}test_bool_arg1_arg2_type +float test_bool_arg1_arg2_type(bool2 p1, bool2 p2, float p3) { + // CHECK-SPIRV: %loadedv = trunc <2 x i32> %3 to <2 x i1> + // CHECK-SPIRV: %conv = uitofp <2 x i1> %loadedv to <2 x half> + // CHECK-SPIRV: %loadedv1 = trunc <2 x i32> %4 to <2 x i1> + // CHECK-SPIRV: %conv2 = uitofp <2 x i1> %loadedv1 to <2 x half> + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %9, <2 x half> %10) + // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float + // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 + // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] + // CHECK-DXIL: %loadedv = trunc <2 x i32> %2 to <2 x i1> + // CHECK-DXIL: %conv = uitofp <2 x i1> %loadedv to <2 x half> + // CHECK-DXIL: %loadedv1 = trunc <2 x i32> %3 to <2 x i1> + // CHECK-DXIL: %conv2 = uitofp <2 x i1> %loadedv1 to <2 x half> + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %8, <2 x half> %9, float %10) + // CHECK: ret float %[[RES]] + return dot2add(p1, p2, p3); +} From c4fae7975e18e9c340bd371e03208a63e429cb7a Mon Sep 17 
00:00:00 2001 From: Sumit Agarwal Date: Wed, 2 Apr 2025 16:13:49 -0700 Subject: [PATCH 15/17] remove specific register from expected output --- clang/test/CodeGenHLSL/builtins/dot2add.hlsl | 91 ++++++++------------ 1 file changed, 37 insertions(+), 54 deletions(-) diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl index d50fb56432f25..2464607dd636c 100644 --- a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl +++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl @@ -9,144 +9,127 @@ // CHECK-LABEL: define {{.*}}test_default_parameter_type float test_default_parameter_type(half2 p1, half2 p2, float p3) { - // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}) // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}}) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } // CHECK-LABEL: define {{.*}}test_float_arg2_type float test_float_arg2_type(half2 p1, float2 p2, float p3) { - // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %2 to <2 x half> - // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %{{.*}} to <2 x half> + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x 
half> %{{.*}}) // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] - // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %1 to <2 x half> - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}}) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } // CHECK-LABEL: define {{.*}}test_float_arg1_type float test_float_arg1_type(float2 p1, half2 p2, float p3) { - // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %1 to <2 x half> - // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %{{.*}} to <2 x half> + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}) // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] - // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %0 to <2 x half> - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}}) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } // CHECK-LABEL: define {{.*}}test_double_arg3_type float test_double_arg3_type(half2 p1, half2 p2, double p3) { - // CHECK-SPIRV: %conv = fptrunc reassoc nnan 
ninf nsz arcp afn double %3 to float - // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK: %conv = fptrunc reassoc nnan ninf nsz arcp afn double %{{.*}} to float + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}) // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] - // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn double %2 to float - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}}) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } // CHECK-LABEL: define {{.*}}test_float_arg1_arg2_type float test_float_arg1_arg2_type(float2 p1, float2 p2, float p3) { - // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %1 to <2 x half> - // CHECK-SPIRV: %conv1 = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %2 to <2 x half> - // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %{{.*}} to <2 x half> + // CHECK: %conv1 = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %{{.*}} to <2 x half> + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}) // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], 
%[[C]] - // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %0 to <2 x half> - // CHECK-DXIL: %conv1 = fptrunc reassoc nnan ninf nsz arcp afn <2 x float> %1 to <2 x half> - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}}) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } // CHECK-LABEL: define {{.*}}test_double_arg1_arg2_type float test_double_arg1_arg2_type(double2 p1, double2 p2, float p3) { - // CHECK-SPIRV: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x double> %1 to <2 x half> - // CHECK-SPIRV: %conv1 = fptrunc reassoc nnan ninf nsz arcp afn <2 x double> %2 to <2 x half> - // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x double> %{{.*}} to <2 x half> + // CHECK: %conv1 = fptrunc reassoc nnan ninf nsz arcp afn <2 x double> %{{.*}} to <2 x half> + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}) // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] - // CHECK-DXIL: %conv = fptrunc reassoc nnan ninf nsz arcp afn <2 x double> %0 to <2 x half> - // CHECK-DXIL: %conv1 = fptrunc reassoc nnan ninf nsz arcp afn <2 x double> %1 to <2 x half> - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}}) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } // 
CHECK-LABEL: define {{.*}}test_int16_arg1_arg2_type float test_int16_arg1_arg2_type(int16_t2 p1, int16_t2 p2, float p3) { - // CHECK-SPIRV: %conv = sitofp <2 x i16> %1 to <2 x half> - // CHECK-SPIRV: %conv1 = sitofp <2 x i16> %2 to <2 x half> - // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK: %conv = sitofp <2 x i16> %{{.*}} to <2 x half> + // CHECK: %conv1 = sitofp <2 x i16> %{{.*}} to <2 x half> + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}) // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] - // CHECK-DXIL: %conv = sitofp <2 x i16> %0 to <2 x half> - // CHECK-DXIL: %conv1 = sitofp <2 x i16> %1 to <2 x half> - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}}) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } // CHECK-LABEL: define {{.*}}test_int32_arg1_arg2_type float test_int32_arg1_arg2_type(int32_t2 p1, int32_t2 p2, float p3) { - // CHECK-SPIRV: %conv = sitofp <2 x i32> %1 to <2 x half> - // CHECK-SPIRV: %conv1 = sitofp <2 x i32> %2 to <2 x half> - // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK: %conv = sitofp <2 x i32> %{{.*}} to <2 x half> + // CHECK: %conv1 = sitofp <2 x i32> %{{.*}} to <2 x half> + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}) // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float // 
CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] - // CHECK-DXIL: %conv = sitofp <2 x i32> %0 to <2 x half> - // CHECK-DXIL: %conv1 = sitofp <2 x i32> %1 to <2 x half> - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}}) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } // CHECK-LABEL: define {{.*}}test_int64_arg1_arg2_type float test_int64_arg1_arg2_type(int64_t2 p1, int64_t2 p2, float p3) { - // CHECK-SPIRV: %conv = sitofp <2 x i64> %1 to <2 x half> - // CHECK-SPIRV: %conv1 = sitofp <2 x i64> %2 to <2 x half> - // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %7, <2 x half> %8) + // CHECK: %conv = sitofp <2 x i64> %{{.*}} to <2 x half> + // CHECK: %conv1 = sitofp <2 x i64> %{{.*}} to <2 x half> + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}) // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] - // CHECK-DXIL: %conv = sitofp <2 x i64> %0 to <2 x half> - // CHECK-DXIL: %conv1 = sitofp <2 x i64> %1 to <2 x half> - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %6, <2 x half> %7, float %8) + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}}) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } // CHECK-LABEL: define {{.*}}test_bool_arg1_arg2_type float test_bool_arg1_arg2_type(bool2 p1, bool2 p2, float p3) { - // CHECK-SPIRV: %loadedv = 
trunc <2 x i32> %3 to <2 x i1> - // CHECK-SPIRV: %conv = uitofp <2 x i1> %loadedv to <2 x half> - // CHECK-SPIRV: %loadedv1 = trunc <2 x i32> %4 to <2 x i1> - // CHECK-SPIRV: %conv2 = uitofp <2 x i1> %loadedv1 to <2 x half> - // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %9, <2 x half> %10) + // CHECK: %loadedv = trunc <2 x i32> %{{.*}} to <2 x i1> + // CHECK: %conv = uitofp <2 x i1> %loadedv to <2 x half> + // CHECK: %loadedv1 = trunc <2 x i32> %{{.*}} to <2 x i1> + // CHECK: %conv2 = uitofp <2 x i1> %loadedv1 to <2 x half> + // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}) // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 // CHECK-SPIRV: %[[RES:.*]] = fadd reassoc nnan ninf nsz arcp afn float %[[CONV]], %[[C]] - // CHECK-DXIL: %loadedv = trunc <2 x i32> %2 to <2 x i1> - // CHECK-DXIL: %conv = uitofp <2 x i1> %loadedv to <2 x half> - // CHECK-DXIL: %loadedv1 = trunc <2 x i32> %3 to <2 x i1> - // CHECK-DXIL: %conv2 = uitofp <2 x i1> %loadedv1 to <2 x half> - // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %8, <2 x half> %9, float %10) + // CHECK-DXIL: %[[RES:.*]] = call {{.*}} float @llvm.dx.dot2add.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, float %{{.*}}) // CHECK: ret float %[[RES]] return dot2add(p1, p2, p3); } From e12c90336960eebdd7de3e4ccca20d4dbd455136 Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Thu, 3 Apr 2025 10:36:36 -0700 Subject: [PATCH 16/17] nit --- clang/include/clang/Basic/Builtins.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index bfc47639f25ed..c7ca607e4b3d2 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4894,7 
+4894,7 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> { def HLSLDot2Add : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_dot2add"]; let Attributes = [NoThrow, Const]; - let Prototype = "float(_ExtVector<2, _Float16>,_ExtVector<2, _Float16>, float)"; + let Prototype = "float(_ExtVector<2, _Float16>, _ExtVector<2, _Float16>, float)"; } def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> { From 6d0eb98ccc0ebb2adf7deb28d59e593cce4bd375 Mon Sep 17 00:00:00 2001 From: Sumit Agarwal Date: Thu, 3 Apr 2025 12:44:46 -0700 Subject: [PATCH 17/17] Clang format --- clang/lib/CodeGen/CGHLSLBuiltins.cpp | 4 ++-- clang/lib/Headers/hlsl/hlsl_intrinsics.h | 2 +- llvm/lib/Target/DirectX/DXILOpLowering.cpp | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index b5f620c220cbf..07f6d0953f026 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -382,8 +382,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, } case Builtin::BI__builtin_hlsl_dot2add: { llvm::Triple::ArchType Arch = CGM.getTarget().getTriple().getArch(); - assert(Arch == llvm::Triple::dxil && - "Intrinsic dot2add is only allowed for dxil architecture"); + assert(Arch == llvm::Triple::dxil && + "Intrinsic dot2add is only allowed for dxil architecture"); Value *A = EmitScalarExpr(E->getArg(0)); Value *B = EmitScalarExpr(E->getArg(1)); Value *C = EmitScalarExpr(E->getArg(2)); diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index ed07428c0f12b..1a61fdba4fc19 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -181,7 +181,7 @@ const inline float distance(__detail::HLSL_FIXED_VECTOR X, /// \fn float dot2add(half2 A, half2 B, float C) /// \brief Dot product of 2 vector of type half and add a float scalar value. 
-/// \param A The first input value to dot product. +/// \param A The first input value to dot product. /// \param B The second input value to dot product. /// \param C The input value added to the dot product. diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index cecd012011419..3dcd3d8fd244a 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -54,9 +54,8 @@ static SmallVector populateOperands(Value *Arg, IRBuilder<> &Builder) { return ExtractedElements; } -static SmallVector argVectorFlatten(CallInst *Orig, - IRBuilder<> &Builder, - unsigned NumOperands) { +static SmallVector +argVectorFlatten(CallInst *Orig, IRBuilder<> &Builder, unsigned NumOperands) { assert(NumOperands > 0); Value *Arg0 = Orig->getOperand(0); [[maybe_unused]] auto *VecArg0 = dyn_cast(Arg0->getType()); @@ -77,7 +76,7 @@ static SmallVector argVectorFlatten(CallInst *Orig, static SmallVector argVectorFlatten(CallInst *Orig, IRBuilder<> &Builder) { // Note: arg[NumOperands-1] is a pointer and is not needed by our flattening. - return argVectorFlatten(Orig, Builder, Orig->getNumOperands() - 1); + return argVectorFlatten(Orig, Builder, Orig->getNumOperands() - 1); } namespace { @@ -175,7 +174,8 @@ class OpLowerer { Args = argVectorFlatten(CI, OpBuilder.getIRB()); } else if (F.getIntrinsicID() == Intrinsic::dx_dot2add) { // arg[NumOperands-1] is a pointer and is not needed by our flattening. - // arg[NumOperands-2] also does not need to be flattened because it is a scalar. + // arg[NumOperands-2] also does not need to be flattened because it is a + // scalar. unsigned NumOperands = CI->getNumOperands() - 2; Args.push_back(CI->getArgOperand(NumOperands)); Args.append(argVectorFlatten(CI, OpBuilder.getIRB(), NumOperands));