-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[HLSL] [DXIL] Implement the AddUint64 HLSL function and the UAddc DXIL op #127137
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
- Defines the AddUint64 HLSL builtin function - Implements the UAddc DXIL op to lower AddUint64 to DXIL
When the input args are of type uint4, uses the vec2 variant of llvm.uadd.with.overflow to sum the low words of both args.
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang-codegen Author: Deric Cheung (Icohedron) ChangesFixes #99205.
Notes:
Patch is 25.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127137.diff 13 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 29939242596ba..2433427a89429 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4753,6 +4753,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
}
// HLSL
+def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_adduint64"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_getpointer"];
let Attributes = [NoThrow];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 2fce5e88ba8a0..e78339ee924ff 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10655,6 +10655,8 @@ def err_second_argument_to_cwsc_not_pointer : Error<
def err_vector_incorrect_num_elements : Error<
"%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
+def err_invalid_even_odd_vector_element_count : Error<
+ "invalid element count of %0 in vector %select{initialization|operand}4 (expected an %select{even|odd}3 element count in the range of %1 and %2)">;
def err_altivec_empty_initializer : Error<"expected initializer">;
def err_invalid_neon_type_code : Error<
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 361e4c4bf2e2e..0fe8cf5179b53 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19445,6 +19445,70 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
return nullptr;
switch (BuiltinID) {
+ case Builtin::BI__builtin_hlsl_adduint64: {
+ Value *OpA = EmitScalarExpr(E->getArg(0));
+ Value *OpB = EmitScalarExpr(E->getArg(1));
+ assert(E->getArg(0)->getType()->hasIntegerRepresentation() &&
+ E->getArg(1)->getType()->hasIntegerRepresentation() &&
+ "AddUint64 operands must have an integer representation");
+ assert(((E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
+ 2 &&
+ E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+ 2) ||
+ (E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
+ 4 &&
+ E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+ 4)) &&
+ "input vectors must have 2 or 4 elements each");
+
+ uint64_t NumElements =
+ E->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
+
+ llvm::Value *Result = PoisonValue::get(OpA->getType());
+ llvm::Value *LowA;
+ llvm::Value *HighA;
+ llvm::Value *LowB;
+ llvm::Value *HighB;
+
+ // Obtain low and high words of inputs A and B
+ if (NumElements == 2) {
+ LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA");
+ HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA");
+ LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB");
+ HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB");
+ } else {
+ LowA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{0, 2}, "LowA");
+ HighA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{1, 3}, "HighA");
+ LowB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{0, 2}, "LowB");
+ HighB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{1, 3}, "HighB");
+ }
+
+ // Use an uadd_with_overflow to compute the sum of low words and obtain a
+ // carry value
+ llvm::Value *Carry;
+ llvm::Value *LowSum = EmitOverflowIntrinsic(
+ *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
+ llvm::Value *ZExtCarry =
+ Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt");
+
+ // Sum the high words and the carry
+ llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum");
+ llvm::Value *HighSumPlusCarry =
+ Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry");
+
+ // Insert the low and high word sums into the result vector
+ if (NumElements == 2) {
+ Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0,
+ "hlsl.AddUint64.upto0");
+ Result = Builder.CreateInsertElement(Result, HighSumPlusCarry,
+ (uint64_t)1, "hlsl.AddUint64");
+ } else { /* NumElements == 4 */
+ Result = Builder.CreateShuffleVector(LowSum, HighSumPlusCarry,
+ ArrayRef<int>{0, 2, 1, 3},
+ "hlsl.AddUint64");
+ }
+ return Result;
+ }
case Builtin::BI__builtin_hlsl_resource_getpointer: {
Value *HandleOp = EmitScalarExpr(E->getArg(0));
Value *IndexOp = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index d1f5fdff8b600..513639ed1b81d 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -138,6 +138,27 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos)
float4 acos(float4);
//===----------------------------------------------------------------------===//
+// AddUint64 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T AddUint64(T a, T b)
+/// \brief Implements unsigned 64-bit integer addition using pairs of unsigned
+/// 32-bit integers.
+/// \param x [in] The first unsigned 32-bit integer pair(s)
+/// \param y [in] The second unsigned 32-bit integer pair(s)
+///
+/// This function takes one or two pairs (low, high) of unsigned 32-bit integer
+/// values and returns pairs (low, high) of unsigned 32-bit integer
+/// values representing the result of unsigned 64-bit integer addition.
+
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
+uint32_t2 AddUint64(uint32_t2, uint32_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
+uint32_t4 AddUint64(uint32_t4, uint32_t4);
+
+// //===----------------------------------------------------------------------===//
// all builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 4abd870ad6aaa..99eb5360ec356 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2038,6 +2038,18 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
checkAllFloatTypes);
}
+static bool CheckUnsignedIntRepresentations(Sema *S, CallExpr *TheCall) {
+ auto checkUnsignedInteger = [](clang::QualType PassedType) -> bool {
+ clang::QualType BaseType =
+ PassedType->isVectorType()
+ ? PassedType->getAs<clang::VectorType>()->getElementType()
+ : PassedType;
+ return !BaseType->isUnsignedIntegerType();
+ };
+ return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy,
+ checkUnsignedInteger);
+}
+
static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
auto checkFloatorHalf = [](clang::QualType PassedType) -> bool {
clang::QualType BaseType =
@@ -2229,6 +2241,41 @@ static bool CheckResourceHandle(
// returning an ExprError
bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
switch (BuiltinID) {
+ case Builtin::BI__builtin_hlsl_adduint64: {
+ if (SemaRef.checkArgCount(TheCall, 2))
+ return true;
+ if (CheckVectorElementCallArgs(&SemaRef, TheCall))
+ return true;
+ if (CheckUnsignedIntRepresentations(&SemaRef, TheCall))
+ return true;
+
+ // CheckVectorElementCallArgs(...) guarantees both args are the same type.
+ assert(TheCall->getArg(0)->getType() == TheCall->getArg(1)->getType() &&
+ "Both args must be of the same type");
+
+ // ensure both args are vectors
+ auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
+ if (!VTy) {
+ SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector)
+ << "AddUint64" << /*all*/ 1;
+ return true;
+ }
+
+ // ensure both args have 2 elements, or both args have 4 elements
+ int NumElementsArg = VTy->getNumElements();
+ if (NumElementsArg != 2 && NumElementsArg != 4) {
+ SemaRef.Diag(TheCall->getBeginLoc(),
+ diag::err_invalid_even_odd_vector_element_count)
+ << NumElementsArg << 2 << 4 << /*even*/ 0 << /*operand*/ 1;
+ return true;
+ }
+
+ ExprResult A = TheCall->getArg(0);
+ QualType ArgTyA = A.get()->getType();
+ // return type is the same as the input type
+ TheCall->setType(ArgTyA);
+ break;
+ }
case Builtin::BI__builtin_hlsl_resource_getpointer: {
if (SemaRef.checkArgCount(TheCall, 2) ||
CheckResourceHandle(&SemaRef, TheCall, 0) ||
diff --git a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
new file mode 100644
index 0000000000000..e1832bdbbf33f
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
@@ -0,0 +1,58 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
+// RUN: -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK
+
+
+// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[LOWA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
+// CHECK-NEXT: [[HIGHA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
+// CHECK-NEXT: [[LOWB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
+// CHECK-NEXT: [[HIGHB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
+// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LOWA]], i32 [[LOWB]])
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext i1 [[TMP3]] to i32
+// CHECK-NEXT: [[HIGHSUM:%.*]] = add i32 [[HIGHA]], [[HIGHB]]
+// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add i32 [[HIGHSUM]], [[CARRYZEXT]]
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0
+// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HIGHSUMPLUSCARRY]], i64 1
+// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]]
+//
+uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
+ return AddUint64(a, b);
+}
+
+// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT: [[LOWA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[HIGHA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[LOWB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[HIGHB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[LOWA]], <2 x i32> [[LOWB]])
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 0
+// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[HIGHSUM:%.*]] = add <2 x i32> [[HIGHA]], [[HIGHB]]
+// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add <2 x i32> [[HIGHSUM]], [[CARRYZEXT]]
+// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[HIGHSUMPLUSCARRY]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]]
+//
+uint4 test_AddUint64_uint4(uint4 a, uint4 b) {
+ return AddUint64(a, b);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
new file mode 100644
index 0000000000000..ec9d026bb6fe7
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+
+uint2 test_too_few_arg() {
+ return __builtin_hlsl_adduint64();
+ // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+}
+
+uint4 test_too_many_arg(uint4 a) {
+ return __builtin_hlsl_adduint64(a, a, a);
+ // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+}
+
+uint2 test_mismatched_arg_types(uint2 a, uint4 b) {
+ return __builtin_hlsl_adduint64(a, b);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must have the same type}}
+}
+
+uint2 test_bad_num_arg_elements(uint3 a, uint3 b) {
+ return __builtin_hlsl_adduint64(a, b);
+ // expected-error@-1 {{invalid element count of 3 in vector operand (expected an even element count in the range of 2 and 4)}}
+}
+
+uint2 test_scalar_arg_type(uint a) {
+ return __builtin_hlsl_adduint64(a, a);
+ // expected-error@-1 {{all arguments to AddUint64 must be vectors}}
+}
+
+uint2 test_signed_integer_args(int2 a, int2 b) {
+ return __builtin_hlsl_adduint64(a, b);
+// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) unsigned int' (vector of 2 'unsigned int' values)}}
+}
+
+struct S {
+ uint2 a;
+};
+
+uint2 test_incorrect_arg_type(S a) {
+ return __builtin_hlsl_adduint64(a, a);
+ // expected-error@-1 {{passing 'S' to parameter of incompatible type 'unsigned int'}}
+}
+
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 7cb841d9bd5b5..2f6b4d676edfd 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -50,6 +50,7 @@ def HandleTy : DXILOpParamType;
def ResBindTy : DXILOpParamType;
def ResPropsTy : DXILOpParamType;
def SplitDoubleTy : DXILOpParamType;
+def BinaryWithCarryTy : DXILOpParamType;
class DXILOpClass;
@@ -738,6 +739,18 @@ def UMin : DXILOp<40, binary> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}
+def UAddc : DXILOp<44, binaryWithCarryOrBorrow > {
+ let Doc = "Unsigned 32-bit integer arithmetic add with carry. uaddc(a,b) = (a+b, a+b overflowed ? 1 : 0)";
+ // TODO: This `let intrinsics = ...` line may be uncommented when
+ // https://github.com/llvm/llvm-project/issues/113192 is fixed
+ // let intrinsics = [IntrinSelect<int_uadd_with_overflow>];
+ let arguments = [OverloadTy, OverloadTy];
+ let result = BinaryWithCarryTy;
+ let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+ let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
def FMad : DXILOp<46, tertiary> {
let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m "
"* a + b.";
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index badd5aabd6432..f0f1bbabb6b23 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -230,6 +230,14 @@ static StructType *getSplitDoubleType(LLVMContext &Context) {
return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble");
}
+static StructType *getBinaryWithCarryType(LLVMContext &Context) {
+ if (auto *ST = StructType::getTypeByName(Context, "dx.types.i32c"))
+ return ST;
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ Type *Int1Ty = Type::getInt1Ty(Context);
+ return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c");
+}
+
static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
Type *OverloadTy) {
switch (Kind) {
@@ -273,6 +281,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
return getResPropsType(Ctx);
case OpParamType::SplitDoubleTy:
return getSplitDoubleType(Ctx);
+ case OpParamType::BinaryWithCarryTy:
+ return getBinaryWithCarryType(Ctx);
}
llvm_unreachable("Invalid parameter kind");
return nullptr;
@@ -539,6 +549,10 @@ StructType *DXILOpBuilder::getSplitDoubleType(LLVMContext &Context) {
return ::getSplitDoubleType(Context);
}
+StructType *DXILOpBuilder::getBinaryWithCarryType(LLVMContext &Context) {
+ return ::getBinaryWithCarryType(Context);
+}
+
StructType *DXILOpBuilder::getHandleType() {
return ::getHandleType(IRB.getContext());
}
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h
index df5a0240870f4..8e13b87a2be10 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.h
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h
@@ -53,6 +53,9 @@ class DXILOpBuilder {
/// Get the `%dx.types.splitdouble` type.
StructType *getSplitDoubleType(LLVMContext &Context);
+ /// Get the `%dx.types.i32c` type.
+ StructType *getBinaryWithCarryType(LLVMContext &Context);
+
/// Get the `%dx.types.Handle` type.
StructType *getHandleType();
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 0c245c1a43d31..c9e3d7e284963 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -359,17 +359,16 @@ class OpLowerer {
return lowerToBindAndAnnotateHandle(F);
}
- Error replaceSplitDoubleCallUsages(CallInst *Intrin, CallInst *Op) {
+ Error replaceAggregateTypeOfCallUsages(CallInst *Intrin, CallInst *Op) {
for (Use &U : make_early_inc_range(Intrin->uses())) {
if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) {
-
- if (EVI->getNumIndices() != 1)
- return createStringError(std::errc::invalid_argument,
- "Splitdouble has only 2 elements");
EVI->setOperand(0, Op);
+ } else if (auto *IVI = dyn_cast<InsertValueInst>(U.getUser())) {
+ IVI->setOperand(0, Op);
} else {
return make_error<StringError>(
- "Splitdouble use is not ExtractValueInst",
+ (Intrin->getCalledFunction()->getName() +
+ " use is not a ExtractValueInst or InsertValueInst"),
inconvertibleErrorCode());
}
}
@@ -821,7 +820,16 @@ class OpLowerer {
F, OpCode::SplitDouble,
OpBuilder.getSplitDoubleType(M.getContext()),
[&](CallInst *CI, CallInst *Op) {
- return replaceSplitDoubleCallUsages(CI, Op);
+ return replaceAggregateTypeOfCallUsages(CI, Op);
+ });
+ break;
+ // TODO: this can be removed when
+ // https://github.com/llvm/llvm-project/issues/113192 is fixed
+ case Intrinsic::uadd_with_overflow:
+ HasErrors |= replaceFunctionWithNamedStructOp(
+ F, OpCode::UAddc, OpBuilder.getBinaryWithCarryType(M.getContext()),
+ [&](CallInst *CI, CallInst *Op) {
+ return replaceAggregateTypeOfCallUsages(CI, Op);
});
break;
case Int...
[truncated]
|
@llvm/pr-subscribers-clang Author: Deric Cheung (Icohedron) ChangesFixes #99205.
Notes:
Patch is 25.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127137.diff 13 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 29939242596ba..2433427a89429 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4753,6 +4753,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
}
// HLSL
+def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_adduint64"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_getpointer"];
let Attributes = [NoThrow];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 2fce5e88ba8a0..e78339ee924ff 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10655,6 +10655,8 @@ def err_second_argument_to_cwsc_not_pointer : Error<
def err_vector_incorrect_num_elements : Error<
"%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
+def err_invalid_even_odd_vector_element_count : Error<
+ "invalid element count of %0 in vector %select{initialization|operand}4 (expected an %select{even|odd}3 element count in the range of %1 and %2)">;
def err_altivec_empty_initializer : Error<"expected initializer">;
def err_invalid_neon_type_code : Error<
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 361e4c4bf2e2e..0fe8cf5179b53 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19445,6 +19445,70 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
return nullptr;
switch (BuiltinID) {
+ case Builtin::BI__builtin_hlsl_adduint64: {
+ Value *OpA = EmitScalarExpr(E->getArg(0));
+ Value *OpB = EmitScalarExpr(E->getArg(1));
+ assert(E->getArg(0)->getType()->hasIntegerRepresentation() &&
+ E->getArg(1)->getType()->hasIntegerRepresentation() &&
+ "AddUint64 operands must have an integer representation");
+ assert(((E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
+ 2 &&
+ E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+ 2) ||
+ (E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
+ 4 &&
+ E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+ 4)) &&
+ "input vectors must have 2 or 4 elements each");
+
+ uint64_t NumElements =
+ E->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
+
+ llvm::Value *Result = PoisonValue::get(OpA->getType());
+ llvm::Value *LowA;
+ llvm::Value *HighA;
+ llvm::Value *LowB;
+ llvm::Value *HighB;
+
+ // Obtain low and high words of inputs A and B
+ if (NumElements == 2) {
+ LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA");
+ HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA");
+ LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB");
+ HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB");
+ } else {
+ LowA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{0, 2}, "LowA");
+ HighA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{1, 3}, "HighA");
+ LowB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{0, 2}, "LowB");
+ HighB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{1, 3}, "HighB");
+ }
+
+ // Use an uadd_with_overflow to compute the sum of low words and obtain a
+ // carry value
+ llvm::Value *Carry;
+ llvm::Value *LowSum = EmitOverflowIntrinsic(
+ *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
+ llvm::Value *ZExtCarry =
+ Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt");
+
+ // Sum the high words and the carry
+ llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum");
+ llvm::Value *HighSumPlusCarry =
+ Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry");
+
+ // Insert the low and high word sums into the result vector
+ if (NumElements == 2) {
+ Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0,
+ "hlsl.AddUint64.upto0");
+ Result = Builder.CreateInsertElement(Result, HighSumPlusCarry,
+ (uint64_t)1, "hlsl.AddUint64");
+ } else { /* NumElements == 4 */
+ Result = Builder.CreateShuffleVector(LowSum, HighSumPlusCarry,
+ ArrayRef<int>{0, 2, 1, 3},
+ "hlsl.AddUint64");
+ }
+ return Result;
+ }
case Builtin::BI__builtin_hlsl_resource_getpointer: {
Value *HandleOp = EmitScalarExpr(E->getArg(0));
Value *IndexOp = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index d1f5fdff8b600..513639ed1b81d 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -138,6 +138,27 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos)
float4 acos(float4);
//===----------------------------------------------------------------------===//
+// AddUint64 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T AddUint64(T a, T b)
+/// \brief Implements unsigned 64-bit integer addition using pairs of unsigned
+/// 32-bit integers.
+/// \param x [in] The first unsigned 32-bit integer pair(s)
+/// \param y [in] The second unsigned 32-bit integer pair(s)
+///
+/// This function takes one or two pairs (low, high) of unsigned 32-bit integer
+/// values and returns pairs (low, high) of unsigned 32-bit integer
+/// values representing the result of unsigned 64-bit integer addition.
+
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
+uint32_t2 AddUint64(uint32_t2, uint32_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
+uint32_t4 AddUint64(uint32_t4, uint32_t4);
+
+// //===----------------------------------------------------------------------===//
// all builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 4abd870ad6aaa..99eb5360ec356 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2038,6 +2038,18 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
checkAllFloatTypes);
}
+static bool CheckUnsignedIntRepresentations(Sema *S, CallExpr *TheCall) {
+ auto checkUnsignedInteger = [](clang::QualType PassedType) -> bool {
+ clang::QualType BaseType =
+ PassedType->isVectorType()
+ ? PassedType->getAs<clang::VectorType>()->getElementType()
+ : PassedType;
+ return !BaseType->isUnsignedIntegerType();
+ };
+ return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy,
+ checkUnsignedInteger);
+}
+
static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
auto checkFloatorHalf = [](clang::QualType PassedType) -> bool {
clang::QualType BaseType =
@@ -2229,6 +2241,41 @@ static bool CheckResourceHandle(
// returning an ExprError
bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
switch (BuiltinID) {
+ case Builtin::BI__builtin_hlsl_adduint64: {
+ if (SemaRef.checkArgCount(TheCall, 2))
+ return true;
+ if (CheckVectorElementCallArgs(&SemaRef, TheCall))
+ return true;
+ if (CheckUnsignedIntRepresentations(&SemaRef, TheCall))
+ return true;
+
+ // CheckVectorElementCallArgs(...) guarantees both args are the same type.
+ assert(TheCall->getArg(0)->getType() == TheCall->getArg(1)->getType() &&
+ "Both args must be of the same type");
+
+ // ensure both args are vectors
+ auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
+ if (!VTy) {
+ SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector)
+ << "AddUint64" << /*all*/ 1;
+ return true;
+ }
+
+ // ensure both args have 2 elements, or both args have 4 elements
+ int NumElementsArg = VTy->getNumElements();
+ if (NumElementsArg != 2 && NumElementsArg != 4) {
+ SemaRef.Diag(TheCall->getBeginLoc(),
+ diag::err_invalid_even_odd_vector_element_count)
+ << NumElementsArg << 2 << 4 << /*even*/ 0 << /*operand*/ 1;
+ return true;
+ }
+
+ ExprResult A = TheCall->getArg(0);
+ QualType ArgTyA = A.get()->getType();
+ // return type is the same as the input type
+ TheCall->setType(ArgTyA);
+ break;
+ }
case Builtin::BI__builtin_hlsl_resource_getpointer: {
if (SemaRef.checkArgCount(TheCall, 2) ||
CheckResourceHandle(&SemaRef, TheCall, 0) ||
diff --git a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
new file mode 100644
index 0000000000000..e1832bdbbf33f
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
@@ -0,0 +1,58 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
+// RUN: -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK
+
+
+// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[LOWA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
+// CHECK-NEXT: [[HIGHA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
+// CHECK-NEXT: [[LOWB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
+// CHECK-NEXT: [[HIGHB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
+// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LOWA]], i32 [[LOWB]])
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext i1 [[TMP3]] to i32
+// CHECK-NEXT: [[HIGHSUM:%.*]] = add i32 [[HIGHA]], [[HIGHB]]
+// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add i32 [[HIGHSUM]], [[CARRYZEXT]]
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0
+// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HIGHSUMPLUSCARRY]], i64 1
+// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]]
+//
+uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
+ return AddUint64(a, b);
+}
+
+// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT: [[LOWA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[HIGHA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[LOWB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+// CHECK-NEXT: [[HIGHB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+// CHECK-NEXT: [[TMP2:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[LOWA]], <2 x i32> [[LOWB]])
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 0
+// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
+// CHECK-NEXT: [[HIGHSUM:%.*]] = add <2 x i32> [[HIGHA]], [[HIGHB]]
+// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add <2 x i32> [[HIGHSUM]], [[CARRYZEXT]]
+// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[HIGHSUMPLUSCARRY]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]]
+//
+uint4 test_AddUint64_uint4(uint4 a, uint4 b) {
+ return AddUint64(a, b);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
new file mode 100644
index 0000000000000..ec9d026bb6fe7
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+
+uint2 test_too_few_arg() {
+ return __builtin_hlsl_adduint64();
+ // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+}
+
+uint4 test_too_many_arg(uint4 a) {
+ return __builtin_hlsl_adduint64(a, a, a);
+ // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+}
+
+uint2 test_mismatched_arg_types(uint2 a, uint4 b) {
+ return __builtin_hlsl_adduint64(a, b);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must have the same type}}
+}
+
+uint2 test_bad_num_arg_elements(uint3 a, uint3 b) {
+ return __builtin_hlsl_adduint64(a, b);
+ // expected-error@-1 {{invalid element count of 3 in vector operand (expected an even element count in the range of 2 and 4)}}
+}
+
+uint2 test_scalar_arg_type(uint a) {
+ return __builtin_hlsl_adduint64(a, a);
+ // expected-error@-1 {{all arguments to AddUint64 must be vectors}}
+}
+
+uint2 test_signed_integer_args(int2 a, int2 b) {
+ return __builtin_hlsl_adduint64(a, b);
+// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) unsigned int' (vector of 2 'unsigned int' values)}}
+}
+
+struct S {
+ uint2 a;
+};
+
+uint2 test_incorrect_arg_type(S a) {
+ return __builtin_hlsl_adduint64(a, a);
+ // expected-error@-1 {{passing 'S' to parameter of incompatible type 'unsigned int'}}
+}
+
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 7cb841d9bd5b5..2f6b4d676edfd 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -50,6 +50,7 @@ def HandleTy : DXILOpParamType;
def ResBindTy : DXILOpParamType;
def ResPropsTy : DXILOpParamType;
def SplitDoubleTy : DXILOpParamType;
+def BinaryWithCarryTy : DXILOpParamType;
class DXILOpClass;
@@ -738,6 +739,18 @@ def UMin : DXILOp<40, binary> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}
+def UAddc : DXILOp<44, binaryWithCarryOrBorrow > {
+ let Doc = "Unsigned 32-bit integer arithmetic add with carry. uaddc(a,b) = (a+b, a+b overflowed ? 1 : 0)";
+ // TODO: This `let intrinsics = ...` line may be uncommented when
+ // https://github.com/llvm/llvm-project/issues/113192 is fixed
+ // let intrinsics = [IntrinSelect<int_uadd_with_overflow>];
+ let arguments = [OverloadTy, OverloadTy];
+ let result = BinaryWithCarryTy;
+ let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+ let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
def FMad : DXILOp<46, tertiary> {
let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m "
"* a + b.";
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index badd5aabd6432..f0f1bbabb6b23 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -230,6 +230,14 @@ static StructType *getSplitDoubleType(LLVMContext &Context) {
return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble");
}
+static StructType *getBinaryWithCarryType(LLVMContext &Context) {
+ if (auto *ST = StructType::getTypeByName(Context, "dx.types.i32c"))
+ return ST;
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ Type *Int1Ty = Type::getInt1Ty(Context);
+ return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c");
+}
+
static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
Type *OverloadTy) {
switch (Kind) {
@@ -273,6 +281,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
return getResPropsType(Ctx);
case OpParamType::SplitDoubleTy:
return getSplitDoubleType(Ctx);
+ case OpParamType::BinaryWithCarryTy:
+ return getBinaryWithCarryType(Ctx);
}
llvm_unreachable("Invalid parameter kind");
return nullptr;
@@ -539,6 +549,10 @@ StructType *DXILOpBuilder::getSplitDoubleType(LLVMContext &Context) {
return ::getSplitDoubleType(Context);
}
+StructType *DXILOpBuilder::getBinaryWithCarryType(LLVMContext &Context) {
+ return ::getBinaryWithCarryType(Context);
+}
+
StructType *DXILOpBuilder::getHandleType() {
return ::getHandleType(IRB.getContext());
}
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h
index df5a0240870f4..8e13b87a2be10 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.h
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h
@@ -53,6 +53,9 @@ class DXILOpBuilder {
/// Get the `%dx.types.splitdouble` type.
StructType *getSplitDoubleType(LLVMContext &Context);
+ /// Get the `%dx.types.i32c` type.
+ StructType *getBinaryWithCarryType(LLVMContext &Context);
+
/// Get the `%dx.types.Handle` type.
StructType *getHandleType();
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 0c245c1a43d31..c9e3d7e284963 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -359,17 +359,16 @@ class OpLowerer {
return lowerToBindAndAnnotateHandle(F);
}
- Error replaceSplitDoubleCallUsages(CallInst *Intrin, CallInst *Op) {
+ Error replaceAggregateTypeOfCallUsages(CallInst *Intrin, CallInst *Op) {
for (Use &U : make_early_inc_range(Intrin->uses())) {
if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) {
-
- if (EVI->getNumIndices() != 1)
- return createStringError(std::errc::invalid_argument,
- "Splitdouble has only 2 elements");
EVI->setOperand(0, Op);
+ } else if (auto *IVI = dyn_cast<InsertValueInst>(U.getUser())) {
+ IVI->setOperand(0, Op);
} else {
return make_error<StringError>(
- "Splitdouble use is not ExtractValueInst",
+ (Intrin->getCalledFunction()->getName() +
+ " use is not a ExtractValueInst or InsertValueInst"),
inconvertibleErrorCode());
}
}
@@ -821,7 +820,16 @@ class OpLowerer {
F, OpCode::SplitDouble,
OpBuilder.getSplitDoubleType(M.getContext()),
[&](CallInst *CI, CallInst *Op) {
- return replaceSplitDoubleCallUsages(CI, Op);
+ return replaceAggregateTypeOfCallUsages(CI, Op);
+ });
+ break;
+ // TODO: this can be removed when
+ // https://github.com/llvm/llvm-project/issues/113192 is fixed
+ case Intrinsic::uadd_with_overflow:
+ HasErrors |= replaceFunctionWithNamedStructOp(
+ F, OpCode::UAddc, OpBuilder.getBinaryWithCarryType(M.getContext()),
+ [&](CallInst *CI, CallInst *Op) {
+ return replaceAggregateTypeOfCallUsages(CI, Op);
});
break;
case Int...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just copying over some comments from the previous pr that might not be completely resolved for new reviewers to consider.
@@ -19445,6 +19445,70 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, | |||
return nullptr; | |||
|
|||
switch (BuiltinID) { | |||
case Builtin::BI__builtin_hlsl_adduint64: { | |||
Value *OpA = EmitScalarExpr(E->getArg(0)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Forwarding: #125319 (comment)
… count check
Addresses Justin Bogner's PR comments regarding assertions and the return. Co-authored-by: Justin Bogner <[email protected]>
✅ With the latest revision this PR passed the C/C++ code formatter. |
We can simplify the op lowering part a bit if we do #128247 first. |
Justin's change merged this morning. After you rebase I'll take a look. |
2502538
to
e2962b1
Compare
e2962b1
to
72b404b
Compare
The SPIRV test case isn't sufficent. Two reasons.
You can add a |
@@ -738,6 +739,16 @@ def UMin : DXILOp<40, binary> { | |||
let attributes = [Attributes<DXIL1_0, [ReadNone]>]; | |||
} | |||
|
|||
def UAddc : DXILOp<44, binaryWithCarryOrBorrow > { | |||
let Doc = "Unsigned 32-bit integer arithmetic add with carry. uaddc(a,b) = (a+b, a+b overflowed ? 1 : 0)"; | |||
let intrinsics = [IntrinSelect<int_uadd_with_overflow>]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One thing I am a little worried about is if we are misusing int_uadd_with_overflow
here. This kind feels like a square is a rectangle but rectangles are not squares cases where llvm is using int_uadd_with_overflow
much more broadly than we are. It makes me think If something else (ie a different frontend targeting directX) were to come along and emit int_uadd_with_overflow
and it wasn't a uint32_t2
or uint32_t4
case would we be doing the right thing by associating the overflow intrinsic with UAddc
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure I understand the concern. UAddc
and int_uadd_with_overflow
are equivalent operations on inputs of i32
type. Vectors of i32
must be scalarized to be lowered to UAddc
.
Other types accepted by int_uadd_with_overflow
(i16
and i64
scalars or vectors) are not currently able to be lowered to DXIL. Attempting to lower to DXIL results in an error like the following: error: <unknown>:0:0: in function uaddci16 i16 (i16, i16): Cannot create UAddc operation: Invalid overload type
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is the AddUint64
HLSL function that only accepts uint32_t2
and uint32_t4
to perform a 64-bit integer add operation. AddUint64
lowers into several instructions, one (or two) of which is the int_uadd_with_overflow
intrinsic used to compute an i32 add with carry, which is the same exact operation performed by UAddc
.
It should be no issue for some other frontend to emit int_uadd_with_overflow
. It will get scalarized and lowered to UAddc
for i32
inputs and be the same behavior.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i16
and i64
could be supported by UAddc
as well with minimal effort, but I'm not sure if that would conform to spec. It would, however, still be equivalent to int_uadd_with_overflow
behavior.
I think it's the driver compiler that would interpret the DXIL? I don't know if driver compilers would handle UAddc
with i16
or i64
inputs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We would need to rev the DXIL version to introduce UAddc for other types I think. I think Farzon's concern is that we might want to (at some point) handle int_uadd_with_overflow
generically, and since there isn't a DXIL op for it that would presumably involve an expansion. If we use the 32 bit int_uadd_with_overflow
to map to UAddc, then whatever pass did that expansion would need to know not to do it for certain overloads and that might be awkward.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Whatever does the lowering for int_uadd_with_overflow
generically would still need to know that the 32-bit int_uadd_with_overflow
maps directly to the UAddc
DXIL op in order to codegen optimally, as opposed to creating some expansion that does the same thing using more instructions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't a nice tablegen solution, but we have the power to check if we have a non 32 bit case in DXILIntrinsicExpansion.cpp
. It would be a little bit of a new pattern for us because it would be the first time we check an intrinsic and might not apply a transformation.
Downside is it assumes the DXIL version doesn't update UAddc for other types and if it ever did we would have to introduces some versioning to only do this for pre DXIL 1.6.x.
Upside it lets you keep the direct mapping of int_uadd_with_overflow
to UAddc
and only modify for the expansion cases.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All that said I don't think a solution here is pressing atm.
We could alternatively track this as technical debt we need to revisit as part of fully supporting *_with_overflow
intrinsics. I wanted to use these for DXIL ops documented here: #128638
That will give us time to develop a more complete solution.
I added I couldn't add the spirv target to the existing |
; CHECK: %[[#lowsum:]] = OpCompositeExtract %[[#int_32]] %[[#iaddcarry]] 0 | ||
; CHECK: %[[#carry:]] = OpCompositeExtract %[[#int_32]] %[[#iaddcarry]] 1 | ||
; CHECK: %[[#carry_ne0:]] = OpINotEqual %[[#bool]] %[[#carry]] %[[#const_i32_0]] | ||
; CHECK: %[[#select_1_or_0:]] = OpSelect %[[#int_32]] %[[#carry_ne0]] %[[#const_i32_1]] %[[#const_i32_0]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I switch -O0
to -O1
or higher, this OpSelect
applied to the carry value still does not go away. It seems that the SPIRV backend doesn't know OpIAddCarry
's second return value is always an i32 0 or 1.
8930b4a
to
fd356da
Compare
@@ -738,6 +739,16 @@ def UMin : DXILOp<40, binary> { | |||
let attributes = [Attributes<DXIL1_0, [ReadNone]>]; | |||
} | |||
|
|||
def UAddc : DXILOp<44, binaryWithCarryOrBorrow > { | |||
let Doc = "Unsigned 32-bit integer arithmetic add with carry. uaddc(a,b) = (a+b, a+b overflowed ? 1 : 0)"; | |||
let intrinsics = [IntrinSelect<int_uadd_with_overflow>]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We would need to rev the DXIL version to introduce UAddc for other types I think. I think Farzon's concern is that we might want to (at some point) handle int_uadd_with_overflow
generically, and since there isn't a DXIL op for it that would presumably involve an expansion. If we use the 32 bit int_uadd_with_overflow
to map to UAddc, then whatever pass did that expansion would need to know not to do it for certain overloads and that might be awkward.
Co-authored-by: Justin Bogner <[email protected]>
Includes farzon's refactoring of hlsl_intrinsics.h and introduction of hlsl_alias_intrinsics.h
…L op (llvm#127137) Fixes llvm#99205. - Implements the HLSL intrinsic `AddUint64` used to perform unsigned 64-bit integer addition by using pairs of unsigned 32-bit integers instead of native 64-bit types - The LLVM intrinsic `uadd_with_overflow` is used in the implementation of `AddUint64` in `CGBuiltin.cpp` - The DXIL op `UAddc` was defined in `DXIL.td`, and a lowering of the LLVM intrinsic `uadd_with_overflow` to the `UAddc` DXIL op was implemented in `DXILOpLowering.cpp` Notes: - `__builtin_addc` was not able to be used to implement `AddUint64` in `hlsl_intrinsics.h` because its `CarryOut` argument is a pointer, and pointers are not supported in HLSL - A lowering of the LLVM intrinsic `uadd_with_overflow` to SPIR-V [already exists](https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll) - When lowering the LLVM intrinsic `uadd_with_overflow` to the `UAddc` DXIL op, the anonymous struct type `{ i32, i1 }` is replaced with a named struct type `%dx.types.i32c`. This aspect of the implementation may be changed when issue llvm#113192 gets addressed - Fixes issues mentioned in the comments on the original PR llvm#125319 --------- Co-authored-by: Finn Plummer <[email protected]> Co-authored-by: Farzon Lotfi <[email protected]> Co-authored-by: Chris B <[email protected]> Co-authored-by: Justin Bogner <[email protected]>
Fixes #99205.
AddUint64
used to perform unsigned 64-bit integer addition by using pairs of unsigned 32-bit integers instead of native 64-bit typesuadd_with_overflow
is used in the implementation ofAddUint64
inCGBuiltin.cpp
UAddc
was defined inDXIL.td
, and a lowering of the LLVM intrinsicuadd_with_overflow
to theUAddc
DXIL op was implemented inDXILOpLowering.cpp
Notes:
__builtin_addc
was not able to be used to implementAddUint64
inhlsl_intrinsics.h
because itsCarryOut
argument is a pointer, and pointers are not supported in HLSLuadd_with_overflow
to SPIR-V already existsuadd_with_overflow
to theUAddc
DXIL op, the anonymous struct type{ i32, i1 }
is replaced with a named struct type%dx.types.i32c
. This aspect of the implementation may be changed when issue [DirectX] Handle named structs in DXILOpLowering in a generic way #113192 gets addressedAddUint64
HLSL function and theUAddc
DXIL op #125319