Skip to content

[HLSL] [DXIL] Implement the AddUint64 HLSL function and the UAddc DXIL op #127137

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Mar 6, 2025
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -4753,6 +4753,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
}

// HLSL
// Builtin behind the HLSL AddUint64 intrinsic. The variadic "void(...)"
// prototype carries no operand or result types; the SemaHLSL check for
// __builtin_hlsl_adduint64 validates the operands and sets the call's type.
def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_adduint64"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
}

def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_getpointer"];
let Attributes = [NoThrow];
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -10657,6 +10657,11 @@ def err_vector_incorrect_num_elements : Error<
"%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
def err_altivec_empty_initializer : Error<"expected initializer">;

// Reports a vector operand whose total bit width is wrong.
//   %0 - 0: an exact bit count is expected, 1: "a multiple of" the bit count
//   %1 - expected bit count
//   %2 - bit count actually supplied
// The separating space lives inside the non-empty %select alternative so the
// exact-count form does not render with a double space.
def err_vector_incorrect_bit_count : Error<
  "incorrect number of bits in vector operand (expected %select{|a multiple of }0%1 bits, have %2)">;
// Reports an integer type of the wrong width.
//   %0 - expected bit count
//   %1 - bit count actually supplied
def err_integer_incorrect_bit_count : Error<
"incorrect number of bits in integer (expected %0 bits, have %1)">;

def err_invalid_neon_type_code : Error<
"incompatible constant for this __builtin_neon function">;
def err_argument_invalid_range : Error<
Expand Down
56 changes: 56 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19445,6 +19445,62 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
return nullptr;

switch (BuiltinID) {
case Builtin::BI__builtin_hlsl_adduint64: {
// Lowers AddUint64. Each operand is a vector of 2 or 4 integers laid out as
// (low, high) pairs; the low words are added with overflow detection and the
// resulting carry is added into the sum of the high words.
Value *OpA = EmitScalarExpr(E->getArg(0));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forwarding: #125319 (comment)

Value *OpB = EmitScalarExpr(E->getArg(1));
QualType Arg0Ty = E->getArg(0)->getType();
uint64_t NumElements = Arg0Ty->castAs<VectorType>()->getNumElements();
// These conditions are only asserted here, not diagnosed; the SemaHLSL check
// for this builtin is what rejects such calls with an error.
assert(Arg0Ty == E->getArg(1)->getType() &&
"AddUint64 operand types must match");
assert(Arg0Ty->hasIntegerRepresentation() &&
"AddUint64 operands must have an integer representation");
assert((NumElements == 2 || NumElements == 4) &&
"AddUint64 operands must have 2 or 4 elements");

llvm::Value *LowA;
llvm::Value *HighA;
llvm::Value *LowB;
llvm::Value *HighB;

// Obtain low and high words of inputs A and B
if (NumElements == 2) {
// One pair: element 0 is the low word and element 1 the high word, so the
// words are scalars.
LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA");
HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA");
LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB");
HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB");
} else {
// Two pairs: even elements (0, 2) are the low words and odd elements (1, 3)
// the high words, so the words are 2-element vectors.
LowA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{0, 2}, "LowA");
HighA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{1, 3}, "HighA");
LowB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{0, 2}, "LowB");
HighB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{1, 3}, "HighB");
}

// Use an uadd_with_overflow to compute the sum of low words and obtain a
// carry value
llvm::Value *Carry;
llvm::Value *LowSum = EmitOverflowIntrinsic(
*this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
// Widen the overflow flag to the type of the high words so it can be added.
llvm::Value *ZExtCarry =
Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt");

// Sum the high words and the carry
llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum");
llvm::Value *HighSumPlusCarry =
Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry");

if (NumElements == 4) {
// Interleave the two 2-element results back into (low0, high0, low1, high1):
// mask indices 0-1 pick from LowSum and 2-3 from HighSumPlusCarry.
return Builder.CreateShuffleVector(LowSum, HighSumPlusCarry,
ArrayRef<int>{0, 2, 1, 3},
"hlsl.AddUint64");
}

// Single pair: assemble the (low, high) result vector; both lanes of the
// initial poison value are overwritten.
llvm::Value *Result = PoisonValue::get(OpA->getType());
Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0,
"hlsl.AddUint64.upto0");
Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, (uint64_t)1,
"hlsl.AddUint64");
return Result;
}
case Builtin::BI__builtin_hlsl_resource_getpointer: {
Value *HandleOp = EmitScalarExpr(E->getArg(0));
Value *IndexOp = EmitScalarExpr(E->getArg(1));
Expand Down
21 changes: 21 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,27 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos)
float4 acos(float4);

//===----------------------------------------------------------------------===//
// AddUint64 builtins
//===----------------------------------------------------------------------===//

/// \fn T AddUint64(T a, T b)
/// \brief Implements unsigned 64-bit integer addition using pairs of unsigned
/// 32-bit integers.
/// \param a [in] The first unsigned 32-bit integer pair(s)
/// \param b [in] The second unsigned 32-bit integer pair(s)
///
/// This function takes one or two pairs (low, high) of unsigned 32-bit integer
/// values and returns pairs (low, high) of unsigned 32-bit integer
/// values representing the result of unsigned 64-bit integer addition.
/// The two-element overload adds a single pair; the four-element overload adds
/// two pairs, where elements 0-1 form the first pair and elements 2-3 the
/// second.

_HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
uint32_t2 AddUint64(uint32_t2, uint32_t2);
_HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
uint32_t4 AddUint64(uint32_t4, uint32_t4);

//===----------------------------------------------------------------------===//
// all builtins
//===----------------------------------------------------------------------===//

Expand Down
58 changes: 58 additions & 0 deletions clang/lib/Sema/SemaHLSL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2038,6 +2038,18 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
checkAllFloatTypes);
}

/// Validates the arguments of \p TheCall with a predicate that accepts only
/// unsigned integer types, looking through a vector to its element type.
/// The work is forwarded to CheckAllArgTypesAreCorrect with `unsigned int` as
/// the expected type, and that helper's result is returned unchanged.
static bool CheckUnsignedIntRepresentations(Sema *S, CallExpr *TheCall) {
  // The predicate answers "is this type wrong?": true means not unsigned.
  auto IsNotUnsignedInt = [](clang::QualType ArgTy) -> bool {
    clang::QualType ScalarTy = ArgTy;
    if (ArgTy->isVectorType())
      ScalarTy = ArgTy->getAs<clang::VectorType>()->getElementType();
    if (ScalarTy->isUnsignedIntegerType())
      return false;
    return true;
  };
  return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy,
                                    IsNotUnsignedInt);
}

static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
auto checkFloatorHalf = [](clang::QualType PassedType) -> bool {
clang::QualType BaseType =
Expand Down Expand Up @@ -2229,6 +2241,52 @@ static bool CheckResourceHandle(
// returning an ExprError
bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
switch (BuiltinID) {
case Builtin::BI__builtin_hlsl_adduint64: {
  // AddUint64 requires exactly two operands of the same type, each a vector
  // of two or four unsigned 32-bit integers. Returning true reports failure.
  if (SemaRef.checkArgCount(TheCall, 2))
    return true;
  if (CheckVectorElementCallArgs(&SemaRef, TheCall))
    return true;
  if (CheckUnsignedIntRepresentations(&SemaRef, TheCall))
    return true;

  // CheckVectorElementCallArgs(...) guarantees both args are the same type.
  // The type is read only after the checks above have run.
  QualType ArgTy = TheCall->getArg(0)->getType();
  assert(ArgTy == TheCall->getArg(1)->getType() &&
         "Both args must be of the same type");

  // ensure both args are vectors
  const auto *VTy = ArgTy->getAs<VectorType>();
  if (!VTy) {
    SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector)
        << TheCall->getDirectCallee() << /*all*/ 1;
    return true;
  }

  // ensure arg integers are 32-bits; getTypeSize() reports the width in bits
  // directly, so no bits-per-char constant is needed
  uint64_t ElementBitCount =
      getASTContext().getTypeSize(VTy->getElementType());
  if (ElementBitCount != 32) {
    SemaRef.Diag(TheCall->getBeginLoc(),
                 diag::err_integer_incorrect_bit_count)
        << 32 << ElementBitCount;
    return true;
  }

  // ensure both args are vectors of total bit size of a multiple of 64,
  // i.e. one or two (low, high) pairs
  unsigned NumElementsArg = VTy->getNumElements();
  if (NumElementsArg != 2 && NumElementsArg != 4) {
    SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vector_incorrect_bit_count)
        << 1 /*a multiple of*/ << 64 << NumElementsArg * ElementBitCount;
    return true;
  }

  // return type is the same as the input type
  TheCall->setType(ArgTy);
  break;
}
case Builtin::BI__builtin_hlsl_resource_getpointer: {
if (SemaRef.checkArgCount(TheCall, 2) ||
CheckResourceHandle(&SemaRef, TheCall, 0) ||
Expand Down
58 changes: 58 additions & 0 deletions clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
// RUN: -emit-llvm -disable-llvm-passes -o - | \
// RUN: FileCheck %s --check-prefixes=CHECK


// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_(
// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i32>, align 8
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8
// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8
// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
// CHECK-NEXT: [[LOWA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
// CHECK-NEXT: [[HIGHA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
// CHECK-NEXT: [[LOWB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
// CHECK-NEXT: [[HIGHB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LOWA]], i32 [[LOWB]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext i1 [[TMP3]] to i32
// CHECK-NEXT: [[HIGHSUM:%.*]] = add i32 [[HIGHA]], [[HIGHB]]
// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add i32 [[HIGHSUM]], [[CARRYZEXT]]
// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0
// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HIGHSUMPLUSCARRY]], i64 1
// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]]
//
uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
// Two-element overload: a single (low, high) pair per operand.
return AddUint64(a, b);
}

// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
// CHECK-NEXT: [[LOWA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
// CHECK-NEXT: [[HIGHA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
// CHECK-NEXT: [[LOWB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
// CHECK-NEXT: [[HIGHB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[LOWA]], <2 x i32> [[LOWB]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 0
// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
// CHECK-NEXT: [[HIGHSUM:%.*]] = add <2 x i32> [[HIGHA]], [[HIGHB]]
// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add <2 x i32> [[HIGHSUM]], [[CARRYZEXT]]
// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[HIGHSUMPLUSCARRY]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]]
//
uint4 test_AddUint64_uint4(uint4 a, uint4 b) {
// Four-element overload: two (low, high) pairs per operand.
return AddUint64(a, b);
}
46 changes: 46 additions & 0 deletions clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify

// Calling the builtin with no arguments must be rejected by the argument
// count check.
uint2 test_too_few_arg() {
return __builtin_hlsl_adduint64();
// expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
}

// Calling the builtin with three arguments must be rejected by the argument
// count check.
uint4 test_too_many_arg(uint4 a) {
return __builtin_hlsl_adduint64(a, a, a);
// expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
}

// Operands of different vector sizes (uint2 vs. uint4) must be rejected.
uint2 test_mismatched_arg_types(uint2 a, uint4 b) {
return __builtin_hlsl_adduint64(a, b);
// expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must have the same type}}
}

// A three-element vector (96 bits) does not split into (low, high) pairs and
// must be rejected.
uint2 test_bad_num_arg_elements(uint3 a, uint3 b) {
return __builtin_hlsl_adduint64(a, b);
// expected-error@-1 {{incorrect number of bits in vector operand (expected a multiple of 64 bits, have 96)}}
}

// Scalar operands must be rejected: the builtin only accepts vectors.
uint2 test_scalar_arg_type(uint a) {
return __builtin_hlsl_adduint64(a, a);
// expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must be vectors}}
}

// Vector elements that are not 32 bits wide must be rejected.
// NOTE(review): despite the name, this test passes 16-bit elements
// (uint16_t2), not 64-bit ones.
uint2 test_uint64_args(uint16_t2 a) {
return __builtin_hlsl_adduint64(a, a);
// expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
}

// Signed integer vectors must be rejected: the elements have to be unsigned.
uint2 test_signed_integer_args(int2 a, int2 b) {
return __builtin_hlsl_adduint64(a, b);
// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) unsigned int' (vector of 2 'unsigned int' values)}}
}

// Aggregate wrapping a uint2; used below to pass a non-integer, non-vector
// operand to the builtin.
struct S {
uint2 a;
};

// A struct operand must be rejected even though it contains a uint2 member.
uint2 test_incorrect_arg_type(S a) {
return __builtin_hlsl_adduint64(a, a);
// expected-error@-1 {{passing 'S' to parameter of incompatible type 'unsigned int'}}
}

13 changes: 13 additions & 0 deletions llvm/lib/Target/DirectX/DXIL.td
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def HandleTy : DXILOpParamType;
def ResBindTy : DXILOpParamType;
def ResPropsTy : DXILOpParamType;
def SplitDoubleTy : DXILOpParamType;
def BinaryWithCarryTy : DXILOpParamType;

class DXILOpClass;

Expand Down Expand Up @@ -738,6 +739,18 @@ def UMin : DXILOp<40, binary> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

// DXIL op 44: unsigned add that also reports the carry. Both operands use the
// overload type (only i32 is listed), while the result is the dedicated
// BinaryWithCarryTy rather than the overload type itself.
def UAddc : DXILOp<44, binaryWithCarryOrBorrow > {
let Doc = "Unsigned 32-bit integer arithmetic add with carry. uaddc(a,b) = (a+b, a+b overflowed ? 1 : 0)";
// TODO: This `let intrinsics = ...` line may be uncommented when
// https://github.com/llvm/llvm-project/issues/113192 is fixed
// let intrinsics = [IntrinSelect<int_uadd_with_overflow>];
let arguments = [OverloadTy, OverloadTy];
let result = BinaryWithCarryTy;
let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
let stages = [Stages<DXIL1_0, [all_stages]>];
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

def FMad : DXILOp<46, tertiary> {
let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m "
"* a + b.";
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/DirectX/DXILOpBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,14 @@ static StructType *getSplitDoubleType(LLVMContext &Context) {
return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble");
}

/// Returns the named struct type `dx.types.i32c` for \p Context.
///
/// If a struct with that name is already registered in the context it is
/// returned as-is; otherwise a new named struct with the fields {i32, i1} is
/// created and returned.
static StructType *getBinaryWithCarryType(LLVMContext &Context) {
  StructType *CarryTy = StructType::getTypeByName(Context, "dx.types.i32c");
  if (!CarryTy) {
    // First request in this context: build the { i32, i1 } layout.
    Type *ValueTy = Type::getInt32Ty(Context);
    Type *FlagTy = Type::getInt1Ty(Context);
    CarryTy = StructType::create({ValueTy, FlagTy}, "dx.types.i32c");
  }
  return CarryTy;
}

static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
Type *OverloadTy) {
switch (Kind) {
Expand Down Expand Up @@ -273,6 +281,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
return getResPropsType(Ctx);
case OpParamType::SplitDoubleTy:
return getSplitDoubleType(Ctx);
case OpParamType::BinaryWithCarryTy:
return getBinaryWithCarryType(Ctx);
}
llvm_unreachable("Invalid parameter kind");
return nullptr;
Expand Down Expand Up @@ -539,6 +549,10 @@ StructType *DXILOpBuilder::getSplitDoubleType(LLVMContext &Context) {
return ::getSplitDoubleType(Context);
}

// Member entry point for the `dx.types.i32c` struct type; forwards to the
// file-local helper of the same name (hence the explicit `::` qualification).
StructType *DXILOpBuilder::getBinaryWithCarryType(LLVMContext &Context) {
return ::getBinaryWithCarryType(Context);
}

StructType *DXILOpBuilder::getHandleType() {
return ::getHandleType(IRB.getContext());
}
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/DirectX/DXILOpBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ class DXILOpBuilder {
/// Get the `%dx.types.splitdouble` type.
StructType *getSplitDoubleType(LLVMContext &Context);

/// Get the `%dx.types.i32c` type: the {i32, i1} struct used as the result of
/// ops that produce a 32-bit value together with a carry flag (e.g. UAddc).
StructType *getBinaryWithCarryType(LLVMContext &Context);

/// Get the `%dx.types.Handle` type.
StructType *getHandleType();

Expand Down
Loading