-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[HLSL] [DXIL] Implement the AddUint64 HLSL function and the UAddc DXIL op #127137
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4fae564
45afa2f
7714dc5
9a08afa
ae46297
4e017e4
3e832cf
21cbec9
ff09962
9f73e67
72b404b
be29207
62d95a4
fd356da
c9b9fde
75c49b1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 | ||
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ | ||
// RUN: -emit-llvm -disable-llvm-passes -o - | \ | ||
// RUN: FileCheck %s --check-prefixes=CHECK | ||
|
||
|
||
// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_( | ||
// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i32>, align 8 | ||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8 | ||
// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8 | ||
// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8 | ||
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8 | ||
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8 | ||
// CHECK-NEXT: [[LOWA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0 | ||
// CHECK-NEXT: [[HIGHA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1 | ||
// CHECK-NEXT: [[LOWB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0 | ||
// CHECK-NEXT: [[HIGHB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1 | ||
// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LOWA]], i32 [[LOWB]]) | ||
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1 | ||
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0 | ||
// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext i1 [[TMP3]] to i32 | ||
// CHECK-NEXT: [[HIGHSUM:%.*]] = add i32 [[HIGHA]], [[HIGHB]] | ||
// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add i32 [[HIGHSUM]], [[CARRYZEXT]] | ||
// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0 | ||
// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HIGHSUMPLUSCARRY]], i64 1 | ||
// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]] | ||
// | ||
// Codegen test for AddUint64 on a uint2 operand pair. Per the expected IR above, | ||
// element 0 of each operand is used as the low 32 bits and element 1 as the high | ||
// 32 bits; the overflow bit of the low add is zero-extended and added into the | ||
// sum of the high halves. | ||
uint2 test_AddUint64_uint2(uint2 a, uint2 b) { | ||
return AddUint64(a, b); | ||
} | ||
|
||
// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_( | ||
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 | ||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 | ||
// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16 | ||
// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16 | ||
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16 | ||
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16 | ||
// CHECK-NEXT: [[LOWA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 2> | ||
// CHECK-NEXT: [[HIGHA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 1, i32 3> | ||
// CHECK-NEXT: [[LOWB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 2> | ||
// CHECK-NEXT: [[HIGHB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 1, i32 3> | ||
// CHECK-NEXT: [[TMP2:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[LOWA]], <2 x i32> [[LOWB]]) | ||
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 1 | ||
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 0 | ||
// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32> | ||
// CHECK-NEXT: [[HIGHSUM:%.*]] = add <2 x i32> [[HIGHA]], [[HIGHB]] | ||
// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add <2 x i32> [[HIGHSUM]], [[CARRYZEXT]] | ||
// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[HIGHSUMPLUSCARRY]], <4 x i32> <i32 0, i32 2, i32 1, i32 3> | ||
// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]] | ||
// | ||
// Codegen test for AddUint64 on uint4 operands. Per the expected IR above, | ||
// elements 0 and 2 are gathered as the low halves and elements 1 and 3 as the | ||
// high halves, the add-with-overflow is done on <2 x i32> vectors, and the | ||
// results are interleaved back into (low, high, low, high) order. | ||
uint4 test_AddUint64_uint4(uint4 a, uint4 b) { | ||
return AddUint64(a, b); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify | ||
|
||
// Negative test: the builtin expects two arguments, so a call with none is diagnosed. | ||
uint2 test_too_few_arg() { | ||
return __builtin_hlsl_adduint64(); | ||
// expected-error@-1 {{too few arguments to function call, expected 2, have 0}} | ||
} | ||
|
||
// Negative test: the builtin expects two arguments, so a call with three is diagnosed. | ||
uint4 test_too_many_arg(uint4 a) { | ||
return __builtin_hlsl_adduint64(a, a, a); | ||
// expected-error@-1 {{too many arguments to function call, expected 2, have 3}} | ||
} | ||
|
||
// Negative test: both operands must have the same type; uint2 mixed with uint4 is diagnosed. | ||
uint2 test_mismatched_arg_types(uint2 a, uint4 b) { | ||
return __builtin_hlsl_adduint64(a, b); | ||
// expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must have the same type}} | ||
} | ||
|
||
// Negative test: the vector operand must be a multiple of 64 bits wide; a uint3 | ||
// (96 bits) is diagnosed. | ||
uint2 test_bad_num_arg_elements(uint3 a, uint3 b) { | ||
return __builtin_hlsl_adduint64(a, b); | ||
// expected-error@-1 {{incorrect number of bits in vector operand (expected a multiple of 64 bits, have 96)}} | ||
} | ||
|
||
// Negative test: scalar operands are diagnosed; the builtin requires vector arguments. | ||
uint2 test_scalar_arg_type(uint a) { | ||
return __builtin_hlsl_adduint64(a, a); | ||
// expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must be vectors}} | ||
} | ||
|
||
// Negative test: vector elements must be 32 bits wide. Note that, despite the | ||
// function name, the operand here is a vector of 16-bit integers (uint16_t2), | ||
// which is what triggers the diagnostic. | ||
uint2 test_uint64_args(uint16_t2 a) { | ||
return __builtin_hlsl_adduint64(a, a); | ||
// expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}} | ||
} | ||
|
||
// Negative test: signed integer vectors are diagnosed; the reported parameter | ||
// type is a vector of 'unsigned int'. | ||
uint2 test_signed_integer_args(int2 a, int2 b) { | ||
return __builtin_hlsl_adduint64(a, b); | ||
// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) unsigned int' (vector of 2 'unsigned int' values)}} | ||
} | ||
|
||
// Struct wrapping a uint2; passed to the builtin below as an argument of the | ||
// wrong kind (a struct where a vector is expected). | ||
struct S { | ||
uint2 a; | ||
}; | ||
|
||
// Negative test: passing a struct (even one containing a uint2) is diagnosed as | ||
// an incompatible argument type. | ||
uint2 test_incorrect_arg_type(S a) { | ||
return __builtin_hlsl_adduint64(a, a); | ||
// expected-error@-1 {{passing 'S' to parameter of incompatible type 'unsigned int'}} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -56,6 +56,7 @@ def HandleTy : DXILOpParamType; | |
def ResBindTy : DXILOpParamType; | ||
def ResPropsTy : DXILOpParamType; | ||
def SplitDoubleTy : DXILOpParamType; | ||
// Parameter-type marker used as the result type of the UAddc op (unsigned add | ||
// with carry). NOTE(review): the concrete struct layout is defined outside this | ||
// hunk -- confirm where it is mapped to an LLVM type. | ||
def BinaryWithCarryTy : DXILOpParamType; | ||
Icohedron marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
class DXILOpClass; | ||
|
||
|
@@ -744,6 +745,16 @@ def UMin : DXILOp<40, binary> { | |
let attributes = [Attributes<DXIL1_0, [ReadNone]>]; | ||
} | ||
|
||
def UAddc : DXILOp<44, binaryWithCarryOrBorrow > { | ||
let Doc = "unsigned add of 32-bit operand with the carry"; | ||
let intrinsics = [IntrinSelect<int_uadd_with_overflow>]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. One thing I am a little worried about is if we are misusing […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure I understand the concern. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It is the […] It should be no issue for some other frontend to emit […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I think it's the driver compiler that would interpret the DXIL? I don't know if driver compilers would handle […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We would need to rev the DXIL version to introduce UAddc for other types, I think. I think Farzon's concern is that we might want to (at some point) handle […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Whatever does the lowering for […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This isn't a nice tablegen solution, but we have the power to check if we have a non-32-bit case in […] The downside is that it assumes the DXIL version doesn't update UAddc for other types, and if it ever did we would have to introduce some versioning to only do this for pre-DXIL 1.6.x. The upside is that it lets you keep the direct mapping of […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. All that said, I don't think a solution here is pressing at the moment. We could alternatively track this as technical debt we need to revisit as part of fully supporting […] That will give us time to develop a more complete solution. |
||
let arguments = [OverloadTy, OverloadTy]; | ||
let result = BinaryWithCarryTy; | ||
let overloads = [Overloads<DXIL1_0, [Int32Ty]>]; | ||
let stages = [Stages<DXIL1_0, [all_stages]>]; | ||
let attributes = [Attributes<DXIL1_0, [ReadNone]>]; | ||
} | ||
|
||
def FMad : DXILOp<46, tertiary> { | ||
let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m " | ||
"* a + b."; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Forwarding: #125319 (comment)