[DXIL] Using a dx intrinsic for directx backend

inbelic · inbelic · commit 9d300da868dd · 2024-11-21T19:20:22.000Z
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -222,6 +222,11 @@ Value *handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E) {
     ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N);
   }
 
+  if (CGF.CGM.getTarget().getTriple().isDXIL())
+    return CGF.Builder.CreateIntrinsic(
+        /*ReturnType=*/ResultType, Intrinsic::dx_asdouble,
+        ArrayRef<Value *>{OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble");
+
   if (!E->getArg(0)->getType()->isVectorType()) {
     OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits);
     OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits);
diff --git a/clang/test/CodeGenHLSL/builtins/asdouble.hlsl b/clang/test/CodeGenHLSL/builtins/asdouble.hlsl
@@ -1,29 +1,37 @@
 // RUN: %clang_cc1 -finclude-default-header -triple \
 // RUN:   dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
-// RUN:   FileCheck %s
+// RUN:   FileCheck %s --check-prefixes=CHECK,CHECK-DXIL
 // RUN: %clang_cc1 -finclude-default-header -triple \
 // RUN:   spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
-// RUN:   FileCheck %s
+// RUN:   FileCheck %s --check-prefixes=CHECK,CHECK-SPV
 
 // Test lowering of asdouble expansion to shuffle/bitcast and splat when required
 
 // CHECK-LABEL: test_uint
 double test_uint(uint low, uint high) {
-  // CHECK: %[[LOW_INSERT:.*]] = insertelement <1 x i32>
-  // CHECK: %[[LOW_SHUFFLE:.*]] = shufflevector <1 x i32> %[[LOW_INSERT]], {{.*}} zeroinitializer
-  // CHECK: %[[HIGH_INSERT:.*]] = insertelement <1 x i32>
-  // CHECK: %[[HIGH_SHUFFLE:.*]] = shufflevector <1 x i32> %[[HIGH_INSERT]], {{.*}} zeroinitializer
-
-  // CHECK:      %[[SHUFFLE0:.*]] = shufflevector <1 x i32> %[[LOW_SHUFFLE]], <1 x i32> %[[HIGH_SHUFFLE]],
-  // CHECK-SAME: {{.*}} <i32 0, i32 1>
-  // CHECK:      bitcast <2 x i32> %[[SHUFFLE0]] to double
+  // CHECK-SPV: %[[LOW_INSERT:.*]] = insertelement <1 x i32>
+  // CHECK-SPV: %[[LOW_SHUFFLE:.*]] = shufflevector <1 x i32> %[[LOW_INSERT]], {{.*}} zeroinitializer
+  // CHECK-SPV: %[[HIGH_INSERT:.*]] = insertelement <1 x i32>
+  // CHECK-SPV: %[[HIGH_SHUFFLE:.*]] = shufflevector <1 x i32> %[[HIGH_INSERT]], {{.*}} zeroinitializer
+
+  // CHECK-SPV:      %[[SHUFFLE0:.*]] = shufflevector <1 x i32> %[[LOW_SHUFFLE]], <1 x i32> %[[HIGH_SHUFFLE]],
+  // CHECK-SPV-SAME: {{.*}} <i32 0, i32 1>
+  // CHECK-SPV:      bitcast <2 x i32> %[[SHUFFLE0]] to double
+
+  // CHECK-DXIL: call double @llvm.dx.asdouble.i32
   return asdouble(low, high);
 }
 
+// CHECK-DXIL: declare double @llvm.dx.asdouble.i32
+
 // CHECK-LABEL: test_vuint
 double3 test_vuint(uint3 low, uint3 high) {
-  // CHECK:      %[[SHUFFLE1:.*]] = shufflevector
-  // CHECK-SAME: {{.*}} <i32 0, i32 3, i32 1, i32 4, i32 2, i32 5>
-  // CHECK:      bitcast <6 x i32> %[[SHUFFLE1]] to <3 x double>
+  // CHECK-SPV:      %[[SHUFFLE1:.*]] = shufflevector
+  // CHECK-SPV-SAME: {{.*}} <i32 0, i32 3, i32 1, i32 4, i32 2, i32 5>
+  // CHECK-SPV:      bitcast <6 x i32> %[[SHUFFLE1]] to <3 x double>
+
+  // CHECK-DXIL: call <3 x double> @llvm.dx.asdouble.v3i32
   return asdouble(low, high);
 }
+
+// CHECK-DXIL: declare <3 x double> @llvm.dx.asdouble.v3i32
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -46,6 +46,7 @@ def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
 
 def int_dx_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>;
 def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>;
+def int_dx_asdouble : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [llvm_anyint_ty, LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
@@ -818,6 +818,15 @@ def FlattenedThreadIdInGroup :  DXILOp<96, flattenedThreadIdInGroup> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
+def MakeDouble :  DXILOp<101, makeDouble> {
+  let Doc = "creates a double value";
+  let LLVMIntrinsic = int_dx_asdouble;
+  let arguments = [Int32Ty, Int32Ty];
+  let result = DoubleTy;
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
 def SplitDouble :  DXILOp<102, splitDouble> {
   let Doc = "Splits a double into 2 uints";
   let arguments = [OverloadTy];
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -39,6 +39,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
   case Intrinsic::dx_frac:
   case Intrinsic::dx_rsqrt:
   case Intrinsic::dx_wave_readlane:
+  case Intrinsic::dx_asdouble:
   case Intrinsic::dx_splitdouble:
   case Intrinsic::dx_firstbituhigh:
   case Intrinsic::dx_firstbitshigh:
diff --git a/llvm/test/CodeGen/DirectX/asdouble.ll b/llvm/test/CodeGen/DirectX/asdouble.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; Test that for scalar and vector inputs, asdouble maps down to the makeDouble
+; DirectX op
+
+define noundef double @asdouble_scalar(i32 noundef %low, i32 noundef %high) {
+; CHECK: call double @dx.op.makeDouble(i32 101, i32 %low, i32 %high)
+  %ret = call double @llvm.dx.asdouble.i32(i32 %low, i32 %high)
+  ret double %ret
+}
+
+declare double @llvm.dx.asdouble.i32(i32, i32)
+
+define noundef <3 x double> @asdouble_vec(<3 x i32> noundef %low, <3 x i32> noundef %high) {
+; CHECK: call double @dx.op.makeDouble(i32 101, i32 %low.i0, i32 %high.i0)
+; CHECK: call double @dx.op.makeDouble(i32 101, i32 %low.i1, i32 %high.i1)
+; CHECK: call double @dx.op.makeDouble(i32 101, i32 %low.i2, i32 %high.i2)
+  %ret = call <3 x double> @llvm.dx.asdouble.v3i32(<3 x i32> %low, <3 x i32> %high)
+  ret <3 x double> %ret
+}
+
+declare <3 x double> @llvm.dx.asdouble.v3i32(<3 x i32>, <3 x i32>)