diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 871e8e81cbdf..4bc8f04c5797 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -3560,11 +3560,12 @@ def CC_C : I32EnumAttrCase<"C", 1, "c">; def CC_SpirKernel : I32EnumAttrCase<"SpirKernel", 2, "spir_kernel">; def CC_SpirFunction : I32EnumAttrCase<"SpirFunction", 3, "spir_function">; def CC_OpenCLKernel : I32EnumAttrCase<"OpenCLKernel", 4, "opencl_kernel">; +def CC_PTXKernel : I32EnumAttrCase<"PTXKernel", 5, "ptx_kernel">; def CallingConv : I32EnumAttr< "CallingConv", "calling convention", - [CC_C, CC_SpirKernel, CC_SpirFunction, CC_OpenCLKernel]> { + [CC_C, CC_SpirKernel, CC_SpirFunction, CC_OpenCLKernel, CC_PTXKernel]> { let cppNamespace = "::cir"; } diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 124fd2012c41..6a7f8014ce44 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -236,6 +236,7 @@ struct MissingFeatures { static bool exceptions() { return false; } static bool metaDataNode() { return false; } static bool emitDeclMetadata() { return false; } + static bool emitNVVMMetadata() { return false; } static bool emitScalarRangeCheck() { return false; } static bool stmtExprEvaluation() { return false; } static bool setCallingConv() { return false; } @@ -470,9 +471,6 @@ struct MissingFeatures { // can optimize away the store and load ops. Seems like an early optimization. static bool returnValueDominatingStoreOptmiization() { return false; } - // Globals (vars and functions) may have attributes that are target depedent. - static bool setTargetAttributes() { return false; } - // CIR modules parsed from text form may not carry the triple or data layout // specs. We should make it always present. static bool makeTripleAlwaysPresent() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 3cba499aa0cc..16a2b443563c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -903,7 +903,7 @@ void CIRGenModule::setNonAliasAttributes(GlobalDecl gd, mlir::Operation *go) { if (f) assert(!cir::MissingFeatures::setSectionForFuncOp()); } - assert(!cir::MissingFeatures::setTargetAttributes()); + getTargetCIRGenInfo().setTargetAttributes(d, go, *this); } static llvm::SmallVector indexesOfArrayAttr(mlir::ArrayAttr indexes) { @@ -1211,10 +1211,8 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, // something closer to GlobalValue::isDeclaration instead of checking for // initializer. if (gv.isDeclaration()) { - // TODO(cir): set target attributes + getTargetCIRGenInfo().setTargetAttributes(d, gv, *this); - // External HIP managed variables needed to be recorded for transformation - // in both device and host compilations. // External HIP managed variables needed to be recorded for transformation // in both device and host compilations. if (getLangOpts().CUDA && d && d->hasAttr() && @@ -2920,6 +2918,10 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl, // TODO(cir): Complete the remaining part of the function. assert(!cir::MissingFeatures::setFunctionAttributes()); + if (!isIncompleteFunction && func.isDeclaration()) + getTargetCIRGenInfo().setTargetAttributes(globalDecl.getDecl(), func, + *this); + // TODO(cir): This needs a lot of work to better match CodeGen. That // ultimately ends up in setGlobalVisibility, which already has the linkage of // the LLVM GV (corresponding to our FuncOp) computed, so it doesn't have to diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp index d44542de5523..1727ebf3a09c 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.cpp +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -345,11 +345,39 @@ class NVPTXTargetCIRGenInfo : public TargetCIRGenInfo { public: NVPTXTargetCIRGenInfo(CIRGenTypes &cgt) : TargetCIRGenInfo(std::make_unique(cgt)) {} + mlir::Type getCUDADeviceBuiltinSurfaceDeviceType() const override { // On the device side, texture reference is represented as an object handle // in 64-bit integer. return cir::IntType::get(&getABIInfo().CGT.getMLIRContext(), 64, true); } + + void setTargetAttributes(const clang::Decl *decl, mlir::Operation *global, + CIRGenModule &cgm) const override { + if (const auto *vd = clang::dyn_cast_or_null(decl)) { + assert(!cir::MissingFeatures::emitNVVMMetadata()); + return; + } + + if (const auto *fd = clang::dyn_cast_or_null(decl)) { + cir::FuncOp func = mlir::cast(global); + if (func.isDeclaration()) + return; + + if (cgm.getLangOpts().CUDA) { + if (fd->hasAttr()) { + func.setCallingConv(cir::CallingConv::PTXKernel); + + // In LLVM we should create metadata like: + // !{, metadata !"kernel", i32 1} + assert(!cir::MissingFeatures::emitNVVMMetadata()); + } + } + + if (fd->getAttr()) + llvm_unreachable("NYI"); + } + } }; } // namespace diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h index 84cf5d1c691f..abbfcafbe00e 100644 --- a/clang/lib/CIR/CodeGen/TargetInfo.h +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -41,6 +41,15 @@ class TargetCIRGenInfo { /// Returns ABI info helper for the target. const ABIInfo &getABIInfo() const { return *Info; } + /// Provides a convenient hook to handle extra target-specific attributes + /// for the given global. + /// In OG, the function receives an llvm::GlobalValue. However, functions + /// and global variables are separate types in Clang IR, so we use a general + /// mlir::Operation*. + virtual void setTargetAttributes(const clang::Decl *decl, + mlir::Operation *global, + CIRGenModule &module) const {} + virtual bool isScalarizableAsmOperand(CIRGenFunction &CGF, mlir::Type Ty) const { return false; diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 55d5396cc84e..4394e8688e6f 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -763,11 +763,11 @@ mlir::LLVM::Linkage convertLinkage(cir::GlobalLinkageKind linkage) { }; } -mlir::LLVM::CConv convertCallingConv(cir::CallingConv callinvConv) { +mlir::LLVM::CConv convertCallingConv(cir::CallingConv callingConv) { using CIR = cir::CallingConv; using LLVM = mlir::LLVM::CConv; - switch (callinvConv) { + switch (callingConv) { case CIR::C: return LLVM::C; case CIR::SpirKernel: @@ -776,6 +776,8 @@ mlir::LLVM::CConv convertCallingConv(cir::CallingConv callinvConv) { return LLVM::SPIR_FUNC; case CIR::OpenCLKernel: llvm_unreachable("NYI"); + case CIR::PTXKernel: + return LLVM::PTX_Kernel; } llvm_unreachable("Unknown calling convention"); } diff --git a/clang/test/CIR/CodeGen/CUDA/simple.cu b/clang/test/CIR/CodeGen/CUDA/simple.cu index 023089c1eb2d..d067e29858b3 100644 --- a/clang/test/CIR/CodeGen/CUDA/simple.cu +++ b/clang/test/CIR/CodeGen/CUDA/simple.cu @@ -22,7 +22,8 @@ __device__ void device_fn(int* a, double b, float c) {} // CIR-DEVICE: cir.func @_Z9device_fnPidf __global__ void global_fn(int a) {} -// CIR-DEVICE: @_Z9global_fni +// CIR-DEVICE: @_Z9global_fni({{.*}} cc(ptx_kernel) +// LLVM-DEVICE: define dso_local ptx_kernel void @_Z9global_fni // Check for device stub emission. @@ -32,9 +33,9 @@ __global__ void global_fn(int a) {} // CIR-HOST: cir.get_global @_Z24__device_stub__global_fni // CIR-HOST: cir.call @cudaLaunchKernel -// COM: LLVM-HOST: void @_Z24__device_stub__global_fni -// COM: LLVM-HOST: call i32 @__cudaPopCallConfiguration -// COM: LLVM-HOST: call i32 @cudaLaunchKernel(ptr @_Z24__device_stub__global_fni +// LLVM-HOST: void @_Z24__device_stub__global_fni +// LLVM-HOST: call i32 @__cudaPopCallConfiguration +// LLVM-HOST: call i32 @cudaLaunchKernel(ptr @_Z24__device_stub__global_fni int main() { global_fn<<<1, 1>>>(1); @@ -51,15 +52,15 @@ int main() { // CIR-HOST: cir.call @_Z24__device_stub__global_fni([[Arg]]) // CIR-HOST: } -// COM: LLVM-HOST: define dso_local i32 @main -// COM: LLVM-HOST: alloca %struct.dim3 -// COM: LLVM-HOST: alloca %struct.dim3 -// COM: LLVM-HOST: call void @_ZN4dim3C1Ejjj -// COM: LLVM-HOST: call void @_ZN4dim3C1Ejjj -// COM: LLVM-HOST: [[LLVMConfigOK:%[0-9]+]] = call i32 @__cudaPushCallConfiguration -// COM: LLVM-HOST: br [[LLVMConfigOK]], label %[[Good:[0-9]+]], label [[Bad:[0-9]+]] -// COM: LLVM-HOST: [[Good]]: -// COM: LLVM-HOST: call void @_Z24__device_stub__global_fni -// COM: LLVM-HOST: br label [[Bad]] -// COM: LLVM-HOST: [[Bad]]: -// COM: LLVM-HOST: ret i32 +// LLVM-HOST: define dso_local i32 @main +// LLVM-HOST: alloca %struct.dim3 +// LLVM-HOST: alloca %struct.dim3 +// LLVM-HOST: call void @_ZN4dim3C1Ejjj +// LLVM-HOST: call void @_ZN4dim3C1Ejjj +// LLVM-HOST: [[LLVMConfigOK:%[0-9]+]] = call i32 @__cudaPushCallConfiguration +// LLVM-HOST: br [[LLVMConfigOK]], label %[[Good:[0-9]+]], label [[Bad:[0-9]+]] +// LLVM-HOST: [[Good]]: +// LLVM-HOST: call void @_Z24__device_stub__global_fni +// LLVM-HOST: br label [[Bad]] +// LLVM-HOST: [[Bad]]: +// LLVM-HOST: ret i32