Skip to content

[CIR][CUDA] Support for built-in CUDA surface type #1455

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Mar 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions clang/lib/CIR/CodeGen/CIRGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -570,13 +570,13 @@ bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl *global) const {
// their device-side incarnations.

if (global->hasAttr<CUDAConstantAttr>() ||
global->getType()->isCUDADeviceBuiltinSurfaceType() ||
global->getType()->isCUDADeviceBuiltinTextureType()) {
llvm_unreachable("NYI");
}

return !langOpts.CUDAIsDevice || global->hasAttr<CUDADeviceAttr>() ||
global->hasAttr<CUDASharedAttr>();
global->hasAttr<CUDASharedAttr>() ||
global->getType()->isCUDADeviceBuiltinSurfaceType();
}

void CIRGenModule::emitGlobal(GlobalDecl gd) {
Expand Down Expand Up @@ -1122,10 +1122,8 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty,
}
}

// TODO(cir): LLVM codegen makes sure the result is of the correct type
// by issuing a address space cast.
if (entryCIRAS != cirAS)
llvm_unreachable("NYI");
// Address space check removed because it is unnecessary because CIR records
// address space info in types.

// (If global is requested for a definition, we always need to create a new
// global, not just return a bitcast.)
Expand Down Expand Up @@ -1496,7 +1494,8 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *d,
// __shared__ variables is not marked as externally initialized,
// because they must not be initialized.
if (linkage != cir::GlobalLinkageKind::InternalLinkage &&
(d->hasAttr<CUDADeviceAttr>())) {
(d->hasAttr<CUDADeviceAttr>() ||
d->getType()->isCUDADeviceBuiltinSurfaceType())) {
gv->setAttr(CUDAExternallyInitializedAttr::getMnemonic(),
CUDAExternallyInitializedAttr::get(&getMLIRContext()));
}
Expand Down
10 changes: 7 additions & 3 deletions clang/lib/CIR/CodeGen/CIRGenTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,10 +352,14 @@ mlir::Type CIRGenTypes::convertType(QualType T) {
// 1. There is no SurfaceType on HIP,
// 2. There is Texture memory on HIP but accessing the memory goes through
// calls to the runtime. e.g. for a 2D: `tex2D<float>(tex, x, y);`
if (astContext.getLangOpts().CUDA && astContext.getLangOpts().CUDAIsDevice) {
if (Ty->isCUDADeviceBuiltinSurfaceType() ||
Ty->isCUDADeviceBuiltinTextureType())
if (astContext.getLangOpts().CUDAIsDevice) {
if (T->isCUDADeviceBuiltinSurfaceType()) {
if (mlir::Type Ty =
CGM.getTargetCIRGenInfo().getCUDADeviceBuiltinSurfaceDeviceType())
return Ty;
} else if (T->isCUDADeviceBuiltinTextureType()) {
llvm_unreachable("NYI");
}
}

if (const auto *recordType = dyn_cast<RecordType>(T))
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/CIR/CodeGen/TargetInfo.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

#include "TargetInfo.h"
#include "ABIInfo.h"
#include "CIRGenCXXABI.h"
Expand Down Expand Up @@ -344,6 +345,11 @@ class NVPTXTargetCIRGenInfo : public TargetCIRGenInfo {
public:
NVPTXTargetCIRGenInfo(CIRGenTypes &cgt)
: TargetCIRGenInfo(std::make_unique<NVPTXABIInfo>(cgt)) {}
mlir::Type getCUDADeviceBuiltinSurfaceDeviceType() const override {
// On the device side, texture reference is represented as an object handle
// in 64-bit integer.
return cir::IntType::get(&getABIInfo().CGT.getMLIRContext(), 64, true);
}
};

} // namespace
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/CIR/CodeGen/TargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,9 @@ class TargetCIRGenInfo {
// kernels. They should reset the calling convention to OpenCLKernel,
// which will be further resolved by getOpenCLKernelCallingConv().
virtual void setCUDAKernelCallingConvention(const FunctionType *&ft) const {}

virtual mlir::Type getCUDADeviceBuiltinSurfaceDeviceType() const {
return nullptr;
}
virtual ~TargetCIRGenInfo() {}
};

Expand Down
4 changes: 2 additions & 2 deletions clang/lib/CIR/Dialect/IR/CIRAttrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -657,10 +657,10 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) {
return Kind::offload_private;
case LangAS::opencl_generic:
return Kind::offload_generic;

case LangAS::cuda_constant:
return Kind::offload_constant;
case LangAS::opencl_global_device:
case LangAS::opencl_global_host:
case LangAS::cuda_constant:
case LangAS::sycl_global:
case LangAS::sycl_global_device:
case LangAS::sycl_global_host:
Expand Down
26 changes: 26 additions & 0 deletions clang/test/CIR/CodeGen/CUDA/surface.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target

// RUN: %clang_cc1 -fclangir -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck --check-prefix=DEVICE-LLVM %s
// RUN: %clang_cc1 -fclangir -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-cir -o - %s | FileCheck --check-prefix=DEVICE-CIR %s
// RUN: echo "GPU binary would be here" > %t
// RUN: %clang_cc1 -fclangir -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s

struct surfaceReference {
int desc;
};

template <typename T, int dim = 1>
struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {
};

// Partial specialization over `void`.
template<int dim>
struct __attribute__((device_builtin_surface_type)) surface<void, dim> : public surfaceReference {
};

surface<void, 2> surf;

// DEVICE-LLVM: @surf = addrspace(1) externally_initialized global i64 undef, align 4
// DEVICE-CIR: cir.global external addrspace(offload_global) @surf = #cir.undef : !s64i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized}
// HOST: @surf = global %"struct.surface<void, 2>" zeroinitializer, align 4