From e44025864f28adbc2ad05aab1942006ea8fc9e4d Mon Sep 17 00:00:00 2001 From: Aidan Wong Date: Tue, 25 Feb 2025 02:39:40 +0000 Subject: [PATCH 1/9] emit const attr for cuda getValueFromLangAS: implement cuda constant fix shouldEmitCUDAGlobalVar - copies og emitGlobalVarDefinition: set constant for cudaconstant --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 19 ++++++++----------- clang/lib/CIR/Dialect/IR/CIRAttrs.cpp | 2 +- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 52166ba2efb1..8cfdb92cb7d0 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -517,15 +517,11 @@ bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl *global) const { // device-side variables because the CUDA runtime needs their // size and host-side address in order to provide access to // their device-side incarnations. - - if (global->hasAttr() || - global->hasAttr() || - global->getType()->isCUDADeviceBuiltinSurfaceType() || - global->getType()->isCUDADeviceBuiltinTextureType()) { - llvm_unreachable("NYI"); - } - - return !langOpts.CUDAIsDevice || global->hasAttr(); + return !langOpts.CUDAIsDevice || global->hasAttr() || + global->hasAttr() || + global->hasAttr() || + global->getType()->isCUDADeviceBuiltinSurfaceType() || + global->getType()->isCUDADeviceBuiltinTextureType(); } void CIRGenModule::emitGlobal(GlobalDecl gd) { @@ -1452,8 +1448,9 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *d, emitter->finalize(gv); // TODO(cir): If it is safe to mark the global 'constant', do so now. - gv.setConstant(!needsGlobalCtor && !needsGlobalDtor && - isTypeConstant(d->getType(), true, true)); + gv.setConstant((d->hasAttr() && langOpts.CUDAIsDevice) || + (!needsGlobalCtor && !needsGlobalDtor && + isTypeConstant(d->getType(), true, true))); // If it is in a read-only section, mark it 'constant'. if (const SectionAttr *sa = d->getAttr()) diff --git a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp index eaa5ffd151b0..96acf8d84ae8 100644 --- a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp @@ -652,6 +652,7 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) { case LangAS::cuda_device: return Kind::offload_global; case LangAS::opencl_constant: + case LangAS::cuda_constant: return Kind::offload_constant; case LangAS::opencl_private: return Kind::offload_private; @@ -660,7 +661,6 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) { case LangAS::opencl_global_device: case LangAS::opencl_global_host: - case LangAS::cuda_constant: case LangAS::sycl_global: case LangAS::sycl_global_device: case LangAS::sycl_global_host: From 73fabd6fb71c93b035a992c5abb0f51ba9cdf93d Mon Sep 17 00:00:00 2001 From: Aidan Wong Date: Fri, 28 Feb 2025 21:16:43 +0000 Subject: [PATCH 2/9] emitGlobal: move shouldEmitCUDAGlobalVar to first check - matches og --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 8cfdb92cb7d0..e9afbdf9d544 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -545,7 +545,10 @@ void CIRGenModule::emitGlobal(GlobalDecl gd) { if (langOpts.CUDA || langOpts.HIP) { // clang uses the same flag when building HIP code - if (langOpts.CUDAIsDevice) { + if (const auto *vd = dyn_cast(global)) { + if (!shouldEmitCUDAGlobalVar(vd)) + return; + } else if (langOpts.CUDAIsDevice) { // This will implicitly mark templates and their // specializations as __host__ __device__. if (langOpts.OffloadImplicitHostDeviceTemplates) @@ -567,11 +570,6 @@ void CIRGenModule::emitGlobal(GlobalDecl gd) { return; } } - - if (const auto *vd = dyn_cast(global)) { - if (!shouldEmitCUDAGlobalVar(vd)) - return; - } } if (langOpts.OpenMP) { From 76901bbf6b0c8b770f5e81d563f2cbdb4daa2dfd Mon Sep 17 00:00:00 2001 From: Aidan Wong Date: Mon, 3 Mar 2025 20:17:25 +0000 Subject: [PATCH 3/9] tests: add constant to global-vars.cu - also fixed __device__ check --- clang/test/CIR/CodeGen/CUDA/global-vars.cu | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/clang/test/CIR/CodeGen/CUDA/global-vars.cu b/clang/test/CIR/CodeGen/CUDA/global-vars.cu index 5b1374c085eb..f8babab01c45 100644 --- a/clang/test/CIR/CodeGen/CUDA/global-vars.cu +++ b/clang/test/CIR/CodeGen/CUDA/global-vars.cu @@ -8,4 +8,8 @@ __device__ int a; -// CIR-DEVICE: cir.global external addrspace(offload_global) @a = #cir.int<0> : !s32i {alignment = 4 : i64} loc(#loc3) \ No newline at end of file +// CIR-DEVICE: cir.global external addrspace(offload_global) @a ={{.*}} + +__constant__ int b; + +// CIR-DEVICE: cir.global constant external addrspace(offload_constant) @b ={{.*}} From 2617e0ffc1f14366675c69fa92b912c78ad73da0 Mon Sep 17 00:00:00 2001 From: Aidan Wong Date: Thu, 6 Mar 2025 12:34:20 +0000 Subject: [PATCH 4/9] implement const --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 11 ++++++----- clang/lib/CIR/Dialect/IR/CIRAttrs.cpp | 2 +- clang/test/CIR/CodeGen/CUDA/global-vars.cu | 3 +++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 4581d321764b..4e0d84e62526 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -569,13 +569,13 @@ bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl *global) const { // size and host-side address in order to provide access to // their device-side incarnations. - if (global->hasAttr() || - global->getType()->isCUDADeviceBuiltinSurfaceType() || + if (global->getType()->isCUDADeviceBuiltinSurfaceType() || global->getType()->isCUDADeviceBuiltinTextureType()) { llvm_unreachable("NYI"); } return !langOpts.CUDAIsDevice || global->hasAttr() || + global->hasAttr() || global->hasAttr(); } @@ -1496,7 +1496,7 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *d, // __shared__ variables is not marked as externally initialized, // because they must not be initialized. if (linkage != cir::GlobalLinkageKind::InternalLinkage && - (d->hasAttr())) { + (d->hasAttr() || d->hasAttr())) { gv->setAttr(CUDAExternallyInitializedAttr::getMnemonic(), CUDAExternallyInitializedAttr::get(&getMLIRContext())); } @@ -1508,8 +1508,9 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *d, emitter->finalize(gv); // TODO(cir): If it is safe to mark the global 'constant', do so now. - gv.setConstant(!needsGlobalCtor && !needsGlobalDtor && - isTypeConstant(d->getType(), true, true)); + gv.setConstant((d->hasAttr() && langOpts.CUDAIsDevice) || + (!needsGlobalCtor && !needsGlobalDtor && + isTypeConstant(d->getType(), true, true))); // If it is in a read-only section, mark it 'constant'. if (const SectionAttr *sa = d->getAttr()) diff --git a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp index eaa5ffd151b0..96acf8d84ae8 100644 --- a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp @@ -652,6 +652,7 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) { case LangAS::cuda_device: return Kind::offload_global; case LangAS::opencl_constant: + case LangAS::cuda_constant: return Kind::offload_constant; case LangAS::opencl_private: return Kind::offload_private; @@ -660,7 +661,6 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) { case LangAS::opencl_global_device: case LangAS::opencl_global_host: - case LangAS::cuda_constant: case LangAS::sycl_global: case LangAS::sycl_global_device: case LangAS::sycl_global_host: diff --git a/clang/test/CIR/CodeGen/CUDA/global-vars.cu b/clang/test/CIR/CodeGen/CUDA/global-vars.cu index f6e630a7e797..cf4bacb5a63e 100644 --- a/clang/test/CIR/CodeGen/CUDA/global-vars.cu +++ b/clang/test/CIR/CodeGen/CUDA/global-vars.cu @@ -17,3 +17,6 @@ __device__ int a; __shared__ int shared; // CIR-DEVICE: cir.global external addrspace(offload_local) @shared = #cir.undef // LLVM-DEVICE: @shared = addrspace(3) global i32 undef, align 4 + +__constant__ int b; +// CIR-DEVICE: cir.global constant external addrspace(offload_constant) @b = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} From a250e36b087d4223d2b4a6a68cc1892ffb8e6a93 Mon Sep 17 00:00:00 2001 From: Aidan Wong Date: Thu, 6 Mar 2025 13:31:39 +0000 Subject: [PATCH 5/9] fix address space for offload_constant --- .../lib/CIR/Dialect/Transforms/TargetLowering/Targets/NVPTX.cpp | 2 +- clang/test/CIR/CodeGen/CUDA/global-vars.cu | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/NVPTX.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/NVPTX.cpp index 64c13331d9ba..82e1afd79f09 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/NVPTX.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/NVPTX.cpp @@ -52,7 +52,7 @@ class NVPTXTargetLoweringInfo : public TargetLoweringInfo { case Kind::offload_global: return 1; case Kind::offload_constant: - return 2; + return 4; case Kind::offload_generic: return 4; default: diff --git a/clang/test/CIR/CodeGen/CUDA/global-vars.cu b/clang/test/CIR/CodeGen/CUDA/global-vars.cu index cf4bacb5a63e..25de5f28dd91 100644 --- a/clang/test/CIR/CodeGen/CUDA/global-vars.cu +++ b/clang/test/CIR/CodeGen/CUDA/global-vars.cu @@ -20,3 +20,4 @@ __shared__ int shared; __constant__ int b; // CIR-DEVICE: cir.global constant external addrspace(offload_constant) @b = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized} +// LLVM-DEVICE: @b = addrspace(4) externally_initialized constant i32 0, align 4 From 33c353f2fdd2b378f37a4d8dfe20c7b5cf122793 Mon Sep 17 00:00:00 2001 From: Aidan Wong Date: Thu, 6 Mar 2025 14:03:44 +0000 Subject: [PATCH 6/9] Fix address space values for NVPTX --- .../CIR/Dialect/Transforms/TargetLowering/Targets/NVPTX.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/NVPTX.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/NVPTX.cpp index 64c13331d9ba..004272e8b3d2 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/NVPTX.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/NVPTX.cpp @@ -52,9 +52,9 @@ class NVPTXTargetLoweringInfo : public TargetLoweringInfo { case Kind::offload_global: return 1; case Kind::offload_constant: - return 2; - case Kind::offload_generic: return 4; + case Kind::offload_generic: + return 0; default: cir_cconv_unreachable("Unknown CIR address space for this target"); } From 92cc36e45c2296d4a37f26efcd562fb5543c1b48 Mon Sep 17 00:00:00 2001 From: Aidan Wong Date: Fri, 7 Mar 2025 23:24:07 +0000 Subject: [PATCH 7/9] add testcase --- .../CIR/CodeGen/CUDA/addrspace-lowering.cu | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 clang/test/CIR/CodeGen/CUDA/addrspace-lowering.cu diff --git a/clang/test/CIR/CodeGen/CUDA/addrspace-lowering.cu b/clang/test/CIR/CodeGen/CUDA/addrspace-lowering.cu new file mode 100644 index 000000000000..d6c8e3590968 --- /dev/null +++ b/clang/test/CIR/CodeGen/CUDA/addrspace-lowering.cu @@ -0,0 +1,19 @@ +#include "../Inputs/cuda.h" + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \ +// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \ +// RUN: %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s + + +__shared__ int a; + +// LLVM-DEVICE: @a = addrspace(3) {{.*}} + +__device__ int b; + +// LLVM-DEVICE: @b = addrspace(1) {{.*}} + +// __constant__ int c; + +// XFAIL-LLVM-DEVICE: @c = addrspace(4) {{.*}} From 07ac4f579419210510443ccc1ca56955cf264f34 Mon Sep 17 00:00:00 2001 From: Aidan Wong Date: Fri, 7 Mar 2025 23:31:45 +0000 Subject: [PATCH 8/9] test: enable const test in addrspace-lowering.cu --- clang/test/CIR/CodeGen/CUDA/addrspace-lowering.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CIR/CodeGen/CUDA/addrspace-lowering.cu b/clang/test/CIR/CodeGen/CUDA/addrspace-lowering.cu index d6c8e3590968..91f26fa29597 100644 --- a/clang/test/CIR/CodeGen/CUDA/addrspace-lowering.cu +++ b/clang/test/CIR/CodeGen/CUDA/addrspace-lowering.cu @@ -14,6 +14,6 @@ __device__ int b; // LLVM-DEVICE: @b = addrspace(1) {{.*}} -// __constant__ int c; +__constant__ int c; -// XFAIL-LLVM-DEVICE: @c = addrspace(4) {{.*}} +// LLVM-DEVICE: @c = addrspace(4) {{.*}} From bf4c99521a3a3ad66f7b2149eccee0ee27c87109 Mon Sep 17 00:00:00 2001 From: Aidan Wong Date: Sat, 8 Mar 2025 16:50:15 +0000 Subject: [PATCH 9/9] remove duplicated case --- clang/lib/CIR/Dialect/IR/CIRAttrs.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp index 3c6049e901ff..32ceb0096d71 100644 --- a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp @@ -658,8 +658,6 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) { return Kind::offload_private; case LangAS::opencl_generic: return Kind::offload_generic; - case LangAS::cuda_constant: - return Kind::offload_constant; case LangAS::opencl_global_device: case LangAS::opencl_global_host: case LangAS::sycl_global: