Add intrinsic for dynamic shared memory #146181
```diff
@@ -1716,6 +1716,8 @@ pub struct AddressSpace(pub u32);
 impl AddressSpace {
     /// LLVM's `0` address space.
     pub const ZERO: Self = AddressSpace(0);
+    /// The address space for shared memory on nvptx and amdgpu.
+    pub const SHARED: Self = AddressSpace(3);
 }

 /// The way we represent values to the backend
```

Review thread on the new `SHARED` constant:

- This should have a name that makes it more clear that it is GPU-specific.
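For readers outside the compiler, the constant above is a plain newtype over `u32` naming an LLVM address-space number. The following standalone sketch (a copy for illustration only, not the real `rustc_abi` crate) shows the values involved:

```rust
// Standalone model of the AddressSpace newtype from the diff above.
// LLVM identifies address spaces by number; 3 is the shared (nvptx) /
// local (amdgpu) memory space on both GPU backends.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct AddressSpace(pub u32);

impl AddressSpace {
    /// LLVM's `0` (generic) address space.
    pub const ZERO: Self = AddressSpace(0);
    /// The address space for shared memory on nvptx and amdgpu.
    pub const SHARED: Self = AddressSpace(3);
}

fn main() {
    assert_eq!(AddressSpace::ZERO.0, 0);
    assert_eq!(AddressSpace::SHARED.0, 3);
    println!("ok");
}
```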
```diff
@@ -1,7 +1,9 @@
 use std::assert_matches::assert_matches;
 use std::cmp::Ordering;

-use rustc_abi::{Align, BackendRepr, ExternAbi, Float, HasDataLayout, Primitive, Size};
+use rustc_abi::{
+    AddressSpace, Align, BackendRepr, ExternAbi, Float, HasDataLayout, Primitive, Size,
+};
 use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh};
 use rustc_codegen_ssa::codegen_attrs::autodiff_attrs;
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
```
```diff
@@ -532,6 +534,22 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 return Ok(());
             }

+            sym::dynamic_shared_memory => {
+                let global = self.declare_global_in_addrspace(
+                    "dynamic_shared_memory",
+                    self.type_array(self.type_i8(), 0),
+                    AddressSpace::SHARED,
+                );
```

Review thread on the hard-coded `"dynamic_shared_memory"` name:

- Is there a special magic meaning to this hard-coded name for the global? When linking together Rust code and code in other languages, is this the name everyone must use consistently to ensure things behave correctly?
- The name has no meaning.
- That seems very unusual, so if the intrinsic impl relies on this there should be a comment referring to the place where this is documented.
Review thread on lines +537 to +542:

- Hm. I'm not sure this is the correct design for this, because it makes the calls to an intrinsic alter global program state during compilation, which feels very dicey and assumes that the calls to that intrinsic will experience codegen. I'm not sure how substantial this concern is, however, so I'm willing to be persuaded.
- Ah, good point... if you call this function in dead code, it may or may not have any effect. How do other languages handle controlling the alignment of this magic global?
- To be fair, even if it is called, it does not have any effect. It's just a getter for a pointer that exists anyway. It does not change anything if it's dead-code-eliminated.
- We're talking about the funky behavior where DCE'ing the intrinsic removes its compile-time effect on the alignment.
- Ah right, I missed that. I think it would still do the correct thing as documented. All calls that are not eliminated get a pointer that's sufficiently aligned for their type.
- I think I can answer this now. You define an extern global of some type, and the alignment of that type is used. Unused extern globals can of course be removed/ignored. (This matters more when some of the globals are only used by some of the kernels defined in a program. A kernel only gets the alignment specified by globals it uses.)
- Hmm. I guess I'm worrying about someone calling it with
- That'd be wrong, yeah, but it's an unsafe operation after all, so it's not surprising that you can use it wrong. Do you have another API in mind that would make this kind of mistake less likely?
- I'm sort of wondering if this should resemble thread-local statics in how it is declared, instead? So that program elements collaborating on accesses to this memory can reuse the same type.
- It seems like there's only a single dynamic groupshared allocation, though, that's aliased by everything in a group, which is not at all how statics work.
```diff
+
+                let ty::RawPtr(inner_ty, _) = result.layout.ty.kind() else { unreachable!() };
+                let alignment = self.align_of(*inner_ty).bytes() as u32;
+                unsafe {
+                    if alignment > llvm::LLVMGetAlignment(global) {
+                        llvm::LLVMSetAlignment(global, alignment);
+                    }
+                }
```
Review thread on updating the global's alignment:

- What does this do when there are multiple translation units? It seems like each would declare its own version of the global, potentially with a different alignment?
- All of them return the same pointer.
- By which magic does that happen? That's not usually how LLVM globals work, AFAIK.
- That would be rather surprising to me; usually a global can only be declared in one translation unit and then must be imported in all the others. Your current implementation might end up with a separate symbol for each TU, I am not sure. EDIT: Ah, I didn't realize these are
- How does that work in rustc, where each crate is a separate TU?
- Yes, the key is that they are extern. I agree it would not make sense for non-extern globals. We do (fat/full) LTO: all crates compile to bitcode, then that bitcode is linked and compiled as one LLVM IR module.
- Usually an alignment attribute on an extern global encodes an assumption: it's a promise to LLVM that someone else has aligned the global sufficiently. It looks like this relies on some sort of magic handshake where whatever component actually creates the global will honor whatever alignment you can find in the bitcode? That's pretty fragile. Usually in LLVM it's always correct to reduce the alignment attribute on such declarations; it just means we know less about what the real alignment is. (This is in contrast to alignment annotations on definitions, which define the real alignment, so it can be safely increased but not reduced.) Seems like some system here is (ab)using LLVM attributes in ways that break some of their core properties, or am I misunderstanding something? Is this magic handshake documented anywhere?
- (As mentioned elsewhere in the PR.) This is where some of that handshake is implemented.
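To make the "largest surviving alignment wins" behavior discussed above concrete, here is a small host-side model (illustration only, not compiler code; the default alignment and the specific call sites are assumptions):

```rust
use std::mem::align_of;

fn main() {
    // The global's declaration starts with some default alignment.
    let mut global_align: u32 = 1;

    // Hypothetical surviving call sites: dynamic_shared_memory::<i32>()
    // followed by dynamic_shared_memory::<f64>().
    for requested in [align_of::<i32>() as u32, align_of::<f64>() as u32] {
        // Mirrors the LLVMGetAlignment/LLVMSetAlignment logic in the diff:
        // the recorded alignment is only ever raised, never lowered.
        if requested > global_align {
            global_align = requested;
        }
    }

    // The f64 call raised the alignment to 8, matching the `align 8`
    // the codegen test at the end of this PR expects.
    assert_eq!(global_align, 8);
    println!("final align = {global_align}");
}
```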
```diff
+
+                self.cx().const_pointercast(global, self.type_ptr())
+            }

             _ if name.as_str().starts_with("simd_") => {
                 // Unpack non-power-of-2 #[repr(packed, simd)] arguments.
                 // This gives them the expected layout of a regular #[repr(simd)] vector.
```
```diff
@@ -3238,6 +3238,23 @@ pub(crate) const fn miri_promise_symbolic_alignment(ptr: *const (), align: usize
     )
 }
```
```diff
+
+/// Returns a pointer to dynamic shared memory.
```

Review thread on the doc comment:

- I know this is a short description, but it is just repeating the function signature:

  ```rust
  pub fn dynamic_shared_memory<T: ?Sized>() -> *mut T;
  /*     ^^^^^^^^^^^^^^^^^^^^               ^^ ^^^^^^
         |                                  |  |
         -- "dynamic shared memory"         |  - "a pointer"
                                            - "returns"
  */
  ```

  I'm basically repeating the same comment I already made about "shared with what?", except adding that "dynamic" is also pretty vague, since in computer programming it's always relative to some notion of "static". Yet the "static" is not obviously present in this description.
```diff
+///
+/// The returned pointer is the start of the dynamic shared memory region.
+/// All pointers returned by `dynamic_shared_memory` point to the same address,
+/// so alias the same memory.
```

Review thread on lines +3244 to +3245:

- Does this mean that all
- All of them alias, independent of the type. Maybe this makes it clearer:
- It's probably worth using a more specific term ("GPU shared memory" or so), since many people reading this will think "shared memory" refers to its more standard meaning of memory shared across different processes (often set up via
- This sort of "translation guide" is not actually useful if you are not familiar with any of these things, so I would just leave it out, as it is a distraction from the actual description. Especially since it's very easy to go look up a definition of, say, OpenCL's local memory, see it referred to as "this is GLSL's shared memory", look up a definition of that and see it referred to as basically the same idea as groupshared memory in DirectX, then look up a definition of that and... you get the idea. Our definition should stand on its own.
- The translation guide might be useful to people who are familiar with these things and wondering why we are making up our own terms.
- Then I will merely continue to insist that the description should make sense without reference, to prevent infinite regress. Exceedingly fine details, of course, can be handled elsewhere by other sources, but the concepts should be usefully clear here.
- For sure, we need something reasonably concise such that a Rust compiler dev with zero GPU knowledge has a rough idea of what this does after reading the description. I don't think that's that hard; GPUs aren't that special, but they use a lot of "weird" (read: grown-over-time) terminology that presents a big barrier to entry.
```diff
+/// The returned pointer is aligned by at least the alignment of `T`.
```

Review thread on the alignment guarantee:

- Speaking of safety requirements... how does one use this pointer? I get that it is aligned, but does it point to enough memory to store a
- Typically, intrinsic documentation should be detailed enough that I can read and write code using the intrinsic and know exactly whether the code is correct and what it will do in all circumstances. I don't know if there's any hope of achieving that with GPU intrinsics, but if not, then we need to have a bit of a wider discussion: we have had bad experience with just importing "externally defined" semantics into Rust without considering all the interactions (in general, it is not logically coherent to have semantics externally defined). The current docs would let me implement this intrinsic by just always returning 1024, and emitting a compile error if
- Is there some prior discussion of the design decision to determine the alignment by giving a type parameter? It could also be a const generic parameter, for instance. I don't have an opinion on the matter, since I am an outsider to the GPU world, but as a compiler team member it'd be good to know if this is something you thought about for 5 minutes or whether there's some sort of larger design by a team that has a vision of how all these things will fit together.
- There is some discussion in #135516. I don't mind either way; I thought (for 5 minutes ;)) that specifying the type of the returned pointer makes sense. For just a struct, static shared memory would make more sense, though we don't support that yet (there's some discussion in the tracking issue, but I think that's more complicated to design and implement).
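Since the guaranteed alignment is derived from the intrinsic's type parameter via the language's alignment rules (the codegen above calls `self.align_of(*inner_ty)`), a few concrete values illustrate what different instantiations would request. This is ordinary host Rust, just to show the numbers:

```rust
use std::mem::align_of;

// Alignments that hypothetical calls such as dynamic_shared_memory::<T>()
// would request for the shared-memory global, per type parameter T.
fn main() {
    assert_eq!(align_of::<u8>(), 1);
    assert_eq!(align_of::<i32>(), 4);
    assert_eq!(align_of::<f64>(), 8);
    // Arrays align like their element type, so a [f64; N] view requests 8.
    assert_eq!(align_of::<[f64; 10]>(), 8);
    println!("ok");
}
```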
```diff
+///
+/// # Other APIs
+///
+/// CUDA and HIP call this shared memory.
+/// OpenCL and SYCL call this local memory.
+#[rustc_intrinsic]
+#[rustc_nounwind]
+#[unstable(feature = "dynamic_shared_memory", issue = "135513")]
+#[cfg(any(target_arch = "amdgpu", target_arch = "nvptx64"))]
+pub fn dynamic_shared_memory<T: ?Sized>() -> *mut T;
```

Review thread on the intrinsic's name:

- Note that outside the GPU world, "shared memory" typically refers to memory shared between processes. So I would suggest using a name that's less likely to be confused, like something that explicitly involves "GPU" or so. This sounds like a form of "global" memory (similar to a
- Does it make sense to add a
- Or should it be in
- Rust intrinsic names are not namespaced. They are exposed in a module, but inside the compiler they are identified entirely by their name. So moving them into a different module doesn't alleviate the need for a clear name that will be understandable to non-GPU people working in the compiler (which is the vast majority of compiler devs). If there's more GPU intrinsics to come, moving them into a
- I don't have a strong opinion on how the eventually stable public API is organized; I am commenting entirely as someone who has an interest in keeping the set of intrinsics the Rust compiler offers understandable and well-defined (the ones in this folder, not the ones in
- The
```diff
+
 /// Copies the current location of arglist `src` to the arglist `dst`.
 ///
 /// FIXME: document safety requirements
```
```diff
@@ -0,0 +1,27 @@
+// Checks that dynamic_shared_memory works.
+
+//@ revisions: amdgpu nvptx x86
+//@ compile-flags: --crate-type=rlib
+//
+//@ [amdgpu] compile-flags: --target amdgcn-amd-amdhsa -Ctarget-cpu=gfx900
+//@ [amdgpu] only-amdgpu
+//@ [amdgpu] needs-llvm-components: amdgpu
+//@ [nvptx] compile-flags: --target nvptx64-nvidia-cuda
+//@ [nvptx] only-nvptx64
+//@ [nvptx] needs-llvm-components: nvptx
+//@ [x86] compile-flags: --target x86_64-unknown-linux-gnu
+//@ [x86] only-x86_64
```

Review thread on the x86 revision:

- This is not enough when running on x86_64 Windows. You did need
```diff
+//@ [x86] needs-llvm-components: x86
+//@ [x86] should-fail
+#![feature(core_intrinsics, dynamic_shared_memory)]
+#![no_std]
```

Review thread on the test setup:

- Would using mini-core instead of the precompiled core in the sysroot work instead of hard-coding
- I don't think hardcoding gfx900 as the target will work at all; different uarches can have wildly different codegen'd binaries and are usually not backwards compatible. I think this is a case where we want the user to specify the target architecture that they are compiling for. This would probably work for smoke testing, but in the long term it's probably going to cause more pains than it's worth.
- Given that this is just about local testing that the compiler works at all, none of the binary compatibility concerns really matter to us. This code is not executed. But I agree that we should probably just use the minicore for this instead of adding code to the build system that we might have to rip out later. It's easy to update tests... unless they are built on a fragile assumption in the build system.
- I could probably duplicate the intrinsic in minicore, but wouldn't that defeat the purpose of this test? Sure, that's what the target enforces :) Unfortunately it's the exact thing that throws a wrench into writing tests for the standard library.
- Duplicating the intrinsic declaration is fine; what you are testing is the implementation, and that is not being duplicated.
```diff
+
+use core::intrinsics::dynamic_shared_memory;
+
+// CHECK: @dynamic_shared_memory = external addrspace(3) global [0 x i8], align 8
+// CHECK: ret ptr addrspacecast (ptr addrspace(3) @dynamic_shared_memory to ptr)
+pub fn fun() -> *mut i32 {
+    let res = dynamic_shared_memory::<i32>();
+    dynamic_shared_memory::<f64>(); // Increase alignment to 8
+    res
+}
```
Review thread on the phrase "shared memory":

- "shared" between whomst?
- The concept is called "shared memory", so I meant it as a reference to that concept, not as an explanation ;) I'll make this more descriptive (though as it stands, we'll first need to decide on nomenclature to use throughout the Rust codebase).