From 118f5a91af1f5472f42fee8bbcd19c6abdea80e3 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Wed, 26 Feb 2025 20:06:25 -0700 Subject: [PATCH] add nvptx_target_feature Add target features for sm_* and ptx*, both of which form a partial order, but cannot be combined to a single partial order. These mirror the LLVM target features, but we do not provide LLVM target processors (which imply both an sm_* and ptx* feature). Add some documentation for the nvptx target. --- compiler/rustc_codegen_llvm/src/llvm_util.rs | 9 +++ compiler/rustc_feature/src/unstable.rs | 1 + compiler/rustc_span/src/symbol.rs | 1 + compiler/rustc_target/src/target_features.rs | 67 +++++++++++++++++++ library/core/src/lib.rs | 1 + .../platform-support/nvptx64-nvidia-cuda.md | 34 ++++++++++ tests/ui/check-cfg/target_feature.stderr | 55 +++++++++++++++ tests/ui/target-feature/gate.rs | 1 + tests/ui/target-feature/gate.stderr | 2 +- 9 files changed, 170 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs index 36e35f81392bc..a4ad3bd238862 100644 --- a/compiler/rustc_codegen_llvm/src/llvm_util.rs +++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs @@ -271,6 +271,15 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option None, // only existed in 18 ("arm", "fp16") => Some(LLVMFeature::new("fullfp16")), + // NVPTX targets added in LLVM 20 + ("nvptx64", "sm_100") if get_version().0 < 20 => None, + ("nvptx64", "sm_100a") if get_version().0 < 20 => None, + ("nvptx64", "sm_101") if get_version().0 < 20 => None, + ("nvptx64", "sm_101a") if get_version().0 < 20 => None, + ("nvptx64", "sm_120") if get_version().0 < 20 => None, + ("nvptx64", "sm_120a") if get_version().0 < 20 => None, + ("nvptx64", "ptx86") if get_version().0 < 20 => None, + ("nvptx64", "ptx87") if get_version().0 < 20 => None, // Filter out features that are not supported by the current LLVM version ("riscv32" | "riscv64", "zacas") if get_version().0 < 20 => None, // Enable the evex512 target feature if an avx512 target feature is enabled. diff --git a/compiler/rustc_feature/src/unstable.rs b/compiler/rustc_feature/src/unstable.rs index 7edb05c7307b5..3c968ac7d9ed5 100644 --- a/compiler/rustc_feature/src/unstable.rs +++ b/compiler/rustc_feature/src/unstable.rs @@ -325,6 +325,7 @@ declare_features! ( (unstable, m68k_target_feature, "1.85.0", Some(134328)), (unstable, mips_target_feature, "1.27.0", Some(44839)), (unstable, movrs_target_feature, "CURRENT_RUSTC_VERSION", Some(137976)), + (unstable, nvptx_target_feature, "CURRENT_RUSTC_VERSION", Some(44839)), (unstable, powerpc_target_feature, "1.27.0", Some(44839)), (unstable, prfchw_target_feature, "1.78.0", Some(44839)), (unstable, riscv_target_feature, "1.45.0", Some(44839)), diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 31847ae3b4658..7520e3db8c3e6 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1472,6 +1472,7 @@ symbols! { not, notable_trait, note, + nvptx_target_feature, object_safe_for_dispatch, of, off, diff --git a/compiler/rustc_target/src/target_features.rs b/compiler/rustc_target/src/target_features.rs index b4ec1879fed5c..d56649ed6e116 100644 --- a/compiler/rustc_target/src/target_features.rs +++ b/compiler/rustc_target/src/target_features.rs @@ -488,6 +488,70 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[ // tidy-alphabetical-end ]; +const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[ + // tidy-alphabetical-start + ("sm_20", Unstable(sym::nvptx_target_feature), &[]), + ("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]), + ("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]), + ("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]), + ("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]), + ("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]), + ("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]), + ("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]), + ("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]), + ("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]), + ("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]), + ("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]), + ("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]), + ("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]), + ("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]), + ("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]), + ("sm_86", Unstable(sym::nvptx_target_feature), &["sm_80"]), + ("sm_87", Unstable(sym::nvptx_target_feature), &["sm_86"]), + ("sm_90", Unstable(sym::nvptx_target_feature), &["sm_87"]), + ("sm_90a", Unstable(sym::nvptx_target_feature), &["sm_90"]), + // tidy-alphabetical-end + // tidy-alphabetical-start + ("sm_100", Unstable(sym::nvptx_target_feature), &["sm_90"]), + ("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100"]), + ("sm_101", Unstable(sym::nvptx_target_feature), &["sm_100"]), + ("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101"]), + ("sm_120", Unstable(sym::nvptx_target_feature), &["sm_101"]), + ("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]), + // tidy-alphabetical-end + // tidy-alphabetical-start + ("ptx32", Unstable(sym::nvptx_target_feature), &[]), + ("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]), + ("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]), + ("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]), + ("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]), + ("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]), + ("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]), + ("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]), + ("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]), + ("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]), + ("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]), + ("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]), + ("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]), + ("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]), + ("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]), + ("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]), + ("ptx74", Unstable(sym::nvptx_target_feature), &["ptx73"]), + ("ptx75", Unstable(sym::nvptx_target_feature), &["ptx74"]), + ("ptx76", Unstable(sym::nvptx_target_feature), &["ptx75"]), + ("ptx77", Unstable(sym::nvptx_target_feature), &["ptx76"]), + ("ptx78", Unstable(sym::nvptx_target_feature), &["ptx77"]), + ("ptx80", Unstable(sym::nvptx_target_feature), &["ptx78"]), + ("ptx81", Unstable(sym::nvptx_target_feature), &["ptx80"]), + ("ptx82", Unstable(sym::nvptx_target_feature), &["ptx81"]), + ("ptx83", Unstable(sym::nvptx_target_feature), &["ptx82"]), + ("ptx84", Unstable(sym::nvptx_target_feature), &["ptx83"]), + ("ptx85", Unstable(sym::nvptx_target_feature), &["ptx84"]), + ("ptx86", Unstable(sym::nvptx_target_feature), &["ptx85"]), + ("ptx87", Unstable(sym::nvptx_target_feature), &["ptx86"]), + // tidy-alphabetical-end +]; + static RISCV_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[ // tidy-alphabetical-start ("a", Stable, &["zaamo", "zalrsc"]), @@ -726,6 +790,7 @@ pub fn all_rust_features() -> impl Iterator { .chain(HEXAGON_FEATURES.iter()) .chain(POWERPC_FEATURES.iter()) .chain(MIPS_FEATURES.iter()) + .chain(NVPTX_FEATURES.iter()) .chain(RISCV_FEATURES.iter()) .chain(WASM_FEATURES.iter()) .chain(BPF_FEATURES.iter()) @@ -791,6 +856,7 @@ impl Target { "x86" | "x86_64" => X86_FEATURES, "hexagon" => HEXAGON_FEATURES, "mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES, + "nvptx64" => NVPTX_FEATURES, "powerpc" | "powerpc64" => POWERPC_FEATURES, "riscv32" | "riscv64" => RISCV_FEATURES, "wasm32" | "wasm64" => WASM_FEATURES, @@ -817,6 +883,7 @@ impl Target { "sparc" | "sparc64" => SPARC_FEATURES_FOR_CORRECT_VECTOR_ABI, "hexagon" => HEXAGON_FEATURES_FOR_CORRECT_VECTOR_ABI, "mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES_FOR_CORRECT_VECTOR_ABI, + "nvptx64" => &[], // no vector ABI "bpf" | "m68k" => &[], // no vector ABI "csky" => CSKY_FEATURES_FOR_CORRECT_VECTOR_ABI, // FIXME: for some tier3 targets, we are overly cautious and always give warnings diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index dc06aa4c38d55..da9910d17c2df 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -190,6 +190,7 @@ // // Target features: // tidy-alphabetical-start +#![cfg_attr(not(bootstrap), feature(nvptx_target_feature))] #![feature(aarch64_unstable_target_feature)] #![feature(arm_target_feature)] #![feature(avx512_target_feature)] diff --git a/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md b/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md index ab8641ff69ae2..f4a3c1f9ed49d 100644 --- a/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md +++ b/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md @@ -10,6 +10,40 @@ platform. - Riccardo D'Ambrosio, https://github.com/RDambrosio016 - Kjetil Kjeka, https://github.com/kjetilkjeka +## Requirements + +This target is `no_std` and will typically be built with crate-type `cdylib` and `-C linker-flavor=llbc`, which generates PTX. +The necessary components for this workflow are: + +- `rustup toolchain add nightly` +- `rustup component add llvm-tools --toolchain nightly` +- `rustup component add llvm-bitcode-linker --toolchain nightly` + +There are two options for using the core library: + +- `rustup component add rust-src --toolchain nightly` and build using `-Z build-std=core`. +- `rustup target add nvptx64-nvidia-cuda --toolchain nightly` + +### Target and features + +It is necessary to specify the target, such as `-C target-cpu=sm_89`. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility. +One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default `ptx78` requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0). + +Although `ptx*` is represented as a target feature, it is a compile-time property and it is not possible to build a crate that uses instructions not present in the PTX version specified at compile-time (either via `target-cpu` or `target-feature`). +For example, consider an unaligned barrier `barrier.sync`, which requires both `sm_70` and `ptx60`. +If one wants to support building for older devices (e.g., `-C target-cpu=sm_62`; ensuring that this unaligned barrier is unreachable at run-time on such devices), the relevant function could use attributes: +``` +#[cfg(target_feature = "ptx60")] +#[target_feature(enable = "sm_70")] +``` + +## Building Rust kernels + +A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following. + +```console +$ cargo +nightly rustc --target=nvptx64-nvidia-cuda -Zbuild-std=core --crate-type=cdylib -- -Clinker-flavor=llbc -Ctarget-cpu=sm_89 -Zunstable-options +``` $DIR/gate.rs:30:18 + --> $DIR/gate.rs:31:18 | LL | #[target_feature(enable = "avx512bw")] | ^^^^^^^^^^^^^^^^^^^