Skip to content

add nvptx_target_feature #138689

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions compiler/rustc_codegen_llvm/src/llvm_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,15 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
// Filter out features that are not supported by the current LLVM version
("aarch64", "fpmr") => None, // only existed in 18
("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
// NVPTX targets added in LLVM 20
("nvptx64", "sm_100") if get_version().0 < 20 => None,
("nvptx64", "sm_100a") if get_version().0 < 20 => None,
("nvptx64", "sm_101") if get_version().0 < 20 => None,
("nvptx64", "sm_101a") if get_version().0 < 20 => None,
("nvptx64", "sm_120") if get_version().0 < 20 => None,
("nvptx64", "sm_120a") if get_version().0 < 20 => None,
("nvptx64", "ptx86") if get_version().0 < 20 => None,
("nvptx64", "ptx87") if get_version().0 < 20 => None,
// Filter out features that are not supported by the current LLVM version
("riscv32" | "riscv64", "zacas") if get_version().0 < 20 => None,
// Enable the evex512 target feature if an avx512 target feature is enabled.
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_feature/src/unstable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ declare_features! (
(unstable, m68k_target_feature, "1.85.0", Some(134328)),
(unstable, mips_target_feature, "1.27.0", Some(44839)),
(unstable, movrs_target_feature, "CURRENT_RUSTC_VERSION", Some(137976)),
(unstable, nvptx_target_feature, "CURRENT_RUSTC_VERSION", Some(44839)),
(unstable, powerpc_target_feature, "1.27.0", Some(44839)),
(unstable, prfchw_target_feature, "1.78.0", Some(44839)),
(unstable, riscv_target_feature, "1.45.0", Some(44839)),
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1472,6 +1472,7 @@ symbols! {
not,
notable_trait,
note,
nvptx_target_feature,
object_safe_for_dispatch,
of,
off,
Expand Down
67 changes: 67 additions & 0 deletions compiler/rustc_target/src/target_features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,70 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// tidy-alphabetical-end
];

/// Target features recognized for NVPTX targets.
///
/// Each entry is `(feature name, stability, implied features)`. Every feature
/// in this table is unstable and gated behind `nvptx_target_feature`.
///
/// The table holds two independent chains:
/// - `sm_*` features, where each entry implies its predecessor, so enabling one
///   transitively enables every earlier `sm_*` entry in the chain;
/// - `ptx*` features, chained the same way.
///
/// The `a`-suffixed entries (`sm_90a`, `sm_100a`, ...) imply their unsuffixed
/// base, but no later entry implies them: e.g. `sm_100` implies `sm_90`, not
/// `sm_90a`.
const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// `sm_*` features with two-digit numbers. The list is split into several
// tidy-alphabetical sections so that each one is sorted on its own
// (presumably because a plain lexical sort would place `sm_100` before
// `sm_20` -- TODO confirm).
//
// NOTE(review): the platform-support documentation uses `-C target-cpu=sm_89`
// as its example and says it implies an `sm_89` target feature, but there is
// no `sm_89` entry between `sm_87` and `sm_90` here -- confirm whether one is
// missing.
// tidy-alphabetical-start
("sm_20", Unstable(sym::nvptx_target_feature), &[]),
("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
("sm_86", Unstable(sym::nvptx_target_feature), &["sm_80"]),
("sm_87", Unstable(sym::nvptx_target_feature), &["sm_86"]),
("sm_90", Unstable(sym::nvptx_target_feature), &["sm_87"]),
("sm_90a", Unstable(sym::nvptx_target_feature), &["sm_90"]),
// tidy-alphabetical-end
// `sm_*` features with three-digit numbers. The chain continues from `sm_90`;
// these are the entries that the LLVM backend filters out when the LLVM major
// version is below 20.
// tidy-alphabetical-start
("sm_100", Unstable(sym::nvptx_target_feature), &["sm_90"]),
("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100"]),
("sm_101", Unstable(sym::nvptx_target_feature), &["sm_100"]),
("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101"]),
("sm_120", Unstable(sym::nvptx_target_feature), &["sm_101"]),
("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
// tidy-alphabetical-end
// `ptx*` features, each implying the previous entry. `ptx86` and `ptx87` are
// likewise filtered out by the LLVM backend when the LLVM major version is
// below 20.
// tidy-alphabetical-start
("ptx32", Unstable(sym::nvptx_target_feature), &[]),
("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),
("ptx74", Unstable(sym::nvptx_target_feature), &["ptx73"]),
("ptx75", Unstable(sym::nvptx_target_feature), &["ptx74"]),
("ptx76", Unstable(sym::nvptx_target_feature), &["ptx75"]),
("ptx77", Unstable(sym::nvptx_target_feature), &["ptx76"]),
("ptx78", Unstable(sym::nvptx_target_feature), &["ptx77"]),
("ptx80", Unstable(sym::nvptx_target_feature), &["ptx78"]),
("ptx81", Unstable(sym::nvptx_target_feature), &["ptx80"]),
("ptx82", Unstable(sym::nvptx_target_feature), &["ptx81"]),
("ptx83", Unstable(sym::nvptx_target_feature), &["ptx82"]),
("ptx84", Unstable(sym::nvptx_target_feature), &["ptx83"]),
("ptx85", Unstable(sym::nvptx_target_feature), &["ptx84"]),
("ptx86", Unstable(sym::nvptx_target_feature), &["ptx85"]),
("ptx87", Unstable(sym::nvptx_target_feature), &["ptx86"]),
// tidy-alphabetical-end
];

static RISCV_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// tidy-alphabetical-start
("a", Stable, &["zaamo", "zalrsc"]),
Expand Down Expand Up @@ -726,6 +790,7 @@ pub fn all_rust_features() -> impl Iterator<Item = (&'static str, Stability)> {
.chain(HEXAGON_FEATURES.iter())
.chain(POWERPC_FEATURES.iter())
.chain(MIPS_FEATURES.iter())
.chain(NVPTX_FEATURES.iter())
.chain(RISCV_FEATURES.iter())
.chain(WASM_FEATURES.iter())
.chain(BPF_FEATURES.iter())
Expand Down Expand Up @@ -791,6 +856,7 @@ impl Target {
"x86" | "x86_64" => X86_FEATURES,
"hexagon" => HEXAGON_FEATURES,
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES,
"nvptx64" => NVPTX_FEATURES,
"powerpc" | "powerpc64" => POWERPC_FEATURES,
"riscv32" | "riscv64" => RISCV_FEATURES,
"wasm32" | "wasm64" => WASM_FEATURES,
Expand All @@ -817,6 +883,7 @@ impl Target {
"sparc" | "sparc64" => SPARC_FEATURES_FOR_CORRECT_VECTOR_ABI,
"hexagon" => HEXAGON_FEATURES_FOR_CORRECT_VECTOR_ABI,
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES_FOR_CORRECT_VECTOR_ABI,
"nvptx64" => &[], // no vector ABI
"bpf" | "m68k" => &[], // no vector ABI
"csky" => CSKY_FEATURES_FOR_CORRECT_VECTOR_ABI,
// FIXME: for some tier3 targets, we are overly cautious and always give warnings
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@
//
// Target features:
// tidy-alphabetical-start
#![cfg_attr(not(bootstrap), feature(nvptx_target_feature))]
#![feature(aarch64_unstable_target_feature)]
#![feature(arm_target_feature)]
#![feature(avx512_target_feature)]
Expand Down
34 changes: 34 additions & 0 deletions src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,40 @@ platform.
- Riccardo D'Ambrosio, https://github.com/RDambrosio016
- Kjetil Kjeka, https://github.com/kjetilkjeka

## Requirements

This target is `no_std` and will typically be built with crate-type `cdylib` and `-C linker-flavor=llbc`, which generates PTX.
The necessary components for this workflow are:

- `rustup toolchain add nightly`
- `rustup component add llvm-tools --toolchain nightly`
- `rustup component add llvm-bitcode-linker --toolchain nightly`

There are two options for using the core library:

- `rustup component add rust-src --toolchain nightly` and build using `-Z build-std=core`.
- `rustup target add nvptx64-nvidia-cuda --toolchain nightly`

### Target and features

It is necessary to specify the target processor, e.g. with `-C target-cpu=sm_89`. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default `ptx78` requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).

Although `ptx*` is represented as a target feature, it is a compile-time property and it is not possible to build a crate that uses instructions not present in the PTX version specified at compile-time (either via `target-cpu` or `target-feature`).
For example, consider an unaligned barrier `barrier.sync`, which requires both `sm_70` and `ptx60`.
To keep a crate buildable for older devices (e.g., with `-C target-cpu=sm_62`) while ensuring that this unaligned barrier is unreachable at run time on such devices, the function that uses it could carry the following attributes:
```
#[cfg(target_feature = "ptx60")]
#[target_feature(enable = "sm_70")]
```

## Building Rust kernels

A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.

```console
$ cargo +nightly rustc --target=nvptx64-nvidia-cuda -Zbuild-std=core --crate-type=cdylib -- -Clinker-flavor=llbc -Ctarget-cpu=sm_89 -Zunstable-options
```
Comment on lines +42 to +46
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a few moving parts to complete kernels and how to use those on the host. That seems like too much for these docs, and would be better as a stand-alone repo or an example in cudarc. Let me know if you have other suggestions (or examples for other arches) that I should consider.

<!-- FIXME: fill this out

## Requirements
Expand Down
55 changes: 55 additions & 0 deletions tests/ui/check-cfg/target_feature.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,35 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
`power9-altivec`
`power9-vector`
`prfchw`
`ptx32`
`ptx40`
`ptx41`
`ptx42`
`ptx43`
`ptx50`
`ptx60`
`ptx61`
`ptx62`
`ptx63`
`ptx64`
`ptx65`
`ptx70`
`ptx71`
`ptx72`
`ptx73`
`ptx74`
`ptx75`
`ptx76`
`ptx77`
`ptx78`
`ptx80`
`ptx81`
`ptx82`
`ptx83`
`ptx84`
`ptx85`
`ptx86`
`ptx87`
`quadword-atomics`
`rand`
`ras`
Expand All @@ -209,6 +238,32 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
`simd128`
`sm3`
`sm4`
`sm_100`
`sm_100a`
`sm_101`
`sm_101a`
`sm_120`
`sm_120a`
`sm_20`
`sm_21`
`sm_30`
`sm_32`
`sm_35`
`sm_37`
`sm_50`
`sm_52`
`sm_53`
`sm_60`
`sm_61`
`sm_62`
`sm_70`
`sm_72`
`sm_75`
`sm_80`
`sm_86`
`sm_87`
`sm_90`
`sm_90a`
`sme`
`sme-b16b16`
`sme-f16f16`
Expand Down
1 change: 1 addition & 0 deletions tests/ui/target-feature/gate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// gate-test-arm_target_feature
// gate-test-hexagon_target_feature
// gate-test-mips_target_feature
// gate-test-nvptx_target_feature
// gate-test-wasm_target_feature
// gate-test-adx_target_feature
// gate-test-cmpxchg16b_target_feature
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/target-feature/gate.stderr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
error[E0658]: the target feature `avx512bw` is currently unstable
--> $DIR/gate.rs:30:18
--> $DIR/gate.rs:31:18
|
LL | #[target_feature(enable = "avx512bw")]
| ^^^^^^^^^^^^^^^^^^^
Expand Down
Loading