diff options
| author | Stuart Cook <Zalathar@users.noreply.github.com> | 2025-08-07 20:49:36 +1000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-07 20:49:36 +1000 |
| commit | 1cd368a744e556d48e698670cc97928f5e320f70 (patch) | |
| tree | 8319dc1f836c0e622ab176ef07dd30ce7ba42e1b | |
| parent | bcd50fd45fb92a6c96af7e91893488bd4499153c (diff) | |
| parent | 35a485ddd86229101c4c17d9167f23cf75cae644 (diff) | |
| download | rust-1cd368a744e556d48e698670cc97928f5e320f70.tar.gz rust-1cd368a744e556d48e698670cc97928f5e320f70.zip | |
Rollup merge of #138689 - jedbrown:jed/nvptx-target-feature, r=ZuseZ4
add nvptx_target_feature Tracking issue: #141468 (nvptx), which is part of #44839 (catch-all arches) The feature gate is `#![feature(nvptx_target_feature)]` This exposes the target features `sm_20` through `sm_120a` [as defined](https://github.com/llvm/llvm-project/blob/llvmorg-20.1.1/llvm/lib/Target/NVPTX/NVPTX.td#L59-L85) by LLVM. Cc: ``````@gonzalobg`````` ``````@rustbot`````` label +O-NVPTX +A-target-feature
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/llvm_util.rs | 18 | ||||
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/target_features.rs | 15 | ||||
| -rw-r--r-- | compiler/rustc_feature/src/unstable.rs | 1 | ||||
| -rw-r--r-- | compiler/rustc_span/src/symbol.rs | 1 | ||||
| -rw-r--r-- | compiler/rustc_target/src/target_features.rs | 69 | ||||
| -rw-r--r-- | library/core/src/lib.rs | 1 | ||||
| -rw-r--r-- | src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md | 40 | ||||
| -rw-r--r-- | tests/ui/check-cfg/target_feature.stderr | 56 | ||||
| -rw-r--r-- | tests/ui/target-feature/gate.rs | 1 | ||||
| -rw-r--r-- | tests/ui/target-feature/gate.stderr | 2 | ||||
| -rw-r--r-- | tests/ui/target-feature/implied-features-nvptx.rs | 28 |
11 files changed, 222 insertions, 10 deletions
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs index 53899da183a..3b290e5a129 100644 --- a/compiler/rustc_codegen_llvm/src/llvm_util.rs +++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs @@ -262,6 +262,15 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea // Filter out features that are not supported by the current LLVM version ("aarch64", "fpmr") => None, // only existed in 18 ("arm", "fp16") => Some(LLVMFeature::new("fullfp16")), + // NVPTX targets added in LLVM 20 + ("nvptx64", "sm_100") if get_version().0 < 20 => None, + ("nvptx64", "sm_100a") if get_version().0 < 20 => None, + ("nvptx64", "sm_101") if get_version().0 < 20 => None, + ("nvptx64", "sm_101a") if get_version().0 < 20 => None, + ("nvptx64", "sm_120") if get_version().0 < 20 => None, + ("nvptx64", "sm_120a") if get_version().0 < 20 => None, + ("nvptx64", "ptx86") if get_version().0 < 20 => None, + ("nvptx64", "ptx87") if get_version().0 < 20 => None, // Filter out features that are not supported by the current LLVM version ("loongarch64", "div32" | "lam-bh" | "lamcas" | "ld-seq-sa" | "scq") if get_version().0 < 20 => @@ -324,15 +333,12 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea /// /// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen. pub(crate) fn target_config(sess: &Session) -> TargetConfig { - // Add base features for the target. - // We do *not* add the -Ctarget-features there, and instead duplicate the logic for that below. - // The reason is that if LLVM considers a feature implied but we do not, we don't want that to - // show up in `cfg`. That way, `cfg` is entirely under our control -- except for the handling of - // the target CPU, that is still expanded to target features (with all their implied features) - // by LLVM. let target_machine = create_informational_target_machine(sess, true); let (unstable_target_features, target_features) = cfg_target_feature(sess, |feature| { + // This closure determines whether the target CPU has the feature according to LLVM. We do + // *not* consider the `-Ctarget-feature`s here, as that will be handled later in + // `cfg_target_feature`. if let Some(feat) = to_llvm_features(sess, feature) { // All the LLVM features this expands to must be enabled. for llvm_feature in feat { diff --git a/compiler/rustc_codegen_ssa/src/target_features.rs b/compiler/rustc_codegen_ssa/src/target_features.rs index d984156c674..7e4341a8236 100644 --- a/compiler/rustc_codegen_ssa/src/target_features.rs +++ b/compiler/rustc_codegen_ssa/src/target_features.rs @@ -197,7 +197,10 @@ fn parse_rust_feature_flag<'a>( /// 2nd component of the return value, respectively). /// /// `target_base_has_feature` should check whether the given feature (a Rust feature name!) is -/// enabled in the "base" target machine, i.e., without applying `-Ctarget-feature`. +/// enabled in the "base" target machine, i.e., without applying `-Ctarget-feature`. Note that LLVM +/// may consider features to be implied that we do not and vice-versa. We want `cfg` to be entirely +/// consistent with Rust feature implications, and thus only consult LLVM to expand the target CPU +/// to target features. /// /// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled elsewhere. pub fn cfg_target_feature( @@ -211,7 +214,15 @@ pub fn cfg_target_feature( .rust_target_features() .iter() .filter(|(feature, _, _)| target_base_has_feature(feature)) - .map(|(feature, _, _)| Symbol::intern(feature)) + .flat_map(|(base_feature, _, _)| { + // Expand the direct base feature into all transitively-implied features. Note that we + // cannot simply use the `implied` field of the tuple since that only contains + // directly-implied features. + // + // Iteration order is irrelevant because we're collecting into an `UnordSet`. + #[allow(rustc::potential_query_instability)] + sess.target.implied_target_features(base_feature).into_iter().map(|f| Symbol::intern(f)) + }) .collect(); // Add enabled and remove disabled features. diff --git a/compiler/rustc_feature/src/unstable.rs b/compiler/rustc_feature/src/unstable.rs index 1303b3317e0..266a940b054 100644 --- a/compiler/rustc_feature/src/unstable.rs +++ b/compiler/rustc_feature/src/unstable.rs @@ -327,6 +327,7 @@ declare_features! ( (unstable, m68k_target_feature, "1.85.0", Some(134328)), (unstable, mips_target_feature, "1.27.0", Some(44839)), (unstable, movrs_target_feature, "1.88.0", Some(137976)), + (unstable, nvptx_target_feature, "CURRENT_RUSTC_VERSION", Some(44839)), (unstable, powerpc_target_feature, "1.27.0", Some(44839)), (unstable, prfchw_target_feature, "1.78.0", Some(44839)), (unstable, riscv_target_feature, "1.45.0", Some(44839)), diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index d54175548e3..752a3355718 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1512,6 +1512,7 @@ symbols! { not, notable_trait, note, + nvptx_target_feature, object_safe_for_dispatch, of, off, diff --git a/compiler/rustc_target/src/target_features.rs b/compiler/rustc_target/src/target_features.rs index b2af99228fe..297d9ed84c5 100644 --- a/compiler/rustc_target/src/target_features.rs +++ b/compiler/rustc_target/src/target_features.rs @@ -517,6 +517,71 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[ // tidy-alphabetical-end ]; +const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[ + // tidy-alphabetical-start + ("sm_20", Unstable(sym::nvptx_target_feature), &[]), + ("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]), + ("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]), + ("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]), + ("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]), + ("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]), + ("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]), + ("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]), + ("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]), + ("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]), + ("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]), + ("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]), + ("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]), + ("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]), + ("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]), + ("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]), + ("sm_86", Unstable(sym::nvptx_target_feature), &["sm_80"]), + ("sm_87", Unstable(sym::nvptx_target_feature), &["sm_86"]), + ("sm_89", Unstable(sym::nvptx_target_feature), &["sm_87"]), + ("sm_90", Unstable(sym::nvptx_target_feature), &["sm_89"]), + ("sm_90a", Unstable(sym::nvptx_target_feature), &["sm_90"]), + // tidy-alphabetical-end + // tidy-alphabetical-start + ("sm_100", Unstable(sym::nvptx_target_feature), &["sm_90"]), + ("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100"]), + ("sm_101", Unstable(sym::nvptx_target_feature), &["sm_100"]), + ("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101"]), + ("sm_120", Unstable(sym::nvptx_target_feature), &["sm_101"]), + ("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]), + // tidy-alphabetical-end + // tidy-alphabetical-start + ("ptx32", Unstable(sym::nvptx_target_feature), &[]), + ("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]), + ("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]), + ("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]), + ("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]), + ("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]), + ("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]), + ("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]), + ("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]), + ("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]), + ("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]), + ("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]), + ("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]), + ("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]), + ("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]), + ("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]), + ("ptx74", Unstable(sym::nvptx_target_feature), &["ptx73"]), + ("ptx75", Unstable(sym::nvptx_target_feature), &["ptx74"]), + ("ptx76", Unstable(sym::nvptx_target_feature), &["ptx75"]), + ("ptx77", Unstable(sym::nvptx_target_feature), &["ptx76"]), + ("ptx78", Unstable(sym::nvptx_target_feature), &["ptx77"]), + ("ptx80", Unstable(sym::nvptx_target_feature), &["ptx78"]), + ("ptx81", Unstable(sym::nvptx_target_feature), &["ptx80"]), + ("ptx82", Unstable(sym::nvptx_target_feature), &["ptx81"]), + ("ptx83", Unstable(sym::nvptx_target_feature), &["ptx82"]), + ("ptx84", Unstable(sym::nvptx_target_feature), &["ptx83"]), + ("ptx85", Unstable(sym::nvptx_target_feature), &["ptx84"]), + ("ptx86", Unstable(sym::nvptx_target_feature), &["ptx85"]), + ("ptx87", Unstable(sym::nvptx_target_feature), &["ptx86"]), + // tidy-alphabetical-end +]; + static RISCV_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[ // tidy-alphabetical-start ("a", Stable, &["zaamo", "zalrsc"]), @@ -782,6 +847,7 @@ pub fn all_rust_features() -> impl Iterator<Item = (&'static str, Stability)> { .chain(HEXAGON_FEATURES.iter()) .chain(POWERPC_FEATURES.iter()) .chain(MIPS_FEATURES.iter()) + .chain(NVPTX_FEATURES.iter()) .chain(RISCV_FEATURES.iter()) .chain(WASM_FEATURES.iter()) .chain(BPF_FEATURES.iter()) @@ -847,6 +913,7 @@ impl Target { "x86" | "x86_64" => X86_FEATURES, "hexagon" => HEXAGON_FEATURES, "mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES, + "nvptx64" => NVPTX_FEATURES, "powerpc" | "powerpc64" => POWERPC_FEATURES, "riscv32" | "riscv64" => RISCV_FEATURES, "wasm32" | "wasm64" => WASM_FEATURES, @@ -873,7 +940,7 @@ impl Target { "sparc" | "sparc64" => SPARC_FEATURES_FOR_CORRECT_VECTOR_ABI, "hexagon" => HEXAGON_FEATURES_FOR_CORRECT_VECTOR_ABI, "mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES_FOR_CORRECT_VECTOR_ABI, - "bpf" | "m68k" => &[], // no vector ABI + "nvptx64" | "bpf" | "m68k" => &[], // no vector ABI "csky" => CSKY_FEATURES_FOR_CORRECT_VECTOR_ABI, // FIXME: for some tier3 targets, we are overly cautious and always give warnings // when passing args in vector registers. diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index 3c33f4b1368..d5bce6ad233 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -195,6 +195,7 @@ #![feature(hexagon_target_feature)] #![feature(loongarch_target_feature)] #![feature(mips_target_feature)] +#![feature(nvptx_target_feature)] #![feature(powerpc_target_feature)] #![feature(riscv_target_feature)] #![feature(rtm_target_feature)] diff --git a/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md b/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md index 106ec562bfc..36598982481 100644 --- a/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md +++ b/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md @@ -10,6 +10,46 @@ platform. [@RDambrosio016](https://github.com/RDambrosio016) [@kjetilkjeka](https://github.com/kjetilkjeka) +## Requirements + +This target is `no_std` and will typically be built with crate-type `cdylib` and `-C linker-flavor=llbc`, which generates PTX. +The necessary components for this workflow are: + +- `rustup toolchain add nightly` +- `rustup component add llvm-tools --toolchain nightly` +- `rustup component add llvm-bitcode-linker --toolchain nightly` + +There are two options for using the core library: + +- `rustup component add rust-src --toolchain nightly` and build using `-Z build-std=core`. +- `rustup target add nvptx64-nvidia-cuda --toolchain nightly` + +### Target and features + +It is generally necessary to specify the target, such as `-C target-cpu=sm_89`, because the default is very old. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility. +One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0). +Later PTX versions may allow more efficient code generation. + +Although Rust follows LLVM in representing `ptx*` and `sm_*` as target features, they should be thought of as having crate granularity, set via (either via `-Ctarget-cpu` and optionally `-Ctarget-feature`). +While the compiler accepts `#[target_feature(enable = "ptx80", enable = "sm_89")]`, it is not supported, may not behave as intended, and may become erroneous in the future. + +## Building Rust kernels + +A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following. + +```console +$ RUSTFLAGS='-Ctarget-cpu=sm_89' cargo +nightly rustc --target=nvptx64-nvidia-cuda -Zbuild-std=core --crate-type=cdylib -- -Clinker-flavor=llbc -Zunstable-options +``` + +Intrinsics in `core::arch::nvptx` may use `#[cfg(target_feature = "...")]`, thus it's necessary to use `-Zbuild-std=core` with appropriate `RUSTFLAGS`. The following components are needed for this workflow: + +```console +$ rustup component add rust-src --toolchain nightly +$ rustup component add llvm-tools --toolchain nightly +$ rustup component add llvm-bitcode-linker --toolchain nightly +``` + + <!-- FIXME: fill this out ## Requirements diff --git a/tests/ui/check-cfg/target_feature.stderr b/tests/ui/check-cfg/target_feature.stderr index f422919983b..44fc23b6390 100644 --- a/tests/ui/check-cfg/target_feature.stderr +++ b/tests/ui/check-cfg/target_feature.stderr @@ -198,6 +198,35 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE"); `power9-altivec` `power9-vector` `prfchw` +`ptx32` +`ptx40` +`ptx41` +`ptx42` +`ptx43` +`ptx50` +`ptx60` +`ptx61` +`ptx62` +`ptx63` +`ptx64` +`ptx65` +`ptx70` +`ptx71` +`ptx72` +`ptx73` +`ptx74` +`ptx75` +`ptx76` +`ptx77` +`ptx78` +`ptx80` +`ptx81` +`ptx82` +`ptx83` +`ptx84` +`ptx85` +`ptx86` +`ptx87` `quadword-atomics` `rand` `ras` @@ -222,6 +251,33 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE"); `simd128` `sm3` `sm4` +`sm_100` +`sm_100a` +`sm_101` +`sm_101a` +`sm_120` +`sm_120a` +`sm_20` +`sm_21` +`sm_30` +`sm_32` +`sm_35` +`sm_37` +`sm_50` +`sm_52` +`sm_53` +`sm_60` +`sm_61` +`sm_62` +`sm_70` +`sm_72` +`sm_75` +`sm_80` +`sm_86` +`sm_87` +`sm_89` +`sm_90` +`sm_90a` `sme` `sme-b16b16` `sme-f16f16` diff --git a/tests/ui/target-feature/gate.rs b/tests/ui/target-feature/gate.rs index 9244a98d82f..81ed8b3de76 100644 --- a/tests/ui/target-feature/gate.rs +++ b/tests/ui/target-feature/gate.rs @@ -6,6 +6,7 @@ // gate-test-arm_target_feature // gate-test-hexagon_target_feature // gate-test-mips_target_feature +// gate-test-nvptx_target_feature // gate-test-wasm_target_feature // gate-test-adx_target_feature // gate-test-cmpxchg16b_target_feature diff --git a/tests/ui/target-feature/gate.stderr b/tests/ui/target-feature/gate.stderr index 32d60ce4382..3e9374be73d 100644 --- a/tests/ui/target-feature/gate.stderr +++ b/tests/ui/target-feature/gate.stderr @@ -1,5 +1,5 @@ error[E0658]: the target feature `x87` is currently unstable - --> $DIR/gate.rs:29:18 + --> $DIR/gate.rs:30:18 | LL | #[target_feature(enable = "x87")] | ^^^^^^^^^^^^^^ diff --git a/tests/ui/target-feature/implied-features-nvptx.rs b/tests/ui/target-feature/implied-features-nvptx.rs new file mode 100644 index 00000000000..1550c99f67a --- /dev/null +++ b/tests/ui/target-feature/implied-features-nvptx.rs @@ -0,0 +1,28 @@ +//@ assembly-output: ptx-linker +//@ compile-flags: --crate-type cdylib -C target-cpu=sm_80 -Z unstable-options -Clinker-flavor=llbc +//@ only-nvptx64 +//@ build-pass +#![no_std] +#![allow(dead_code)] + +#[panic_handler] +pub fn panic(_info: &core::panic::PanicInfo) -> ! { + loop {} +} + +// -Ctarget-cpu=sm_80 directly enables sm_80 and ptx70 +#[cfg(not(all(target_feature = "sm_80", target_feature = "ptx70")))] +compile_error!("direct target features not enabled"); + +// -Ctarget-cpu=sm_80 implies all earlier sm_* and ptx* features. +#[cfg(not(all( + target_feature = "sm_60", + target_feature = "sm_70", + target_feature = "ptx50", + target_feature = "ptx60", +)))] +compile_error!("implied target features not enabled"); + +// -Ctarget-cpu=sm_80 implies all earlier sm_* and ptx* features. +#[cfg(target_feature = "ptx71")] +compile_error!("sm_80 requires only ptx70, but ptx71 enabled"); |
