about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm_util.rs9
-rw-r--r--compiler/rustc_feature/src/unstable.rs1
-rw-r--r--compiler/rustc_span/src/symbol.rs1
-rw-r--r--compiler/rustc_target/src/target_features.rs69
-rw-r--r--library/core/src/lib.rs1
-rw-r--r--src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md40
-rw-r--r--tests/ui/check-cfg/target_feature.stderr56
-rw-r--r--tests/ui/target-feature/gate.rs1
-rw-r--r--tests/ui/target-feature/gate.stderr2
9 files changed, 178 insertions, 2 deletions
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 6fd07d562af..202b9641e56 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -262,6 +262,15 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         // Filter out features that are not supported by the current LLVM version
         ("aarch64", "fpmr") => None, // only existed in 18
         ("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
+        // NVPTX target features (SM architectures and PTX versions) added in LLVM 20
+        ("nvptx64", "sm_100") if get_version().0 < 20 => None,
+        ("nvptx64", "sm_100a") if get_version().0 < 20 => None,
+        ("nvptx64", "sm_101") if get_version().0 < 20 => None,
+        ("nvptx64", "sm_101a") if get_version().0 < 20 => None,
+        ("nvptx64", "sm_120") if get_version().0 < 20 => None,
+        ("nvptx64", "sm_120a") if get_version().0 < 20 => None,
+        ("nvptx64", "ptx86") if get_version().0 < 20 => None,
+        ("nvptx64", "ptx87") if get_version().0 < 20 => None,
         // Filter out features that are not supported by the current LLVM version
         ("loongarch64", "div32" | "lam-bh" | "lamcas" | "ld-seq-sa" | "scq")
             if get_version().0 < 20 =>
diff --git a/compiler/rustc_feature/src/unstable.rs b/compiler/rustc_feature/src/unstable.rs
index 91715851226..bc48b45bce1 100644
--- a/compiler/rustc_feature/src/unstable.rs
+++ b/compiler/rustc_feature/src/unstable.rs
@@ -329,6 +329,7 @@ declare_features! (
     (unstable, m68k_target_feature, "1.85.0", Some(134328)),
     (unstable, mips_target_feature, "1.27.0", Some(44839)),
     (unstable, movrs_target_feature, "1.88.0", Some(137976)),
+    (unstable, nvptx_target_feature, "CURRENT_RUSTC_VERSION", Some(44839)),
     (unstable, powerpc_target_feature, "1.27.0", Some(44839)),
     (unstable, prfchw_target_feature, "1.78.0", Some(44839)),
     (unstable, riscv_target_feature, "1.45.0", Some(44839)),
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index da69f6c4492..b0dd144bf47 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -1509,6 +1509,7 @@ symbols! {
         not,
         notable_trait,
         note,
+        nvptx_target_feature,
         object_safe_for_dispatch,
         of,
         off,
diff --git a/compiler/rustc_target/src/target_features.rs b/compiler/rustc_target/src/target_features.rs
index 3eea1e070a6..3449c16ee4a 100644
--- a/compiler/rustc_target/src/target_features.rs
+++ b/compiler/rustc_target/src/target_features.rs
@@ -517,6 +517,71 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
     // tidy-alphabetical-end
 ];
 
+const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
+    // tidy-alphabetical-start
+    ("sm_20", Unstable(sym::nvptx_target_feature), &[]),
+    ("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
+    ("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
+    ("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
+    ("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
+    ("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
+    ("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
+    ("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
+    ("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
+    ("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
+    ("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
+    ("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
+    ("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
+    ("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
+    ("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
+    ("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
+    ("sm_86", Unstable(sym::nvptx_target_feature), &["sm_80"]),
+    ("sm_87", Unstable(sym::nvptx_target_feature), &["sm_86"]),
+    ("sm_89", Unstable(sym::nvptx_target_feature), &["sm_87"]),
+    ("sm_90", Unstable(sym::nvptx_target_feature), &["sm_89"]),
+    ("sm_90a", Unstable(sym::nvptx_target_feature), &["sm_90"]),
+    // tidy-alphabetical-end
+    // tidy-alphabetical-start
+    ("sm_100", Unstable(sym::nvptx_target_feature), &["sm_90"]),
+    ("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100"]),
+    ("sm_101", Unstable(sym::nvptx_target_feature), &["sm_100"]),
+    ("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101"]),
+    ("sm_120", Unstable(sym::nvptx_target_feature), &["sm_101"]),
+    ("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
+    // tidy-alphabetical-end
+    // tidy-alphabetical-start
+    ("ptx32", Unstable(sym::nvptx_target_feature), &[]),
+    ("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
+    ("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
+    ("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
+    ("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
+    ("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
+    ("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
+    ("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
+    ("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
+    ("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
+    ("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
+    ("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
+    ("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
+    ("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
+    ("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
+    ("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),
+    ("ptx74", Unstable(sym::nvptx_target_feature), &["ptx73"]),
+    ("ptx75", Unstable(sym::nvptx_target_feature), &["ptx74"]),
+    ("ptx76", Unstable(sym::nvptx_target_feature), &["ptx75"]),
+    ("ptx77", Unstable(sym::nvptx_target_feature), &["ptx76"]),
+    ("ptx78", Unstable(sym::nvptx_target_feature), &["ptx77"]),
+    ("ptx80", Unstable(sym::nvptx_target_feature), &["ptx78"]),
+    ("ptx81", Unstable(sym::nvptx_target_feature), &["ptx80"]),
+    ("ptx82", Unstable(sym::nvptx_target_feature), &["ptx81"]),
+    ("ptx83", Unstable(sym::nvptx_target_feature), &["ptx82"]),
+    ("ptx84", Unstable(sym::nvptx_target_feature), &["ptx83"]),
+    ("ptx85", Unstable(sym::nvptx_target_feature), &["ptx84"]),
+    ("ptx86", Unstable(sym::nvptx_target_feature), &["ptx85"]),
+    ("ptx87", Unstable(sym::nvptx_target_feature), &["ptx86"]),
+    // tidy-alphabetical-end
+];
+
 static RISCV_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
     // tidy-alphabetical-start
     ("a", Stable, &["zaamo", "zalrsc"]),
@@ -782,6 +847,7 @@ pub fn all_rust_features() -> impl Iterator<Item = (&'static str, Stability)> {
         .chain(HEXAGON_FEATURES.iter())
         .chain(POWERPC_FEATURES.iter())
         .chain(MIPS_FEATURES.iter())
+        .chain(NVPTX_FEATURES.iter())
         .chain(RISCV_FEATURES.iter())
         .chain(WASM_FEATURES.iter())
         .chain(BPF_FEATURES.iter())
@@ -847,6 +913,7 @@ impl Target {
             "x86" | "x86_64" => X86_FEATURES,
             "hexagon" => HEXAGON_FEATURES,
             "mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES,
+            "nvptx64" => NVPTX_FEATURES,
             "powerpc" | "powerpc64" => POWERPC_FEATURES,
             "riscv32" | "riscv64" => RISCV_FEATURES,
             "wasm32" | "wasm64" => WASM_FEATURES,
@@ -873,7 +940,7 @@ impl Target {
             "sparc" | "sparc64" => SPARC_FEATURES_FOR_CORRECT_VECTOR_ABI,
             "hexagon" => HEXAGON_FEATURES_FOR_CORRECT_VECTOR_ABI,
             "mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES_FOR_CORRECT_VECTOR_ABI,
-            "bpf" | "m68k" => &[], // no vector ABI
+            "nvptx64" | "bpf" | "m68k" => &[], // no vector ABI
             "csky" => CSKY_FEATURES_FOR_CORRECT_VECTOR_ABI,
             // FIXME: for some tier3 targets, we are overly cautious and always give warnings
             // when passing args in vector registers.
diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs
index 39d5399101d..4ff142bf5d0 100644
--- a/library/core/src/lib.rs
+++ b/library/core/src/lib.rs
@@ -192,6 +192,7 @@
 #![feature(hexagon_target_feature)]
 #![feature(loongarch_target_feature)]
 #![feature(mips_target_feature)]
+#![feature(nvptx_target_feature)]
 #![feature(powerpc_target_feature)]
 #![feature(riscv_target_feature)]
 #![feature(rtm_target_feature)]
diff --git a/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md b/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md
index 106ec562bfc..36598982481 100644
--- a/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md
+++ b/src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md
@@ -10,6 +10,46 @@ platform.
 [@RDambrosio016](https://github.com/RDambrosio016)
 [@kjetilkjeka](https://github.com/kjetilkjeka)
 
+## Requirements
+
+This target is `no_std` and will typically be built with crate-type `cdylib` and `-C linker-flavor=llbc`, which generates PTX.
+The necessary components for this workflow are:
+
+- `rustup toolchain add nightly`
+- `rustup component add llvm-tools --toolchain nightly`
+- `rustup component add llvm-bitcode-linker --toolchain nightly`
+
+There are two options for using the core library:
+
+- `rustup component add rust-src --toolchain nightly` and build using `-Z build-std=core`.
+- `rustup target add nvptx64-nvidia-cuda --toolchain nightly`
+
+### Target and features
+
+It is generally necessary to specify the target CPU, such as `-C target-cpu=sm_89`, because the default is very old. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
+One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
+Later PTX versions may allow more efficient code generation.
+
+Although Rust follows LLVM in representing `ptx*` and `sm_*` as target features, they should be thought of as having crate granularity, set via `-Ctarget-cpu` and optionally `-Ctarget-feature`.
+While the compiler accepts `#[target_feature(enable = "ptx80", enable = "sm_89")]`, it is not supported, may not behave as intended, and may become an error in the future.
+
+## Building Rust kernels
+
+A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.
+
+```console
+$ RUSTFLAGS='-Ctarget-cpu=sm_89' cargo +nightly rustc --target=nvptx64-nvidia-cuda -Zbuild-std=core --crate-type=cdylib -- -Clinker-flavor=llbc -Zunstable-options
+```
+
+Intrinsics in `core::arch::nvptx` may use `#[cfg(target_feature = "...")]`, so it is necessary to use `-Zbuild-std=core` with appropriate `RUSTFLAGS`. The following components are needed for this workflow:
+
+```console
+$ rustup component add rust-src --toolchain nightly
+$ rustup component add llvm-tools --toolchain nightly
+$ rustup component add llvm-bitcode-linker --toolchain nightly
+```
+
+
 <!-- FIXME: fill this out
 
 ## Requirements
diff --git a/tests/ui/check-cfg/target_feature.stderr b/tests/ui/check-cfg/target_feature.stderr
index f422919983b..44fc23b6390 100644
--- a/tests/ui/check-cfg/target_feature.stderr
+++ b/tests/ui/check-cfg/target_feature.stderr
@@ -198,6 +198,35 @@ LL |     cfg!(target_feature = "_UNEXPECTED_VALUE");
 `power9-altivec`
 `power9-vector`
 `prfchw`
+`ptx32`
+`ptx40`
+`ptx41`
+`ptx42`
+`ptx43`
+`ptx50`
+`ptx60`
+`ptx61`
+`ptx62`
+`ptx63`
+`ptx64`
+`ptx65`
+`ptx70`
+`ptx71`
+`ptx72`
+`ptx73`
+`ptx74`
+`ptx75`
+`ptx76`
+`ptx77`
+`ptx78`
+`ptx80`
+`ptx81`
+`ptx82`
+`ptx83`
+`ptx84`
+`ptx85`
+`ptx86`
+`ptx87`
 `quadword-atomics`
 `rand`
 `ras`
@@ -222,6 +251,33 @@ LL |     cfg!(target_feature = "_UNEXPECTED_VALUE");
 `simd128`
 `sm3`
 `sm4`
+`sm_100`
+`sm_100a`
+`sm_101`
+`sm_101a`
+`sm_120`
+`sm_120a`
+`sm_20`
+`sm_21`
+`sm_30`
+`sm_32`
+`sm_35`
+`sm_37`
+`sm_50`
+`sm_52`
+`sm_53`
+`sm_60`
+`sm_61`
+`sm_62`
+`sm_70`
+`sm_72`
+`sm_75`
+`sm_80`
+`sm_86`
+`sm_87`
+`sm_89`
+`sm_90`
+`sm_90a`
 `sme`
 `sme-b16b16`
 `sme-f16f16`
diff --git a/tests/ui/target-feature/gate.rs b/tests/ui/target-feature/gate.rs
index 9244a98d82f..81ed8b3de76 100644
--- a/tests/ui/target-feature/gate.rs
+++ b/tests/ui/target-feature/gate.rs
@@ -6,6 +6,7 @@
 // gate-test-arm_target_feature
 // gate-test-hexagon_target_feature
 // gate-test-mips_target_feature
+// gate-test-nvptx_target_feature
 // gate-test-wasm_target_feature
 // gate-test-adx_target_feature
 // gate-test-cmpxchg16b_target_feature
diff --git a/tests/ui/target-feature/gate.stderr b/tests/ui/target-feature/gate.stderr
index 32d60ce4382..3e9374be73d 100644
--- a/tests/ui/target-feature/gate.stderr
+++ b/tests/ui/target-feature/gate.stderr
@@ -1,5 +1,5 @@
 error[E0658]: the target feature `x87` is currently unstable
-  --> $DIR/gate.rs:29:18
+  --> $DIR/gate.rs:30:18
    |
 LL | #[target_feature(enable = "x87")]
    |                  ^^^^^^^^^^^^^^